From bc3b6e066f854bda6af412b2678ae830749c951b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Mar 2026 13:00:20 +0100 Subject: [PATCH 001/352] test(SHACL-core): :white_check_mark: add unit/integration tests --- .../must/agent_project_intersection.ttl | 21 +++++++------------ tests/integration/test_sparql_constraints.py | 18 +++++++++------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/data/profiles/sparql_test/must/agent_project_intersection.ttl b/tests/data/profiles/sparql_test/must/agent_project_intersection.ttl index 7237b33e6..6dde72039 100644 --- a/tests/data/profiles/sparql_test/must/agent_project_intersection.ttl +++ b/tests/data/profiles/sparql_test/must/agent_project_intersection.ttl @@ -11,28 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -@prefix dct: . +@prefix schema: . @prefix ro-crate: . @prefix sh: . -@prefix xsd: . - -# Test shape for SPARQL constraint with BNode sourceShape resolution -# This shape ALWAYS produces a violation to test the handling of SPARQL constraints -# with BNode sourceShape (sh:sparql creates a BNode constraint node) -ro-crate:AlwaysFailShape a sh:NodeShape ; - sh:name "Always Fail Test" ; - sh:description "Test shape that always produces a violation to test SPARQL constraint handling." 
; - sh:targetNode ro-crate:ROCrateMetadataFileDescriptor ; +ro-crate:AgentProjectIntersection a sh:NodeShape ; + sh:name "Agent Project Membership Validation" ; + sh:description """Check that all the agents defined in the RO-Crate metadata file descriptor are also defined as members of the project, if a project is defined.""" ; + sh:targetClass schema:Dataset ; sh:sparql [ a sh:SPARQLConstraint ; - sh:message "This is a test violation to verify SPARQL constraint handling" ; + sh:message "SPARQL constraint violation: testing BNode sourceShape resolution" ; sh:select """ SELECT $this WHERE { - BIND($this AS ?fail) + FILTER(true) } """ ; sh:severity sh:Violation ; + ] . diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index b448cad84..9a7bec693 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -86,7 +86,7 @@ def sparql_test_rocrate(): def test_sparql_profile_shape_loaded_correctly(sparql_test_profiles_path): - """Test that the sparql-test profile loads the test shape with SPARQL constraint.""" + """Test that the sparql-test profile loads the AgentProjectIntersection shape.""" registry = ShapesRegistry() shape_file = str(Path(sparql_test_profiles_path) / "must" / "agent_project_intersection.ttl") @@ -94,16 +94,18 @@ def test_sparql_profile_shape_loaded_correctly(sparql_test_profiles_path): assert len(shapes) > 0, "Should load at least one shape" - # Find the test shape (AlwaysFailShape or similar name) - test_shape = None + # Find the AgentProjectIntersection shape + agent_shape = None for shape in shapes: - if "Always" in shape.name or "Test" in shape.name or "test" in shape.name.lower(): - test_shape = shape + if "Agent" in shape.name or "agent" in shape.name.lower(): + agent_shape = shape break - assert test_shape is not None, "Should find the test SPARQL shape" - assert test_shape.description is not None - assert len(test_shape.description) > 0 + 
assert agent_shape is not None, "Should find AgentProjectIntersection shape" + assert agent_shape.description is not None + assert ( + "Agent" in agent_shape.description or "agent" in agent_shape.description.lower() + ) def test_sparql_constraint_with_bnode_sourceShape(sparql_test_profiles_path, sparql_test_rocrate): From 71009cc139df5d66743a087430209e4cb3126b21 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Dec 2025 14:00:00 +0100 Subject: [PATCH 002/352] refactor(ro-crate-1.1): move RO-Crate 1.1 validation profile --- .../{ => 1.1}/may/4_data_entity_metadata.ttl | 0 .../{ => 1.1}/may/61_license_entity.ttl | 0 .../must/0_file_descriptor_format.py | 0 .../must/1_file-descriptor_metadata.ttl | 0 .../must/2_root_data_entity_metadata.ttl | 0 .../{ => 1.1}/must/4_data_entity_metadata.py | 0 .../{ => 1.1}/must/4_data_entity_metadata.ttl | 2 +- .../must/5_web_data_entity_metadata.ttl | 0 .../{ => 1.1}/must/6_contextual_entity.ttl | 0 .../profiles/ro-crate/{ => 1.1}/ontology.ttl | 0 .../profiles/ro-crate/{ => 1.1}/prefixes.ttl | 0 .../profiles/ro-crate/{ => 1.1}/profile.ttl | 0 .../should/2_root_data_entity_metadata.ttl | 0 .../should/2_root_data_entity_relative_uri.py | 0 .../should/4_data_entity_existence.py | 0 .../should/4_data_entity_metadata.ttl | 0 .../should/5_web_data_entity_metadata.py | 0 .../should/5_web_data_entity_metadata.ttl | 23 ++++++++----------- .../should/6_contextual_entity_metadata.ttl | 0 19 files changed, 11 insertions(+), 14 deletions(-) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/may/4_data_entity_metadata.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/may/61_license_entity.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/0_file_descriptor_format.py (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/1_file-descriptor_metadata.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/2_root_data_entity_metadata.ttl (100%) rename 
rocrate_validator/profiles/ro-crate/{ => 1.1}/must/4_data_entity_metadata.py (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/4_data_entity_metadata.ttl (98%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/5_web_data_entity_metadata.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/must/6_contextual_entity.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/ontology.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/prefixes.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/profile.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/2_root_data_entity_metadata.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/2_root_data_entity_relative_uri.py (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/4_data_entity_existence.py (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/4_data_entity_metadata.ttl (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/5_web_data_entity_metadata.py (100%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/5_web_data_entity_metadata.ttl (85%) rename rocrate_validator/profiles/ro-crate/{ => 1.1}/should/6_contextual_entity_metadata.ttl (100%) diff --git a/rocrate_validator/profiles/ro-crate/may/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/may/4_data_entity_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/may/4_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/may/4_data_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl b/rocrate_validator/profiles/ro-crate/1.1/may/61_license_entity.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl rename to rocrate_validator/profiles/ro-crate/1.1/may/61_license_entity.ttl diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py 
b/rocrate_validator/profiles/ro-crate/1.1/must/0_file_descriptor_format.py similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py rename to rocrate_validator/profiles/ro-crate/1.1/must/0_file_descriptor_format.py diff --git a/rocrate_validator/profiles/ro-crate/must/1_file-descriptor_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/must/1_file-descriptor_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/1_file-descriptor_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/must/1_file-descriptor_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/must/2_root_data_entity_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/must/2_root_data_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.1/must/4_data_entity_metadata.py similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py rename to rocrate_validator/profiles/ro-crate/1.1/must/4_data_entity_metadata.py diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/must/4_data_entity_metadata.ttl similarity index 98% rename from rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/must/4_data_entity_metadata.ttl index 80479546b..2a458045d 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.1/must/4_data_entity_metadata.ttl @@ -132,7 +132,7 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; sh:name "Data Entity: REQUIRED 
properties" ; - sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; + sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; sh:targetClass ro-crate:DataEntity ; sh:property [ diff --git a/rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/must/5_web_data_entity_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/must/5_web_data_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/must/6_contextual_entity.ttl b/rocrate_validator/profiles/ro-crate/1.1/must/6_contextual_entity.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/must/6_contextual_entity.ttl rename to rocrate_validator/profiles/ro-crate/1.1/must/6_contextual_entity.ttl diff --git a/rocrate_validator/profiles/ro-crate/ontology.ttl b/rocrate_validator/profiles/ro-crate/1.1/ontology.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/ontology.ttl rename to rocrate_validator/profiles/ro-crate/1.1/ontology.ttl diff --git a/rocrate_validator/profiles/ro-crate/prefixes.ttl b/rocrate_validator/profiles/ro-crate/1.1/prefixes.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/prefixes.ttl rename to rocrate_validator/profiles/ro-crate/1.1/prefixes.ttl diff --git a/rocrate_validator/profiles/ro-crate/profile.ttl b/rocrate_validator/profiles/ro-crate/1.1/profile.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/profile.ttl rename to rocrate_validator/profiles/ro-crate/1.1/profile.ttl diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/should/2_root_data_entity_metadata.ttl similarity index 100%
rename from rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/should/2_root_data_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/1.1/should/2_root_data_entity_relative_uri.py similarity index 100% rename from rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py rename to rocrate_validator/profiles/ro-crate/1.1/should/2_root_data_entity_relative_uri.py diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/1.1/should/4_data_entity_existence.py similarity index 100% rename from rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py rename to rocrate_validator/profiles/ro-crate/1.1/should/4_data_entity_existence.py diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/should/4_data_entity_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/should/4_data_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py similarity index 100% rename from rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py rename to rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.ttl similarity index 85% rename from rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.ttl index 53f2b1fb7..b59332c1a 
100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.ttl @@ -35,21 +35,18 @@ ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ; sh:name "Web-based Data Entity: `contentSize` property" ; sh:description """Check if the Web-based Data Entity has a `contentSize` property""" ; sh:path schema_org:contentSize ; - sh:datatype xsd:string ; + # sh:datatype xsd:string ; sh:severity sh:Warning ; sh:message """Web-based Data Entities SHOULD have a `contentSize` property""" ; - sh:sparql [ - sh:message "If the value is a string it must be a string representing an integer." ; - sh:select """ - SELECT ?this ?value - WHERE { - ?this schema:contentSize ?value . - FILTER NOT EXISTS { - FILTER (xsd:integer(?value) = ?value) - } - } - """ ; - ] ; + # sh:sparql [ + # # sh:message "If the value is a string it must be a string representing an integer." ; + # sh:select """ + # SELECT $this WHERE { + # $this schema:contentSize ?v . 
+ # FILTER(xsd:decimal(?v) <= 0) + # } + # """ ; + # ] ; ] ; # Check if the Web-based Data Entity has a sdDatePublished property sh:property [ diff --git a/rocrate_validator/profiles/ro-crate/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.1/should/6_contextual_entity_metadata.ttl similarity index 100% rename from rocrate_validator/profiles/ro-crate/should/6_contextual_entity_metadata.ttl rename to rocrate_validator/profiles/ro-crate/1.1/should/6_contextual_entity_metadata.ttl From 18b70e0416a391ec682cb16721f1466d179e2b75 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:22 +0100 Subject: [PATCH 003/352] feat(profiles): add RO-Crate 1.2 validation profile and SHACL shapes --- .../1.2/may/4_data_entity_metadata.ttl | 89 +++ .../ro-crate/1.2/may/61_license_entity.ttl | 66 +++ .../1.2/must/0_file_descriptor_format.py | 547 ++++++++++++++++++ .../1.2/must/1_file-descriptor_metadata.ttl | 108 ++++ .../ro-crate/1.2/must/1_ro_crate_preview.py | 49 ++ .../1.2/must/2_root_data_entity_haspart.py | 68 +++ .../1.2/must/2_root_data_entity_identifier.py | 48 ++ .../1.2/must/2_root_data_entity_metadata.ttl | 163 ++++++ .../1.2/must/2_root_identifier_property.py | 51 ++ .../1.2/must/4_data_entity_metadata.py | 302 ++++++++++ .../1.2/must/4_data_entity_metadata.ttl | 216 +++++++ .../1.2/must/5_web_data_entity_metadata.ttl | 49 ++ .../ro-crate/1.2/must/6_contextual_entity.ttl | 91 +++ .../profiles/ro-crate/1.2/ontology.ttl | 67 +++ .../profiles/ro-crate/1.2/prefixes.ttl | 53 ++ .../profiles/ro-crate/1.2/profile.ttl | 74 +++ .../ro-crate/1.2/should/0_contact_point.py | 52 ++ .../1.2/should/0_contextual_entity_links.ttl | 55 ++ .../should/0_detached_metadata_filename.py | 48 ++ .../ro-crate/1.2/should/0_entity_name.ttl | 41 ++ .../1.2/should/0_entity_reachability.py | 64 ++ .../1.2/should/1_file-descriptor_metadata.ttl | 53 ++ .../should/2_root_data_entity_metadata.ttl | 124 ++++ .../should/2_root_data_entity_relative_uri.py 
| 47 ++ .../should/3_ro_crate_preview_exclusion.ttl | 33 ++ .../1.2/should/4_data_entity_existence.py | 58 ++ .../1.2/should/4_data_entity_metadata.ttl | 227 ++++++++ .../1.2/should/5_web_data_entity_metadata.ttl | 71 +++ .../should/6_contextual_entity_metadata.ttl | 162 ++++++ 29 files changed, 3076 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/may/61_license_entity.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/1_ro_crate_preview.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_identifier_property.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/5_web_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/ontology.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/profile.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl create mode 100644 
rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/5_web_data_entity_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl new file mode 100644 index 000000000..546d1b320 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl @@ -0,0 +1,89 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . 
+@prefix sh: . +@prefix xsd: . +@prefix owl: . +@prefix schema: . +@prefix validator: . + +ro-crate:FileDataEntityWebOptionalProperties a sh:NodeShape ; + sh:name "File Data Entity with web presence: OPTIONAL properties" ; + sh:description """A File Data Entity may have a corresponding web presence, + for instance a landing page that describes the file, including persistence identifiers (e.g. DOI), + resolving to an intermediate HTML page instead of the downloadable file directly. + These can be included for File Data Entities as additional metadata by using the properties: + `identifier`, `url`, `subjectOf` and `mainEntityOfPage`""" ; + sh:targetClass ro-crate:File ; + # Check if the File Data Entity has a formal identifier (e.g. DOI) + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional formal `identifier` (e.g. DOI)" ; + sh:description """Check if the File Data Entity has a formal identifier string such as a DOI""" ; + sh:path schema:identifier ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY have a formal identifier specified through an `identifier` property""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `url` property" ; + sh:description """Check if the File Data Entity has an optional `download` link""" ; + sh:path schema:url ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY use a `url` property to denote a `download` link""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `subjectOf` property" ; + sh:description """Check if the File Data Entity includes a `subjectOf` property to link `CreativeWork` instances that mention it.""" ; + sh:path schema:subjectOf ; + sh:class schema:WebPage, schema:CreativeWork ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY include a `subjectOf` property to link `CreativeWork`
instances that mention it.""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `mainEntityOfPage` property" ; + sh:description """Check if the File Data Entity has a `mainEntityOfPage` property""" ; + sh:path schema:mainEntityOfPage ; + sh:class schema:WebPage, schema:CreativeWork ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY have a `mainEntityOfPage` property""" ; + ] . + + +ro-crate:DirectoryDataEntityWebOptionalDistribution a sh:NodeShape ; + sh:name "Directory Data Entity: OPTIONAL `distribution` property" ; + sh:description """A Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ; + sh:targetClass ro-crate:File ; + # Check if the Web-based Data Entity has a contentSize property + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "Directory Data Entity: optional `distribution` property" ; + sh:description """Check if the Directory Data Entity has a `distribution` property""" ; + sh:path schema:distribution ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/may/61_license_entity.ttl b/rocrate_validator/profiles/ro-crate/1.2/may/61_license_entity.ttl new file mode 100644 index 000000000..fea0b207e --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/may/61_license_entity.ttl @@ -0,0 +1,66 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix xsd: . + + +ro-crate:LicenseOptionalAllowedValues a sh:NodeShape ; + sh:name "Root Data Entity: optional properties" ; + sh:description """Define the optional properties for the Root Data Entity (e.g., license)""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "License" ; + sh:description """Check if the RO-Crate has a license property with a URI or a textual description""" ; + sh:message """MAY have a URI (eg for Creative Commons or Open Source licenses). + MAY, if necessary be a textual description of how the RO-Crate may be used.""" ; + sh:minCount 1 ; + sh:nodeKind sh:IRIOrLiteral ; + sh:path schema_org:license ; + sh:or ( + [ sh:dataType xsd:string ] + [ sh:dataType xsd:anyURI ] + ) ; + ]. 
+ +ro-crate:LicenseDefinition a sh:NodeShape ; + sh:name "License definition" ; + sh:description """Contextual entity representing a license with a name and description."""; + sh:targetClass schema_org:license ; + sh:property [ + a sh:PropertyShape ; + sh:name "License name" ; + sh:description "The license MAY have a name" ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:name ; + sh:message "Missing license name" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "License description" ; + sh:description """The license MAY have a description""" ; + sh:maxCount 1; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:description ; + sh:message "Missing license description" ; + ] . + diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py new file mode 100644 index 000000000..8d27e7f5e --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py @@ -0,0 +1,547 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pathlib import Path +from typing import Any, Optional + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils.http import HttpRequester + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="File Descriptor existence") +class FileDescriptorExistence(PyFunctionCheck): + """The file descriptor MUST be present in the RO-Crate and MUST not be empty.""" + + @check(name="File Descriptor Existence") + def test_existence(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor is present in the RO-Crate + """ + if context.settings.metadata_only: + logger.debug("Skipping file descriptor existence check in metadata-only mode") + return True + if not context.ro_crate.has_descriptor(): + message = f'file descriptor "{context.rel_fd_path}" is not present' + context.result.add_issue(message, self) + return False + return True + + @check(name="File Descriptor size check") + def test_size(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor is not empty + """ + if context.settings.metadata_only: + logger.debug("Skipping file descriptor existence check in metadata-only mode") + return True + if not context.ro_crate.has_descriptor(): + message = f'file descriptor {context.rel_fd_path} is empty' + context.result.add_issue(message, self) + return False + if context.ro_crate.metadata.size == 0: + context.result.add_issue(f'RO-Crate "{context.rel_fd_path}" file descriptor is empty', self) + return False + return True + + +@requirement(name="File Descriptor JSON format") +class FileDescriptorJsonFormat(PyFunctionCheck): + """ + The file descriptor MUST be a valid JSON file + """ + @check(name="File Descriptor JSON format") + def check(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is in the correct 
format""" + try: + logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) + context.ro_crate.metadata.as_dict() + return True + except Exception as e: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + +@requirement(name="File Descriptor UTF-8 encoding") +class FileDescriptorEncodingCheck(PyFunctionCheck): + """ + The file descriptor MUST be UTF-8 encoded + """ + + @check(name="File Descriptor UTF-8 encoding") + def check(self, context: ValidationContext) -> bool: + try: + raw_data = context.ro_crate.get_file_content( + Path(context.ro_crate.metadata_descriptor_id), binary_mode=True + ) + if isinstance(raw_data, str): + return True + raw_data.decode("utf-8") + return True + except Exception as e: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" is not UTF-8 encoded', self) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + +@requirement(name="File Descriptor JSON-LD format") +class FileDescriptorJsonLdFormat(PyFunctionCheck): + """ + The file descriptor MUST be a valid JSON-LD file + """ + + def __check_remote_context__(self, context_uri: str) -> bool: + # Try to retrieve the context + try: + raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"}) + if raw_data.status_code != 200: + raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self) + logger.debug(f"Retrieved context from {context_uri}") + + # Try to parse the JSON-LD and access the context + jsonLD = raw_data.json()["@context"] + assert isinstance(jsonLD, dict) + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + def __check_contexts__(self, context: ValidationContext, jsonld_context: object) -> bool: + """ Get the keys of the context URI """ + 
is_valid = True + # if the context is a string, check if it is a valid URI + if isinstance(jsonld_context, str): + if not self.__check_remote_context__(jsonld_context): + context.result.add_issue( + f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) + is_valid = False + + # if the context is a dictionary, get the keys of the dictionary + if isinstance(jsonld_context, dict): + logger.debug(f"Detected dictionary context: {jsonld_context}") + + # if the context is a list of contexts, get the keys of each context + if isinstance(jsonld_context, list): + for ctx in jsonld_context: + if not self.__check_contexts__(context, ctx): + is_valid = False + # return if the context is valid + return is_valid + + @check(name="File Descriptor @context property validation") + def check_context(self, context: ValidationContext) -> bool: + """ Check if the file descriptor contains + the @context property and it is a valid JSON-LD context + """ + try: + json_dict = context.ro_crate.metadata.as_dict() + if "@context" not in json_dict: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" ' + "does not contain a context", self) + return False + + expected_context = "https://w3id.org/ro/crate/1.2/context" + jsonld_context = json_dict.get("@context") + + def has_expected_context(ctx: object) -> bool: + if isinstance(ctx, str): + return ctx == expected_context + if isinstance(ctx, list): + return expected_context in ctx + return False + + if not has_expected_context(jsonld_context): + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" ' + f'does not reference the required context "{expected_context}"', self) + return False + + # Check if the context is valid + return self.__check_contexts__(context, json_dict["@context"]) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="File Descriptor JSON-LD must be flattened") + def check_flattened(self, context: 
ValidationContext) -> bool: + """ Check if the file descriptor is flattened """ + + def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool: + """ Recursively check if the given data corresponds to a flattened JSON-LD object + and returns False if it does not and is not a root element + """ + result = True + if isinstance(entity, dict): + if is_first: + for _, elem in entity.items(): + if not is_entity_flat_recursive(elem, is_first=False, fail_fast=fail_fast): + result = False + if fail_fast: + return False + # if this is not the root element, it must not contain more properties than @id + else: + if "@id" in entity and "@value" in entity: + # add issue if both @id and @value are present + context.result.add_issue( + ( + f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' + 'an object with an @value represents a value object, which is a literal value such as ' + 'a string, number, date, or language-tagged string. This object is not an identifiable ' + 'resource, but a simple literal value.' 
+ ), + self + ) + result = False + if fail_fast: + return False + + # Handle value objects + if "@value" in entity: + # Inline the checks from is_value_object and add issues for each violation + if not isinstance(entity, dict): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'it MUST be a dictionary.', + self + ) + result = False + if fail_fast: + return False + + has_language = "@language" in entity + has_type = "@type" in entity + + if has_language and has_type: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + '@language and @type cannot coexist.', + self + ) + result = False + if fail_fast: + return False + + if has_language and not isinstance(entity["@value"], str): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'if @language is present, @value must be a string.', + self + ) + result = False + if fail_fast: + return False + # Handle node objects: + # every remaining entity with len(entity) > 1 must be a node object + elif "@id" not in entity or len(entity) > 1: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' + 'it MUST have only @id, but no other properties.', + self + ) + result = False + if fail_fast: + return False + if isinstance(entity, list): + for element in entity: + if not is_entity_flat_recursive(element, is_first=False, fail_fast=fail_fast): + result = False + if fail_fast: + return False + return result + + try: + fail_fast = bool(context.settings.abort_on_first) + json_dict = context.ro_crate.metadata.as_dict() + result = True + for entity in json_dict["@graph"]: + if not is_entity_flat_recursive(entity, fail_fast=fail_fast): + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" ' + f'is not fully flattened at entity "{entity.get("@id", entity)}"', self) + result = False + if fail_fast: + return False + 
return result + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of the @id property of the file descriptor entities") + def check_identifiers(self, context: ValidationContext) -> bool: + """ Check if the file descriptor entities have the @id property """ + try: + json_dict = context.ro_crate.metadata.as_dict() + for entity in json_dict["@graph"]: + if "@id" not in entity: + context.result.add_issue( + f"Entity \"{entity.get('name', None) or entity}\" " + f"of RO-Crate \"{context.rel_fd_path}\" " + "file descriptor does not contain the @id attribute", self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of the @type property of the file descriptor entities") + def check_types(self, context: ValidationContext) -> bool: + """ Check if the file descriptor entities have the @type property """ + try: + json_dict = context.ro_crate.metadata.as_dict() + for entity in json_dict["@graph"]: + if "@type" not in entity: + context.result.add_issue( + f"Entity \"{entity.get('name', None) or entity}\" " + f"of RO-Crate \"{context.rel_fd_path}\" " + "file descriptor does not contain the @type attribute", self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of unique @id values") + def check_unique_identifiers(self, context: ValidationContext) -> bool: + try: + json_dict = context.ro_crate.metadata.as_dict() + identifiers = [entity.get("@id") for entity in json_dict.get("@graph", [])] + duplicates = {i for i in identifiers if i is not None and identifiers.count(i) > 1} + if duplicates: + context.result.add_issue( + f"Duplicate @id values detected in RO-Crate metadata: {sorted(duplicates)}", self) + return False + return True + except Exception as e: + if 
logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of entity references") + def check_entity_references(self, context: ValidationContext) -> bool: + try: + json_dict = context.ro_crate.metadata.as_dict() + graph = json_dict.get("@graph", []) + identifiers = {entity.get("@id") for entity in graph if entity.get("@id")} + + literal_keys = { + "name", + "description", + "encodingFormat", + "contentSize", + "datePublished", + "keywords", + "creditText", + "contentUrl", + "copyrightNotice", + "version", + "softwareVersion", + "value", + "propertyID", + "actionStatus", + "error", + "startTime", + "endTime", + "url", + } + + def check_value(value: Any, entity_id: str, key: Optional[str] = None) -> Optional[str]: + if isinstance(value, str): + if key in literal_keys: + return None + if value in identifiers: + return ( + f"Entity '{entity_id}' references '{value}' as a string; use {{\"@id\": \"{value}\"}}" + ) + if isinstance(value, list): + for item in value: + message = check_value(item, entity_id, key) + if message: + return message + if isinstance(value, dict): + if "@value" in value: + return None + if "@id" in value: + return None + for nested_value in value.values(): + message = check_value(nested_value, entity_id, key) + if message: + return message + return None + + for entity in graph: + entity_id = entity.get("@id") + for key, value in entity.items(): + if key in ("@id", "@type", "@context"): + continue + message = check_value(value, entity_id, key) + if message: + context.result.add_issue(message, self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of subject and keyword properties") + def check_subject_keywords(self, context: ValidationContext) -> bool: + try: + json_dict = context.ro_crate.metadata.as_dict() + graph = json_dict.get("@graph", []) + for entity in graph: + entity_id = entity.get("@id") 
+ if "subject" in entity or "dct:subject" in entity or "dcterms:subject" in entity: + context.result.add_issue( + f"Entity '{entity_id}' should use schema.org 'about' for subjects", self) + return False + if "keyword" in entity: + context.result.add_issue( + f"Entity '{entity_id}' should use schema.org 'keywords'", self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + def __get_context_keys__(self, context: object) -> set: + """ Get the keys of the context URI """ + if isinstance(context, str): + return self.__get_remote_context_keys__(context) + + # if the context is a dictionary, get the keys of the dictionary + if isinstance(context, dict): + return set(context.keys()) + + # if the context is a list of contexts, get the keys of each context + if isinstance(context, list): + keys = set() + for ctx in context: + keys.update(self.__get_context_keys__(ctx)) + return keys + return set() + + def __get_remote_context_keys__(self, context_uri: str) -> set: + """ Get the keys of the context URI """ + + logger.debug(f"Retrieving context from {context_uri}...") + # Try to retrieve the context + raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"}) + if raw_data.status_code != 200: + raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'") + + logger.debug(f"Retrieved context from {context_uri}") + + # Get the keys of the context + jsonLD = raw_data.json() + jsonLD_ctx = jsonLD["@context"] + if not isinstance(jsonLD_ctx, dict): + raise RuntimeError("The context is not a dictionary") + return set(jsonLD_ctx.keys()) + + def __check_entity_keys__(self, entity: object, + context_keys: set, + unexpected_keys: Optional[dict[str, int]] = None) -> dict[str, int]: + """ Check if the entity is in the correct format """ + + def add_unexpected_key(k: str, u_keys: dict) -> None: + """ Add a key to the unexpected keys dictionary """ +
u_keys[k] = u_keys.get(k, 0) + 1 + + # Keys that should be skipped + SKIP_KEYS = {"@id", "@type", "@context", "@value", "@language"} + + # Ensure unexpected_keys is initialized + if unexpected_keys is None: + unexpected_keys = {} + + # If the entity is a dictionary, check each key + if isinstance(entity, dict): + for k, v in entity.items(): + if k not in context_keys and k not in SKIP_KEYS: + logger.debug(f"Key {k} not in context keys") + add_unexpected_key(k, unexpected_keys) + if isinstance(v, (dict, list)): + self.__check_entity_keys__(v, context_keys, unexpected_keys) + + # If the entity is a list, check each element + elif isinstance(entity, list): + for elem in entity: + self.__check_entity_keys__(elem, context_keys, unexpected_keys) + + return unexpected_keys + + @check(name="Validation of the compaction format of the file descriptor") + def check_compaction(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is in the **compacted** JSON-LD format """ + try: + logger.debug("Checking compaction format of JSON-LD file at %s", context.ro_crate.metadata) + json_dict = context.ro_crate.metadata.as_dict() + logger.debug(f"JSONLD keys:{json_dict.keys()}") + + jsonld_context = json_dict.get("@context", None) + logger.debug(f"Context: {jsonld_context}") + + try: + context_keys = self.__get_context_keys__(jsonld_context) + logger.debug(f"{context_keys}") + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + context.result.add_issue(str(e), self) + return False + + unexpected_keys = self.__check_entity_keys__(json_dict.get("@graph", []), context_keys) + logger.debug(f"Unexpected keys: {unexpected_keys}") + if len(unexpected_keys) > 0: + for k, v in unexpected_keys.items(): + logger.debug(f"Key {k} appears {v} times") + # Add the correct suffix to the message + suffix = "s" if v > 1 else "" + # Check if k is a term or a URI + if k.startswith("http"): + context.result.add_issue( + f'The {v} 
occurrence{suffix} of the "{k}" URI cannot be used as a key{suffix} ' + 'because the compacted format requires simple terms as keys ' + '(see https://www.w3.org/TR/json-ld-api/#compaction for more details).', self) + else: + context.result.add_issue( + f'The {v} occurrence{suffix} of the JSON-LD key "{k}" ' + f'{"is" if v == 1 else "are"} not allowed in the compacted format ' + 'because it is not present in the @context of the document', self) + return False + + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + context.result.add_issue( + f'Unexpected error: {e}', self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl new file mode 100644 index 000000000..f9e03602b --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl @@ -0,0 +1,108 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix dct: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix validator: . + + +ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape; + sh:name "Identify the RO-Crate Metadata File Descriptor" ; + sh:description """The RO-Crate Metadata File Descriptor entity describes the RO-Crate itself, and it is named as `*-ro-crate-metadata.json`.
+ It can be identified by name according to the RO-Crate specification + available at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.2/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?crate a schema:Dataset . + ?this schema:about ?crate . + FILTER(contains(str(?this), "ro-crate-metadata.json")) + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ROCrateMetadataFileDescriptor ; + ] . + +ro-crate:ROCrateMetadataFileDescriptorExistence + a sh:NodeShape ; + sh:name "RO-Crate Metadata File Descriptor entity existence" ; + sh:description "The RO-Crate JSON-LD MUST contain a Metadata File Descriptor entity typed as `schema:CreativeWork`" ; + sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; + sh:property [ + a sh:PropertyShape ; + sh:name "RO-Crate Metadata File Descriptor entity existence" ; + sh:description """Check if the RO-Crate Metadata File Descriptor entity exists, + i.e., if there exists an entity with @id matching *-ro-crate-metadata.json and type `schema:CreativeWork`""" ; + sh:path rdf:type ; + sh:hasValue ro-crate:ROCrateMetadataFileDescriptor ; + sh:minCount 1 ; + sh:message "The root of the document MUST have a Metadata File Descriptor entity" ; + ] . 
+ +ro-crate:ROCrateMetadataFileDescriptorRecommendedProperties a sh:NodeShape ; + sh:name "RO-Crate Metadata File Descriptor REQUIRED properties" ; + sh:description """RO-Crate Metadata Descriptor MUST be defined + according with the requirements details defined in + [RO-Crate Metadata File Descriptor](https://www.researchobject.org/ro-crate/1.2/root-data-entity.html#ro-crate-metadata-file-descriptor)"""; + sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; + sh:property [ + a sh:PropertyShape ; + sh:name "Metadata File Descriptor entity type" ; + sh:description "Check if the RO-Crate Metadata File Descriptor has `@type` CreativeWork, as per schema.org" ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:path rdf:type ; + sh:hasValue schema_org:CreativeWork ; + sh:message "The RO-Crate metadata file MUST be a CreativeWork, as per schema.org" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Metadata File Descriptor entity: `about` property" ; + sh:description """Check if the RO-Crate Metadata File Descriptor has an `about` property referencing the Root Data Entity""" ; + sh:maxCount 1; + sh:minCount 1 ; + # sh:nodeKind sh:IRI ; + sh:path schema_org:about ; + # sh:class ro-crate:RootDataEntity ; + # sh:message "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity" ; + ] ; + . +# ro-crate:AgentProjectIntersection +# a sh:NodeShape ; +# sh:name "Agent Project Membership Validation" ; +# sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; +# sh:sparql [ +# a sh:SPARQLConstraint ; +# sh:message "Validation message" ; +# sh:select """ +# SELECT $this +# WHERE { +# FILTER(false) +# } +# """ ; +# sh:severity sh:Violation ; +# ] . 
diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/1_ro_crate_preview.py b/rocrate_validator/profiles/ro-crate/1.2/must/1_ro_crate_preview.py new file mode 100644 index 000000000..3c483509d --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/1_ro_crate_preview.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="RO-Crate Website") +class ROCrateWebsiteChecker(PyFunctionCheck): + """ + If present, the RO-Crate Website MUST be a valid HTML5 document. 
+ """ + + @check(name="RO-Crate Website HTML5 doctype") + def check_preview_html(self, context: ValidationContext) -> bool: + if context.ro_crate.is_detached(): + return True + preview_path = Path("ro-crate-preview.html") + if not context.ro_crate.has_file(preview_path): + return True + try: + content = context.ro_crate.get_file_content(preview_path, binary_mode=False) + if " bool: + try: + root = context.ro_crate.metadata.get_root_data_entity() + data_entities = [ + e for e in context.ro_crate.metadata.get_data_entities() + if not e.has_local_identifier() and e.id != root.id + ] + + reachable = set() + stack = [] + root_has_part = root.get_property("hasPart") + if root_has_part: + stack.extend(root_has_part if isinstance(root_has_part, list) else [root_has_part]) + + while stack: + current = stack.pop() + if hasattr(current, "id"): + current_id = current.id + else: + continue + if current_id in reachable: + continue + reachable.add(current_id) + if hasattr(current, "get_property"): + nested = current.get_property("hasPart") + if nested: + stack.extend(nested if isinstance(nested, list) else [nested]) + + missing = [e.id for e in data_entities if e.id not in reachable] + if missing: + context.result.add_issue( + f"Root Data Entity hasPart does not cover Data Entities: {missing}", self) + return False + return True + except Exception as e: + context.result.add_issue( + f"Error checking hasPart coverage: {str(e)}", self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py new file mode 100644 index 000000000..a4898dba5 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py @@ -0,0 +1,48 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="Root Data Entity: identifier") +class RootDataEntityIdentifierChecker(PyFunctionCheck): + """ + In an attached RO-Crate, the Root Data Entity @id MUST be ./ or an absolute URI. + """ + + @check(name="Root Data Entity: REQUIRED value") + def check_identifier(self, context: ValidationContext) -> bool: + try: + if context.ro_crate.is_detached(): + return True + root_entity = context.ro_crate.metadata.get_root_data_entity() + if root_entity.id == './': + return True + if re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", root_entity.id): + return True + context.result.add_issue( + 'Root Data Entity @id MUST be `./` or an absolute URI for attached RO-Crates', self) + return False + except Exception as e: + context.result.add_issue( + f'Error checking Root Data Entity @id: {str(e)}', self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl new file mode 100644 index 000000000..f12c76a9c --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl @@ -0,0 +1,163 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix prof: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +ro-crate:RootDataEntityType + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity type" ; + sh:description "The Root Data Entity MUST be a `Dataset` (as per `schema.org`)" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity type" ; + sh:description "Check if the Root Data Entity is a `Dataset` (as per `schema.org`)" ; + sh:path rdf:type ; + sh:hasValue schema_org:Dataset ; + sh:minCount 1 ; + sh:message """The Root Data Entity MUST be a `Dataset` (as per `schema.org`)""" ; + ] ; + # Validate that if the publisher is specified, it is an Organization or a Person + sh:property [ + sh:path schema_org:publisher ; + sh:severity sh:Violation ; + sh:name "Root Data Entity: `publisher` property" ; + sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization` or `Person`.""" ; + sh:or ( + [ sh:class schema_org:Organization ] + [ sh:class schema_org:Person ] + ) ; + sh:message """The Root Data Entity MUST have a `publisher` property of type `Organization` or `Person`.""" ; + ] . 
+ + +ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape; + sh:name "Identify the Root Data Entity of the RO-Crate" ; + sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. + It is a schema:Dataset and is indirectly identified by the about property of the metadata descriptor in the RO-Crate + (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.2/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)). + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:RootDataEntity ; + ] . 
+ + +ro-crate:RootDataEntityRequiredProperties + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity REQUIRED properties" ; + sh:description "The Root Data Entity MUST have a `name`, `description`, `license` and `datePublished`" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `name` property" ; + sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org) + to clearly identify the dataset and distinguish it from other datasets.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:name; + sh:message "The Root Data Entity MUST have a `name` property (as specified by schema.org)" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `description` property" ; + sh:description """Check if the Root Data Entity includes a `description` (as specified by schema.org) + to provide a human-readable description of the dataset.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:description; + sh:message "The Root Data Entity MUST have a `description` property (as specified by schema.org)" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `license` property" ; + sh:description """Check if the Root Data Entity includes a `license` property (as specified by schema.org) + to provide information about the license of the dataset.""" ; + sh:path schema_org:license; + sh:minCount 1 ; + sh:nodeKind sh:IRIOrLiteral ; + sh:message """The Root Data Entity MUST have a `license` property (as specified by schema.org).""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `datePublished` property" ; + sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org) + to provide the date when the dataset was published. 
The datePublished MUST be a valid ISO 8601 date.""" ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:datatype xsd:string ; + sh:path schema_org:datePublished ; + sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" ; + sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date" ; + ] ; + sh:property [ + sh:name "Root Data Entity: `conformsTo` profile values" ; + sh:description """If present, `conformsTo` values MUST reference Profile entities.""" ; + sh:path schema_org:conformsTo ; + sh:class prof:Profile ; + sh:message "If present, `conformsTo` values MUST reference a Profile entity" ; + ] . + +ro-crate:RootDataEntityHasPartValueRestriction + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; + sh:description "The Root Data Entity MUST be linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; + sh:description "Check if the Root Data Entity is linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; + sh:path schema_org:hasPart ; + sh:or ( + [ sh:class ro-crate:File ] + [ sh:class ro-crate:Directory ] + [ sh:class ro-crate:GenericDataEntity ] + ) ; + sh:message """The Root Data Entity MUST be linked to either File or Directory instances, nothing else""" ; + ] . 
diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_identifier_property.py b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_identifier_property.py new file mode 100644 index 000000000..cb3e52768 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_identifier_property.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="Root Data Entity: identifier PropertyValue") +class RootIdentifierPropertyChecker(PyFunctionCheck): + """ + If the Root Data Entity identifier references a PropertyValue, it MUST include value. 
+ """ + + @check(name="Root Data Entity: identifier PropertyValue value") + def check_identifier_values(self, context: ValidationContext) -> bool: + try: + root = context.ro_crate.metadata.get_root_data_entity() + identifiers = root.get_property("identifier") + if identifiers is None: + return True + identifiers = identifiers if isinstance(identifiers, list) else [identifiers] + for identifier in identifiers: + if not hasattr(identifier, "has_type"): + continue + if not identifier.has_type("PropertyValue"): + continue + if not identifier.get_property("value"): + context.result.add_issue( + "PropertyValue identifiers MUST include a `value`", self) + return False + return True + except Exception as e: + context.result.add_issue( + f"Error checking identifier PropertyValue: {str(e)}", self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py new file mode 100644 index 000000000..856e4eec1 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -0,0 +1,302 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import re + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="Data Entity: REQUIRED resource availability") +class DataEntityRequiredChecker(PyFunctionCheck): + """ + Resources corresponding to local Data Entities MUST be present in the RO-Crate payload + """ + + @check(name="Data Entity: REQUIRED resource availability") + def check_availability(self, context: ValidationContext) -> bool: + """ + Check the presence of the Data Entity in the RO-Crate + """ + if context.ro_crate.is_detached(): + logger.debug("Skipping data entity payload checks for detached RO-Crate") + return True + # Skip the check in metadata-only mode + if context.settings.metadata_only: + logger.debug("Skipping data entity availability check in metadata-only mode") + return True + # Perform the check + result = True + for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True): + assert entity.id is not None, "Entity has no @id" + logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) + try: + logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) + if entity.has_local_identifier(): + logger.debug( + "Ignoring the Data Entity '%s' as it is a local entity with a local identifier. " + "According to the RO-Crate specification, local entities with local identifiers " + "are not required to be included in the RO-Crate payload " + "(see https://github.com/ResearchObject/ro-crate/issues/400#issuecomment-2779152885 and " + "https://github.com/ResearchObject/ro-crate/pull/426 for more details)", + entity.id) + continue + if not entity.has_relative_path(): + logger.debug( + "Ignoring the Data Entity '%s' as it is a local entity with an absolute path. 
" + "According to the RO-Crate specification, local entities with absolute paths " + "are not required to be included in the RO-Crate payload. " + "It is only recommended that they exist at the time of RO-Crate creation.", + entity.id) + continue + if not entity.is_available(): + context.result.add_issue( + f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self) + result = False + except Exception as e: + context.result.add_issue( + f"Unable to check the presence of the Data Entity '{entity.id}' within the RO-Crate", self) + if logger.isEnabledFor(logging.DEBUG): + logger.debug(e, exc_info=True) + result = False + if not result and context.fail_fast: + return result + return result + + +@requirement(name="Detached RO-Crate Data Entities") +class DetachedDataEntityChecker(PyFunctionCheck): + """ + Detached RO-Crate Packages MUST only declare Web-based Data Entities + """ + + @check(name="Detached RO-Crate Data Entity: MUST be web-based") + def check_detached_entities(self, context: ValidationContext) -> bool: + if not context.ro_crate.is_detached(): + return True + result = True + try: + root_entity_id = context.ro_crate.metadata.get_root_data_entity().id + except Exception: + root_entity_id = None + for entity in context.ro_crate.metadata.get_data_entities(): + if root_entity_id and entity.id == root_entity_id: + continue + if not entity.is_remote(): + context.result.add_issue( + f"Detached RO-Crate includes a non web-based Data Entity '{entity.id}'", self) + result = False + if context.fail_fast: + return False + return result + + +@requirement(name="Data Entity: identifier requirements") +class DataEntityIdentifierChecker(PyFunctionCheck): + """ + Data Entity identifiers must be valid URI references and use relative paths for payload files.
+ """ + + @check(name="Data Entity: @id validity") + def check_identifiers(self, context: ValidationContext) -> bool: + result = True + for entity in context.ro_crate.metadata.get_data_entities(): + if entity.has_local_identifier(): + continue + if "\\" in entity.id or " " in entity.id: + context.result.add_issue( + f"Data Entity '{entity.id}' has an invalid @id; use URI-compatible paths", self) + result = False + if context.fail_fast: + return False + continue + if not re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", entity.id) and not entity.has_relative_path(): + context.result.add_issue( + f"Data Entity '{entity.id}' does not use a valid absolute or relative URI", self) + result = False + if context.fail_fast: + return False + return result + + @check(name="Data Entity: relative @id for payload files") + def check_relative_paths(self, context: ValidationContext) -> bool: + if context.ro_crate.is_detached(): + return True + result = True + for entity in context.ro_crate.metadata.get_data_entities(): + if entity.has_local_identifier() or entity.is_remote(): + continue + if entity.has_absolute_path(): + if context.ro_crate.has_file(entity.id_as_path) or context.ro_crate.has_directory(entity.id_as_path): + context.result.add_issue( + f"Data Entity '{entity.id}' should use a relative @id within the RO-Crate root", self) + result = False + if context.fail_fast: + return False + return result + + +@requirement(name="Data Entity: citation references") +class DataEntityCitationChecker(PyFunctionCheck): + """ + Citation references must include an absolute URI. 
+ """ + + @check(name="Data Entity: citation must include @id") + def check_citation(self, context: ValidationContext) -> bool: + result = True + for entity in context.ro_crate.metadata.get_data_entities(): + citations = entity.get_property("citation") + if citations is None: + continue + citation_list = citations if isinstance(citations, list) else [citations] + for citation in citation_list: + if isinstance(citation, str): + citation_id = citation + elif hasattr(citation, "id"): + citation_id = citation.id + else: + context.result.add_issue( + f"Citation for Data Entity '{entity.id}' must reference a publication @id", self) + result = False + if context.fail_fast: + return False + continue + if not re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", citation_id): + context.result.add_issue( + f"Citation for Data Entity '{entity.id}' must be an absolute URI", self) + result = False + if context.fail_fast: + return False + return result + + +@requirement(name="Web-based Data Entity: REQUIRED availability") +class WebDataEntityRequiredChecker(PyFunctionCheck): + """ + Web-based Data Entities MUST be directly downloadable at the time of creation. 
+ """ + + @check(name="Web-based Data Entity: REQUIRED resource availability") + def check_availability(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if not (context.settings.creation_time or context.settings.enforce_availability): + return True + if context.settings.metadata_only: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + assert entity.id is not None, "Entity has no @id" + try: + if not entity.is_available(): + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) + result = False + except Exception as e: + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) + result = False + if not result and context.fail_fast: + return result + return result + + @check(name="Web-based Data Entity: RECOMMENDED resource availability", severity=Severity.RECOMMENDED) + def check_availability_warning(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if context.settings.creation_time or context.settings.enforce_availability: + return True + if context.settings.metadata_only: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + assert entity.id is not None, "Entity has no @id" + try: + if not entity.is_available(): + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) + result = False + except Exception as e: + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) + result = False + if not result and context.fail_fast: + return result + return result + + @check(name="Web-based Data Entity: `contentSize` property", severity=Severity.RECOMMENDED) + def check_content_size(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return 
True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + assert entity.id is not None, "Entity has no @id" + if entity.is_available(): + content_size = entity.get_property("contentSize") + if content_size: + if isinstance(content_size, str): + content_value = content_size + elif hasattr(content_size, "id"): + content_value = content_size.id + else: + content_value = str(content_size) + try: + content_int = int(str(content_value)) + except Exception: + content_int = None + external_size = context.ro_crate.get_external_file_size(entity.id) + if content_int is not None and content_int != external_size: + context.result.add_issue( + f'The property contentSize={content_size} of the Web-based Data Entity ' + f'{entity.id} does not match the actual size of ' + f'the downloadable content, i.e., {external_size} (bytes)', self, + violatingEntity=entity.id, violatingProperty='contentSize', + violatingPropertyValue=str(content_value)) + result = False + if not result and context.fail_fast: + return result + return result + + @check(name="Web-based Data Entity: `contentUrl` availability", severity=Severity.RECOMMENDED) + def check_content_url(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + content_url = entity.get_property("contentUrl") + if not content_url: + continue + urls = content_url if isinstance(content_url, list) else [content_url] + for url in urls: + try: + url_value = url if isinstance(url, str) else url.id + if not context.ro_crate.get_external_file_size(url_value): + context.result.add_issue( + f"contentUrl {url_value} for Web-based Data Entity {entity.id} is not directly downloadable", + self) + result = False + except Exception as e: + context.result.add_issue( + f"contentUrl {url} for Web-based Data Entity {entity.id} is not directly downloadable: {e}", + self) + result = False + if not 
result and context.fail_fast: + return result + return result diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl new file mode 100644 index 000000000..9f5d4a67f --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl @@ -0,0 +1,216 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix owl: . +@prefix validator: . + +ro-crate:DataEntityRequiredProperties a sh:NodeShape ; + sh:name "Data Entity: REQUIRED properties" ; + sh:description """A Data Entity MUST be a `URI Path` relative to the ROCrate root, + or an absolute URI""" ; + sh:targetClass ro-crate:DataEntity ; + + sh:property [ + sh:name "Data Entity: @id value restriction" ; + sh:description """Check if the Data Entity has an absolute or relative URI as `@id`""" ; + sh:path [sh:inversePath rdf:type ] ; + sh:nodeKind sh:IRI ; + sh:severity sh:Violation ; + sh:message """Data Entities MUST have an absolute or relative URI as @id.""" ; + ] . + +ro-crate:FileDataEntity a sh:NodeShape ; + sh:name "File Data Entity: REQUIRED properties" ; + sh:description """A File Data Entity MUST be a `File`. + `File` is an RO-Crate alias for the schema.org `MediaObject`. 
+ The term `File` here is liberal, and includes "downloadable" resources where `@id` is an absolute URI. + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:MediaObject . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + } + """ + ] ; + + sh:property [ + sh:name "File Data Entity: REQUIRED type" ; + sh:description """Check if the File Data Entity has `File` as `@type`. + `File` is an RO-Crate alias for the schema.org `MediaObject`. + """ ; + sh:path rdf:type ; + sh:hasValue ro-crate:File ; + sh:severity sh:Violation ; + sh:message """File Data Entities MUST have "File" as a value for @type.""" ; + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:DataEntity ; + ] . + + +ro-crate:DirectoryDataEntity a sh:NodeShape ; + sh:name "Directory Data Entity: REQUIRED properties" ; + sh:description """A Directory Data Entity MUST be of @type `Dataset`. + The term `directory` here includes HTTP file listings where `@id` is an absolute URI. + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?root . 
+ # Exclude all dataset entities that ends with `./#` + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + } + """ + ] ; + + # Decomment for debugging + # sh:property [ + # sh:name "Test Directory" ; + # sh:description """Data Entities representing directories MUST have "Directory" as a value for @type.""" ; + # sh:path rdf:type ; + # sh:hasValue ro-crate:File ; + # sh:severity sh:Violation ; + # ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:Directory ; + ] ; + + # Expand data graph with triples from the directory data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:DataEntity ; + ] ; + + # Ensure that the directory data entity is a dataset + sh:property [ + sh:name "Directory Data Entity: REQUIRED type" ; + sh:description """Check if the Directory Data Entity has `Dataset` as `@type`.""" ; + sh:path rdf:type ; + sh:hasValue schema_org:Dataset ; + sh:severity sh:Violation ; + ] . + +ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; + sh:name "Data Entity: REQUIRED properties" ; + sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; + sh:targetClass ro-crate:DataEntity ; + sh:property + [ + a sh:PropertyShape ; + sh:path [ sh:inversePath schema_org:hasPart ] ; + sh:node schema_org:Dataset ; + sh:minCount 1 ; + sh:name "Data Entity MUST be directly referenced" ; + sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property" """ ; + # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; + ] . 
+
+ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ;
+    sh:name "Generic Data Entity: REQUIRED properties" ;
+    sh:description """A Data Entity other than a File or a Directory MUST be a `DataEntity`""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT ?this
+            WHERE {
+                ?root schema:hasPart ?this .
+                ?metadatafile schema:about ?root .
+                FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+                FILTER(?this != ?root)
+                FILTER(?this != ?metadatafile)
+                # Files and Directories are excluded one type at a time: a single
+                # NOT EXISTS over both patterns only matches nodes carrying both types
+                FILTER NOT EXISTS { ?this a schema:MediaObject . }
+                FILTER NOT EXISTS { ?this a schema:Dataset . }
+            }
+        """
+    ] ;
+
+    # Expand the data graph: tag every matching entity as a ro-crate:GenericDataEntity
+    sh:rule [
+        a sh:TripleRule ;
+        sh:subject sh:this ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:GenericDataEntity ;
+    ] ;
+
+    # Expand the data graph: tag every matching entity as a ro-crate:DataEntity too
+    sh:rule [
+        a sh:TripleRule ;
+        sh:subject sh:this ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:DataEntity ;
+    ] .
+
+
+# Uncomment for debugging
+# ro-crate:TestGenericDataEntity a sh:NodeShape ;
+#     sh:disabled true ;
+#     sh:targetClass ro-crate:GenericDataEntity ;
+#     sh:name "Generic Data Entity: test invalid property";
+#     sh:description """Check if the GenericDataEntity has the invalidProperty property""" ;
+#     sh:property [
+#         sh:minCount 1 ;
+#         sh:maxCount 1 ;
+#         sh:path ro-crate:invalidProperty ;
+#         sh:severity sh:Violation ;
+#         sh:message "Testing the generic data entity";
+#         sh:datatype xsd:string ;
+#         sh:message "Testing for the invalidProperty of the generic data entity";
+#     ] .
+ + +# Uncomment for debugging +# ro:testDirectory a sh:NodeShape ; +# sh:name "Definition of Test Directory" ; +# sh:description """A Test Directory is a digital object that is stored in a file format""" ; +# sh:targetClass ro-crate:Directory ; + +# sh:property [ +# sh:name "Test Directory instance" ; +# sh:description """Check if the Directory DataEntity instance has the fake property ro-crate:foo""" ; +# sh:path rdf:type ; +# sh:hasValue ro-crate:foo ; +# sh:severity sh:Violation ; +# ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/5_web_data_entity_metadata.ttl new file mode 100644 index 000000000..03b50585f --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/5_web_data_entity_metadata.ttl @@ -0,0 +1,49 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix dct: . +@prefix schema_org: . +@prefix sh: . +@prefix owl: . +@prefix xsd: . +@prefix validator: . + + +ro-crate:WebBasedDataEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Web-based Data Entity: REQUIRED properties" ; + sh:description """A Web-based Data Entity is a `File` identified by an absolute URL""" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:MediaObject . 
+ FILTER(!contains(str(?this), "ro-crate-metadata.json")) + FILTER regex(str(?this), "^(https?|ftps?)://", "i") + } + """ + ] ; + + # Expand data graph with triples which identify the web-based data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:WebDataEntity ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl new file mode 100644 index 000000000..a7a02b975 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl @@ -0,0 +1,91 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix xsd: . +@prefix owl: . +@prefix validator: . + + +ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify License Entity" ; + sh:description """Mark a license entity any Data Entity referenced by the `schema:license` property.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?subject schema:license ?this . + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ContextualEntity ; + ] . 
+
+
+ro-crate:WebSiteRecommendedProperties a sh:NodeShape ;
+    sh:name "WebSite RECOMMENDED Properties" ;
+    sh:description """A `WebSite` MUST be identified by a valid IRI and MUST have a `name` property.""" ;
+    sh:targetClass schema:WebSite ;
+    sh:property [
+        sh:path [sh:inversePath rdf:type] ;
+        sh:nodeKind sh:IRI ;  # SHACL node-kind constraint on the identifier
+        sh:name "WebSite: value restriction of its identifier" ;
+        sh:description "Check if the WebSite has a valid IRI" ;
+        sh:message "A WebSite MUST have a valid IRI" ;
+    ] ;
+    sh:property [
+        sh:path schema:name ;
+        sh:minCount 1 ;
+        sh:datatype xsd:string ;  # the SHACL term is all-lowercase `datatype`
+        sh:name "WebSite: REQUIRED `name` property" ;
+        sh:description "Check if the WebSite has a `name` property" ;
+        sh:message "A WebSite MUST have a `name` property" ;
+    ] .
+
+
+ro-crate:CreativeWorkAuthorDefinition a sh:NodeShape, validator:HiddenShape ;
+    sh:name "CreativeWork Author Definition" ;
+    sh:description """Define the `CreativeWorkAuthor` as the `Person` object of the `schema:author` predicate.""" ;
+    sh:targetObjectsOf schema:author ;
+    sh:rule [
+        a sh:TripleRule ;
+        sh:subject sh:this ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:CreativeWorkAuthor ;
+        sh:condition [
+            sh:property [ sh:path rdf:type ; sh:hasValue schema:Person ; sh:minCount 1 ] ;
+        ] ;
+    ] .
+
+ro-crate:ThumbnailReferencesFile a sh:NodeShape ;
+    sh:name "Thumbnail reference" ;
+    sh:description """If `thumbnail` is present, it MUST reference a File data entity.""" ;
+    sh:targetSubjectsOf schema:thumbnail ;
+    sh:property [
+        sh:path schema:thumbnail ;
+        sh:class schema:MediaObject ;
+        sh:message "If present, `thumbnail` MUST reference a File data entity" ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl new file mode 100644 index 000000000..938c289e2 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl @@ -0,0 +1,67 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix owl: . +@prefix rdf: . +@prefix xml: . +@prefix xsd: . +@prefix rdfs: . +@prefix schema: . +@prefix rocrate: . +@prefix bioschemas: . +@prefix ro-crate: . +# @base <./.> . + + rdf:type owl:Ontology ; + owl:versionIRI . + +# # ################################################################# +# # # Classes +# # ################################################################# + +# Declare the RootDataEntity class +ro-crate:RootDataEntity rdf:type owl:Class ; + rdfs:subClassOf schema:Dataset ; + rdfs:label "RootDataEntity"@en . + +### http://schema.org/CreativeWork +schema:CreativeWork rdf:type owl:Class ; + rdfs:label "CreativeWork"@en . + +### http://schema.org/MediaObject +schema:MediaObject rdf:type owl:Class ; + owl:equivalentClass ro-crate:File ; + rdfs:label "MediaObject"@en . + + +### http://schema.org/SoftwareSourceCode +schema:SoftwareSourceCode rdf:type owl:Class ; + rdfs:subClassOf schema:CreativeWork . + + +### https://bioschemas.org/ComputationalWorkflow +bioschemas:ComputationalWorkflow rdf:type owl:Class . 
+ + +### https://w3id.org/ro/crate/1.2/DataEntity +ro-crate:DataEntity rdf:type owl:Class ; + rdfs:subClassOf schema:CreativeWork ; + rdfs:label "DataEntity"@en . + + +# # ### https://w3id.org/ro/crate/1.2/Directory +ro-crate:Directory rdf:type owl:Class ; + rdfs:subClassOf schema:Dataset ; + rdfs:label "Directory"@en . diff --git a/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl new file mode 100644 index 000000000..7bbf24562 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl @@ -0,0 +1,53 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix sh: . +@prefix xsd: . +@prefix ro-crate: . 
+ +# Define the prefixes used in the SPARQL queries +ro-crate:sparqlPrefixes + sh:declare [ + sh:prefix "schema" ; + sh:namespace "http://schema.org/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "bioschemas" ; + sh:namespace "https://bioschemas.org/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "bioschemas-cw" ; + sh:namespace "https://bioschemas.org/ComputationalWorkflow#"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "rocrate" ; + sh:namespace "https://w3id.org/ro/crate/1.2/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "wfrun" ; + sh:namespace "https://w3id.org/ro/terms/workflow-run#"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "codemeta" ; + sh:namespace "https://codemeta.github.io/terms/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "ro-crate" ; + sh:namespace "https://github.com/crs4/rocrate-validator/profiles/ro-crate-1.2/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "ro" ; + sh:namespace "./"^^xsd:anyURI ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/profile.ttl b/rocrate_validator/profiles/ro-crate/1.2/profile.ttl new file mode 100644 index 000000000..b87a59bfe --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/profile.ttl @@ -0,0 +1,74 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . 
+
+
+
+    # a Profile; it's identifying URI
+    a prof:Profile ;
+
+    # common metadata for the Profile
+
+    # the Profile's label
+    rdfs:label "RO-Crate Metadata Specification 1.2" ;
+
+    # regular metadata, a basic description of the Profile
+    rdfs:comment """RO-Crate Metadata Specification."""@en ;
+
+    # regular metadata, URI of publisher
+    dct:publisher ;
+
+    # this profile has a JSON-LD context resource
+    prof:hasResource [
+        a prof:ResourceDescriptor ;
+
+        # it's in JSON-LD format
+        dct:format ;
+
+        # it conforms to JSON-LD, here referred to by its namespace URI as a Profile
+        dct:conformsTo ;
+
+        # this profile resource plays the role of "Vocabulary"
+        # described in this ontology's accompanying Roles vocabulary
+        prof:hasRole role:Vocabulary ;
+
+        # this profile resource's actual file
+        prof:hasArtifact ;
+    ] ;
+
+    # this profile has a human-readable documentation resource
+    prof:hasResource [
+        a prof:ResourceDescriptor ;
+
+        # it's in HTML format
+        dct:format ;
+
+        # it conforms to HTML, here referred to by its namespace URI as a Profile
+        dct:conformsTo ;
+
+        # this profile resource plays the role of "Specification"
+        # described in this ontology's accompanying Roles vocabulary
+        prof:hasRole role:Specification ;
+
+        # this profile resource's actual file
+        prof:hasArtifact ;
+    ] ;
+
+    # a short code to refer to the Profile with when a URI can't be used
+    prof:hasToken "ro-crate" ;
+.
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py
new file mode 100644
index 000000000..654d62efa
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Contact point for authors or publishers")
+class ContactPointChecker(PyFunctionCheck):
+    """
+    At least one author or publisher referenced from the Root Data Entity SHOULD have a contactPoint.
+    """
+
+    @check(name="Contact point presence")
+    def check_contact_point(self, context: ValidationContext) -> bool:
+        """
+        Look for a `contactPoint` on the authors and publishers of the Root Data Entity.
+
+        Passes when the root lists no author/publisher at all or when at least one of
+        them exposes a non-empty `contactPoint`; otherwise an issue is recorded.
+        Any exception raised while reading the metadata is reported as an issue too.
+        """
+        try:
+            root_entity = context.ro_crate.metadata.get_root_data_entity()
+            # gather every author and publisher, normalising single values to lists
+            agents = []
+            for property_name in ("author", "publisher"):
+                linked = root_entity.get_property(property_name)
+                if linked is not None:
+                    agents.extend(linked if isinstance(linked, list) else [linked])
+            if not agents:
+                return True
+            # values lacking `get_property` cannot be asked for a contactPoint and are skipped
+            if any(hasattr(agent, "get_property") and agent.get_property("contactPoint")
+                   for agent in agents):
+                return True
+            context.result.add_issue(
+                "At least one author or publisher SHOULD have a contactPoint", self)
+            return False
+        except Exception as error:
+            context.result.add_issue(
+                f"Error checking contactPoint: {str(error)}", self)
+            return False
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl
new file mode 100644
index 000000000..7c5aee736
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl
@@ -0,0 +1,55 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix schema_org: .
+@prefix sh: .
+
+ro-crate:ContextualEntitiesShouldBeLinked a sh:NodeShape ;
+    sh:name "Contextual Entities should be linked" ;
+    sh:description """Contextual entities in the graph SHOULD be linked to from at least one other entity.""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT ?this
+            WHERE {
+                ?this a ?type .
+                FILTER(!contains(str(?this), "ro-crate-metadata.json"))  # leave the metadata descriptor out
+                FILTER NOT EXISTS { ?this a schema:Dataset }
+                FILTER NOT EXISTS { ?this a schema:MediaObject }
+                FILTER NOT EXISTS { ?s ?p ?this . }
+            }
+        """
+    ] ;
+    sh:message "Contextual entities SHOULD be linked to from another entity" .  # NOTE(review): only a target and a message are declared, no constraint component; presumably nothing is ever reported - confirm
+
+ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ;
+    sh:name "Referenced contextual entities should be described" ;
+    sh:description """Contextual entities referenced by other entities SHOULD be described in the same graph.""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT ?this
+            WHERE {
+                ?s ?p ?this .
+                FILTER(isIRI(?this))
+                FILTER NOT EXISTS { ?this a ?type }
+                FILTER(!regex(str(?this), "^https?://", "i"))
+            }
+        """
+    ] ;
+    sh:message "Referenced contextual entities SHOULD be described in the @graph" .  # NOTE(review): only a target and a message are declared, no constraint component; presumably nothing is ever reported - confirm
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py
new file mode 100644
index 000000000..7badf5d72
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Detached RO-Crate metadata filename")
+class DetachedMetadataFilenameChecker(PyFunctionCheck):
+    """
+    Detached RO-Crate metadata files SHOULD be named ${prefix}-ro-crate-metadata.json
+    """
+
+    @check(name="Detached RO-Crate: metadata filename")
+    def check_filename(self, context: ValidationContext) -> bool:
+        """
+        Check the file name of a detached RO-Crate metadata document stored locally.
+
+        The check passes for crates that are not detached or not local files, and for
+        names ending in `-ro-crate-metadata.json`. The bare `ro-crate-metadata.json`
+        name is reported as an issue; errors raised along the way are reported too.
+        """
+        try:
+            if not context.ro_crate.is_detached():
+                return True
+            if not context.ro_crate.uri.is_local_file():
+                return True
+            filename = context.ro_crate.uri.as_path().name
+            if filename.endswith("-ro-crate-metadata.json"):
+                return True
+            if filename == "ro-crate-metadata.json":
+                context.result.add_issue(
+                    "Detached RO-Crate metadata file SHOULD be named ${prefix}-ro-crate-metadata.json", self)
+                return False
+            # NOTE(review): every other file name is accepted silently - confirm this is intended
+            return True
+        except Exception as e:
+            context.result.add_issue(
+                f"Error checking detached metadata filename: {str(e)}", self)
+            return False
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl
new file mode 100644
index 000000000..a1b8005e4
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl
@@ -0,0 +1,41 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix schema: .
+@prefix sh: .
+@prefix xsd: .
+
+ro-crate:EntityNameRecommendedProperties a sh:NodeShape ;
+    sh:name "Entity: RECOMMENDED name" ;
+    sh:description """Entities SHOULD have a human-readable name.""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT ?this
+            WHERE {
+                ?this a ?type .
+                FILTER(!contains(str(?this), "ro-crate-metadata.json"))  # leave the metadata descriptor out
+            }
+        """
+    ] ;
+    sh:property [
+        sh:path schema:name ;
+        sh:minCount 1 ;
+        sh:datatype xsd:string ;  # the SHACL term is all-lowercase `datatype`
+        sh:severity sh:Warning ;
+        sh:message "Entities SHOULD have a human-readable name" ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py
new file mode 100644
index 000000000..6ee8c2eca
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Entity reachability")
+class EntityReachabilityChecker(PyFunctionCheck):
+    """
+    Entities SHOULD be referenced from the Root Data Entity (directly or indirectly).
+    """
+
+    @check(name="Entity reachability from root")
+    def check_reachability(self, context: ValidationContext) -> bool:
+        """
+        Check that every entity of the graph is reachable from the Root Data Entity.
+
+        References are followed transitively, starting from the Root Data Entity
+        and the metadata descriptor. Being referenced only by itself or by other
+        unreachable entities does not make an entity reachable.
+
+        :param context: the current validation context
+        :return: True if all the entities are reachable, False otherwise
+        """
+        try:
+            graph = context.ro_crate.metadata.as_dict().get("@graph", [])
+            # index the entities by @id (entities without an @id are ignored)
+            entities = {}
+            for entity in graph:
+                if entity.get("@id"):
+                    entities.setdefault(entity["@id"], []).append(entity)
+
+            def collect_refs(value, found):
+                """Add to `found` every `@id` nested in the given property value"""
+                if isinstance(value, dict):
+                    if "@id" in value:
+                        found.add(value["@id"])
+                    for v in value.values():
+                        collect_refs(v, found)
+                elif isinstance(value, list):
+                    for v in value:
+                        collect_refs(v, found)
+
+            root_id = context.ro_crate.metadata.get_root_data_entity().id
+            always_allowed = {context.ro_crate.metadata_descriptor_id, root_id}
+            # walk the graph starting from the always-allowed entities
+            reachable = set(always_allowed)
+            pending = list(always_allowed)
+            while pending:
+                for entity in entities.get(pending.pop(), []):
+                    refs = set()
+                    for key, value in entity.items():
+                        if key not in ("@id", "@type", "@context"):
+                            collect_refs(value, refs)
+                    # enqueue only the identifiers that have not been visited yet
+                    new_ids = refs - reachable
+                    reachable |= new_ids
+                    pending.extend(new_ids)
+
+            # sorted, so that the issue message is stable across runs
+            unreferenced = sorted((i for i in entities if i not in reachable), key=str)
+            if unreferenced:
+                context.result.add_issue(
+                    f"Entities not referenced from the graph: {unreferenced}", self)
+                return False
+            return True
+        except Exception as e:
+            context.result.add_issue(
+                f"Error checking entity reachability: {e}", self)
+            return False
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl
new file mode 100644
index 000000000..5e7727b60
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl
@@ -0,0 +1,53 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix dct: .
+@prefix sh: .
+
+ro-crate:ROCrateMetadataFileDescriptorConformsTo
+    a sh:NodeShape ;
+    sh:name "RO-Crate Metadata File Descriptor RECOMMENDED conformsTo" ;
+    sh:description """The RO-Crate Metadata Descriptor SHOULD indicate the versioned RO-Crate specification URI""" ;
+    sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:name "Metadata File Descriptor entity: RECOMMENDED `conformsTo` value" ;
+        sh:description """Check if the RO-Crate Metadata File Descriptor has a versioned RO-Crate specification URI""" ;
+        sh:severity sh:Warning ;
+        sh:nodeKind sh:IRI ;
+        sh:path dct:conformsTo ;
+        sh:hasValue ;
+        sh:message "The RO-Crate metadata file descriptor SHOULD have a `conformsTo` property with the RO-Crate specification version" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Metadata File Descriptor entity: RECOMMENDED single `conformsTo` value" ;
+        sh:description """Check if the RO-Crate Metadata File Descriptor has a single `conformsTo` value""" ;
+        sh:severity sh:Warning ;
+        sh:path dct:conformsTo ;
+        sh:maxCount 1 ;
+        sh:message "The RO-Crate metadata file descriptor SHOULD have a single `conformsTo` value" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Metadata File Descriptor entity: RECOMMENDED `conformsTo` URI prefix" ;
+        sh:description """Check if the `conformsTo` URI starts with https://w3id.org/ro/crate/""" ;
+        sh:severity sh:Warning ;
+        sh:path dct:conformsTo ;
+        sh:pattern "^https://w3id\\.org/ro/crate/" ;  # dot escaped: an unescaped `.` matches any character
+        sh:message "The RO-Crate metadata file descriptor `conformsTo` URI SHOULD start with https://w3id.org/ro/crate/" ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl
new file mode 100644
index 000000000..673baf380
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl
@@ -0,0 +1,124 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix rdf: .
+@prefix schema_org: .
+@prefix prof: .
+@prefix sh: .
+@prefix validator: .
+
+ro-crate:RootDataEntityDirectRecommendedProperties a sh:NodeShape ;
+    sh:name "RO-Crate Root Data Entity RECOMMENDED properties" ;
+    sh:description """The Root Data Entity SHOULD have
+        the properties `name`, `description` and `license` defined as described
+        in the RO-Crate specification """;
+    sh:targetClass ro-crate:RootDataEntity ;  # applies to every node typed as ro-crate:RootDataEntity
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `name` property" ;
+        sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org)
+            to clearly identify the dataset and distinguish it from other datasets.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:name;
+        sh:message "The Root Data Entity SHOULD have a `name` property (as specified by schema.org)" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `description` property" ;
+        sh:description """Check if the Root Data Entity includes a `description` (as specified by schema.org)
+            to provide a human-readable description of the dataset.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:description;
+        sh:message "The Root Data Entity SHOULD have a `description` property (as specified by schema.org)" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `license` SHOULD link to a Contextual Entity" ;
+        sh:description """Check if the Root Data Entity includes a `license` property
+            that links to a Contextual Entity with type `schema_org:CreativeWork` to describe the license.""" ;
+        sh:nodeKind sh:BlankNodeOrIRI ;
+        sh:or (  # the license value may be an instance of any of these classes
+            [ sh:class schema_org:CreativeWork ]
+            [ sh:class schema_org:MediaObject ]
+            [ sh:class schema_org:Dataset ]
+        ) ;
+        sh:path schema_org:license;
+        sh:minCount 1 ;
+        sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type""" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `author` property" ;
+        sh:description """Check if the Root Data Entity includes a `author` property (as specified by schema.org)
+            to provide information about its author.""" ;
+        sh:or (  # an author may be either a Person or an Organization
+            [ sh:class schema_org:Person ;]
+            [ sh:class schema_org:Organization ;]
+        ) ;
+        sh:path schema_org:author;
+        sh:minCount 1 ;
+        sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the `author` of the RO-Crate""" ;
+    ] ;
+    sh:property [
+        sh:minCount 1 ;
+        sh:maxCount 1 ;  # together with sh:minCount 1: exactly one publisher
+        sh:path schema_org:publisher ;
+        sh:severity sh:Warning ;
+        sh:name "Root Data Entity: `publisher` property" ;
+        sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization`.""" ;
+        sh:message "The `publisher` property of a `Root Data Entity` SHOULD be an `Organization`";
+        sh:nodeKind sh:IRI ;
+        sh:class schema_org:Organization ;
+    ] ;
+    sh:property [
+        sh:severity sh:Warning ;
+        sh:name "Root Data Entity: `funder` property" ;
+        sh:description """Check if the Root Data Entity has `funder` properties referencing Organizations.""" ;
+        sh:path schema_org:funder ;
+        sh:class schema_org:Organization ;  # no sh:minCount: only the funders that are present are checked
+        sh:message "The Root Data Entity SHOULD reference funders using `funder`" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: RECOMMENDED `datePublished` property" ;
+        sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org)
+            to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date.
+            It SHOULD be specified to at least the day level, but MAY include a time component.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:datePublished ;
+        sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ;
+        sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date and the precision of at least the day level" ;
+    ] .
+
+ro-crate:RootDataEntityIdentifierRecommendedValue
+    a sh:NodeShape ;
+    sh:name "RO-Crate Root Data Entity RECOMMENDED value" ;
+    sh:description "The Root Data Entity SHOULD be identified by `./` or an absolute URI" ;
+    sh:targetNode ro-crate:RootDataEntity ;  # the focus node is the class IRI itself
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity URI value" ;
+        sh:description "Check if the Root Data Entity identifier is `./` or an absolute URI" ;
+        sh:path [ sh:inversePath rdf:type ] ;  # value nodes: the nodes whose rdf:type is the focus node
+        sh:message """The Root Data Entity SHOULD be identified by `./` or an absolute URI""" ;
+        sh:or (
+            [ sh:pattern "^\\./$" ]  # exactly `./`
+            [ sh:pattern "^[A-Za-z][A-Za-z0-9+\\.-]*:" ]  # starts with a URI scheme followed by `:`
+        ) ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py
new file mode 100644
index 000000000..2c9001b89
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate Root Data Entity RECOMMENDED value")
+class RootDataEntityRelativeURI(PyFunctionCheck):
+    """
+    Recommendation on the Root Data Entity identifier: the string ./ or an absolute URI
+    """
+
+    @check(name="Root Data Entity: RECOMMENDED value")
+    def check_relative_uris(self, context: ValidationContext) -> bool:
+        """Return True when the Root Data Entity identifier is `./` or starts with a URI scheme"""
+        try:
+            entity_id = context.ro_crate.metadata.get_root_data_entity().id
+            # anything other than `./` has to start with `<scheme>:`
+            is_valid = entity_id == './' or bool(
+                re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", entity_id))
+            if not is_valid:
+                context.result.add_issue(
+                    'Root Data Entity URI is not denoted by the string `./` or an absolute URI', self)
+            return is_valid
+        except Exception as error:
+            context.result.add_issue(
+                f'Error checking Root Data Entity URI: {str(error)}', self)
+            return False
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl
new file mode 100644
index 000000000..a0ae44ab7
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl
@@ -0,0 +1,33 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+#
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# @prefix ro: <./> . +# @prefix ro-crate: . +# @prefix schema_org: . +# @prefix sh: . + +# ro-crate:PreviewFilesNotInHasPart a sh:NodeShape ; +# sh:name "RO-Crate Website files exclusion" ; +# sh:description """`ro-crate-preview.html` and `ro-crate-preview_files/` SHOULD NOT be included in `hasPart`.""" ; +# sh:targetClass schema_org:Dataset ; +# sh:sparql [ +# sh:message "RO-Crate Website files SHOULD NOT be included in `hasPart`" ; +# sh:select """ +# SELECT ?this +# WHERE { +# ?this schema:hasPart ?part . +# FILTER(str(?part) = "ro-crate-preview.html" || str(?part) = "ro-crate-preview_files/") +# } +# """ ; +# ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py new file mode 100644 index 000000000..f5742acd2 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py @@ -0,0 +1,58 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Data Entity: RECOMMENDED resource availability")
+class DataEntityRecommendedChecker(PyFunctionCheck):
+    """
+    Data Entities with absolute URI paths SHOULD be available
+    at the time of RO-Crate creation
+    """
+
+    @check(name="Data Entity: RECOMMENDED resource availability")
+    def check_availability(self, context: ValidationContext) -> bool:
+        """
+        Check that the Data Entities with absolute URI paths are available.
+
+        Skipped in metadata-only mode; stops early when `context.fail_fast` is set.
+        """
+        if context.settings.metadata_only:
+            logger.debug("Skipping data entity availability check in metadata-only mode")
+            return True
+        result = True
+        for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True):
+            if not entity.has_absolute_path():
+                continue
+            assert entity.id is not None, "Entity has no @id"
+            try:
+                if not entity.is_available():
+                    context.result.add_issue(
+                        f'Data Entity {entity.id} is not available', self)
+                    result = False
+            except Exception as e:
+                context.result.add_issue(
+                    f'Web-based Data Entity {entity.id} is not available: {e}', self)
+                result = False
+            if not result and context.fail_fast:
+                return False
+        return result
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl
new file mode 100644
index 000000000..6fa8c4f77
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl
@@ -0,0 +1,227 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache
License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix rdf: .
+@prefix schema_org: .
+@prefix prof: .
+@prefix sh: .
+@prefix xsd: .
+
+ro-crate:FileRecommendedProperties a sh:NodeShape ;
+    sh:targetClass ro-crate:File ;
+    sh:name "File Data Entity: RECOMMENDED properties";
+    sh:description """A `File` Data Entity SHOULD have detailed descriptions encodings through the `encodingFormat` property""" ;
+    sh:property [
+        sh:minCount 1 ;
+        sh:maxCount 2 ;
+        sh:path schema_org:encodingFormat ;
+        sh:severity sh:Warning ;
+        sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ;
+        sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property.
+        The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or,
+        to add more detail, SHOULD be linked using a `PRONOM` to a `Contextual Entity` of type `WebPage`
+        (see [Adding detailed descriptions of encodings](https://www.researchobject.org/ro-crate/1.2/data-entities.html#adding-detailed-descriptions-of-encodings)).
+        """ ;
+        sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`";
+        sh:or (
+            [
+                sh:datatype xsd:string ;
+                sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ;
+                sh:name "File Data Entity: RECOMMENDED `PRONOM` for the `encodingFormat` property" ;
+                sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier
+                (e.g., application/pdf, application/text, image/svg+xml, image/svg;q=0.9,/;q=0.8,image/svg+xml;q=0.9,/;q=0.8, application/vnd.uplanet.listcmd-wbxml;charset=utf-8).
+                """ ;
+                sh:message "The `encodingFormat` SHOULD be linked using a PRONOM identifier (e.g., application/pdf).";
+            ]
+            [
+                sh:nodeKind sh:IRI ;
+                sh:class schema_org:WebPage ;
+                sh:name "File Data Entity: RECOMMENDED `Contextual Entity` linked to the `encodingFormat` property";
+                sh:description "Check if the File Data Entity `encodingFormat` is linked to a `Contextual Entity of type `WebPage`." ;
+                sh:message "The `encodingFormat` SHOULD be linked to a `Contextual Entity` of type `WebPage`." ;
+            ]
+        )
+    ] .
+
+ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ;
+    sh:targetClass ro-crate:File ;
+    sh:name "File Data Entity: RECOMMENDED contentSize" ;
+    sh:description """A `File` Data Entity SHOULD have `contentSize` set to the size in bytes.""" ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:name "File Data Entity: RECOMMENDED `contentSize` property" ;
+        sh:path schema_org:contentSize ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Warning ;
+        sh:message "File Data Entities SHOULD have a `contentSize` property" ;
+        # sh:sparql [
+        #     sh:message "If the value is a string it must be a string representing an integer." ;
+        #     sh:prefixes ro-crate:sparqlPrefixes ;
+        #     sh:select """
+        #     SELECT ?this ?value
+        #     WHERE {
+        #         ?this schema:contentSize ?value .
+        #         FILTER (!regex(str(?value), "^[0-9]+$"))
+        #     }
+        #     """ ;
+        # ] ;
+    ] .
+
+ro-crate:DataEntityRecommendedProperties a sh:NodeShape ;
+    sh:targetClass ro-crate:DataEntity ;
+    sh:name "Data Entity: RECOMMENDED properties" ;
+    sh:description """Data Entities SHOULD have `name` and `description` properties.""" ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Data Entity: RECOMMENDED `name` property" ;
+        sh:path schema_org:name ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entities SHOULD have a `name` property" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Data Entity: RECOMMENDED `description` property" ;
+        sh:path schema_org:description ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entities SHOULD have a `description` property" ;
+    ] ;
+    sh:property [
+        sh:name "Data Entity: RECOMMENDED `license` reference" ;
+        sh:description """If present, `license` SHOULD reference a CreativeWork entity.""" ;
+        sh:path schema_org:license ;
+        sh:class schema_org:CreativeWork ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entity `license` SHOULD reference a CreativeWork entity" ;
+    ] ;
+    sh:property [
+        sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ;
+        sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ;
+        # A property shape needs its own sh:path: the values of both location
+        # properties are selected with a single alternative path
+        sh:path [ sh:alternativePath ( schema_org:contentLocation schema_org:spatialCoverage ) ] ;
+        sh:class schema_org:Place ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entity location properties SHOULD reference a Place" ;
+    ] ;
+    sh:property [
+        sh:name "Data Entity: RECOMMENDED `citation` reference" ;
+        sh:description """If present, `citation` SHOULD reference a ScholarlyArticle or CreativeWork.""" ;
+        sh:path schema_org:citation ;
+        sh:or (
+            [ sh:class schema_org:ScholarlyArticle ]
+            [ sh:class schema_org:CreativeWork ]
+        ) ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entity `citation` SHOULD reference a ScholarlyArticle or CreativeWork" ;
+    ] ;
+    sh:property [
+        sh:name "Data Entity: RECOMMENDED `author` reference" ;
+        sh:description """If present, `author` SHOULD reference Person entities.""" ;
+        sh:path schema_org:author ;
+        sh:class schema_org:Person ;
+        sh:severity sh:Warning ;
+        sh:message "Data Entity `author` SHOULD reference a Person entity" ;
+    ] .
+
+ro-crate:DirectoryDataEntityRequiredValueRestriction a sh:NodeShape ;
+    sh:name "Directory Data Entity: RECOMMENDED value restriction" ;
+    sh:description """A Directory Data Entity SHOULD end with `/`""" ;
+    sh:targetNode ro-crate:Directory ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Directory Data Entity: RECOMMENDED value restriction" ;
+        sh:description """Check if the Directory Data Entity ends with `/`""" ;
+        sh:path [ sh:inversePath rdf:type ] ;
+        sh:message """Every Data Entity Directory URI SHOULD end with `/`""" ;
+        sh:pattern "/$" ;
+    ] .
+
+ro-crate:DatasetRecommendedProperties a sh:NodeShape ;
+    sh:targetClass schema_org:Dataset ;
+    sh:name "Dataset: RECOMMENDED properties" ;
+    sh:description """Datasets SHOULD include `hasPart` and may include nested Datasets.""" ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Dataset: RECOMMENDED `hasPart` property" ;
+        sh:path schema_org:hasPart ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "Dataset entities SHOULD include `hasPart`" ;
+    ] .
+
+# ro-crate:DatasetRelativeIdShouldEndWithSlash a sh:NodeShape ;
+#     sh:name "Dataset: RECOMMENDED trailing slash" ;
+#     sh:description """If a Dataset has a relative @id, it SHOULD end with `/`.""" ;
+#     sh:target [
+#         a sh:SPARQLTarget ;
+#         sh:prefixes ro-crate:sparqlPrefixes ;
+#         sh:select """
+#             SELECT ?this
+#             WHERE {
+#                 ?this a schema:Dataset .
+#                 FILTER(!regex(str(?this), "^[A-Za-z][A-Za-z0-9+\\.-]*:")) .
+#                 FILTER(!regex(str(?this), "#$")) .
+#                 FILTER(!regex(str(?this), "/$")) .
+#             }
+#         """
+#     ] ;
+#     sh:message "Dataset entities with relative @id SHOULD end with '/'" .
+
+ro-crate:DatasetIdShouldEndWithSlash a sh:NodeShape ;
+    sh:name "Dataset: RECOMMENDED trailing slash" ;
+    sh:description "Dataset IRI SHOULD end with `/`." ;
+    sh:targetClass schema_org:Dataset ;
+    sh:nodeKind sh:IRI ;
+    sh:pattern "/$" ;
+    sh:message "Dataset IRI SHOULD end with '/'" .
+
+ro-crate:WebDatasetDistributionRecommended a sh:NodeShape ;
+    sh:name "Dataset: RECOMMENDED `distribution` for web datasets" ;
+    sh:description """If a Dataset has a web URI, it SHOULD include `distribution`.""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT ?this
+            WHERE {
+                ?this a schema:Dataset .
+                FILTER regex(str(?this), "^https?://", "i")
+            }
+        """ ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema_org:distribution ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "Web-based Dataset entities SHOULD include `distribution`" ;
+    ] .
+
+ro-crate:FileConformsToProfile a sh:NodeShape ;
+    sh:name "File: RECOMMENDED `conformsTo` profile" ;
+    sh:description """If present, `conformsTo` SHOULD reference a Profile entity.""" ;
+    sh:targetClass ro-crate:File ;
+    sh:property [
+        sh:path <http://purl.org/dc/terms/conformsTo> ;  # Dublin Core `conformsTo`, written in full: no `dct` prefix is declared in this file
+        sh:class prof:Profile ;
+        sh:severity sh:Warning ;
+        sh:message "File `conformsTo` SHOULD reference a Profile entity" ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/5_web_data_entity_metadata.ttl
new file mode 100644
index 000000000..7abdf9c9a
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/5_web_data_entity_metadata.ttl
@@ -0,0 +1,71 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix rdf: .
+@prefix dct: .
+@prefix schema_org: .
+@prefix sh: .
+@prefix owl: .
+@prefix xsd: .
+@prefix validator: .
+
+
+ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ;
+    sh:name "Web-based Data Entity: RECOMMENDED properties" ;
+    sh:description """A Web-based Data Entity MUST be identified by an absolute URL and
+        SHOULD have a `contentSize` and `sdDatePublished` property""" ;
+    sh:targetClass ro-crate:WebDataEntity ;
+    # Disabled placeholder property shape (`pippo` test property), not evaluated
+    # sh:property [
+    #     a sh:PropertyShape ;
+    #     sh:minCount 1 ;
+    #     sh:name "Web-based Data Entity: test property" ;
+    #     sh:path schema_org:pippo ;
+    #     sh:severity sh:Warning ;
+    #     sh:message """Web-based Data Entities SHOULD have a `pippo` property""" ;
+    # ] ;
+    # Check if the Web-based Data Entity has a contentSize property
+    sh:property [
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:name "Web-based Data Entity: `contentSize` property" ;
+        sh:description """Check if the Web-based Data Entity has a `contentSize` property""" ;
+        sh:path schema_org:contentSize ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Warning ;
+        sh:message """Web-based Data Entities SHOULD have a `contentSize` property""" ;
+        # Disabled SPARQL constraint: would require the value to contain only digits
+        # sh:sparql [
+        #     sh:message "If the value is a string it must be a string representing an integer." ;
+        #     sh:select """
+        #     SELECT ?this ?value
+        #     WHERE {
+        #         ?this schema:contentSize ?value .
+        #         FILTER (!regex(str(?value), "^[0-9]+$"))
+        #     }
+        #     """ ;
+        # ] ;
+    ] ;
+    # Check if the Web-based Data Entity has a sdDatePublished property matching the date pattern below
+    sh:property [
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "Web-based Data Entity: `sdDatePublished` property" ;
+        sh:description """Check if the Web-based Data Entity has a `sdDatePublished` property""" ;
+        sh:path schema_org:sdDatePublished ;
+        sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ;
+        sh:message """Web-based Data Entities SHOULD have a `sdDatePublished` property to indicate when the absolute URL was accessed""" ;
+    ] .
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl
new file mode 100644
index 000000000..5e37ae1fd
--- /dev/null
+++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl
@@ -0,0 +1,162 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix xsd: .
+@prefix dct: .
+ +ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; + sh:name "CreativeWork Author: minimum RECOMMENDED properties" ; + sh:description """The minimum recommended properties for a `CreativeWork Author` are `name` and `affiliation`.""" ; + sh:targetClass ro-crate:CreativeWorkAuthor ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "CreativeWork Author: RECOMMENDED name property" ; + sh:description "Check if the author has a name." ; + sh:message "The author SHOULD have a name." ; + ] ; + sh:property [ + sh:path schema:affiliation ; + sh:minCount 1 ; + sh:or ( + [ sh:dataType xsd:string ; ] + [ sh:class schema:Organization ;] + ) ; + sh:severity sh:Warning ; + sh:name "CreativeWork Author: RECOMMENDED affiliation property" ; + sh:description "Check if the author has an organizational affiliation." ; + sh:message "The author SHOULD have an organizational affiliation." ; + ] ; + sh:property [ + sh:path schema:affiliation ; + sh:minCount 1 ; + sh:class schema:Organization ; + sh:severity sh:Warning ; + sh:name "CreativeWork Author: RECOMMENDED Contextual Entity linked for the organizational `affiliation` property" ; + sh:description "Check if the author has a Contextual Entity for the organizational `affiliation` property." ; + sh:message "The author SHOULD have a Contextual Entity which specifies the organizational `affiliation`." ; + ] . + + +ro-crate:OrganizationRecommendedProperties a sh:NodeShape ; + sh:name "Organization: RECOMMENDED properties" ; + sh:description """The recommended properties for an `Organization` are `name` and `url`.""" ; + sh:targetClass schema:Organization ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "Organization: RECOMMENDED name property" ; + sh:description "Check if the `organization` has a name." ; + sh:message "The organization SHOULD have a name." 
; + ] ; + sh:property [ + sh:path schema:url ; + sh:minCount 1 ; + sh:dataType xsd:anyURI ; + sh:name "Organization: RECOMMENDED url property" ; + sh:description "Check if the `organization` has a URL." ; + sh:message "The organization SHOULD have a URL." ; + ] . + +ro-crate:PersonAffiliationRecommendedProperties a sh:NodeShape ; + sh:name "Person: RECOMMENDED affiliation" ; + sh:description """Persons SHOULD reference an Organization for affiliation.""" ; + sh:targetClass schema:Person ; + sh:property [ + sh:path schema:affiliation ; + sh:class schema:Organization ; + sh:severity sh:Warning ; + sh:message "Persons SHOULD reference an Organization for affiliation" ; + ] . + +ro-crate:LicenseEntityRecommendedProperties a sh:NodeShape ; + sh:name "License entity: RECOMMENDED properties" ; + sh:description """License entities SHOULD have `name` and `description`.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?subject schema:license ?this . + } + """ + ] ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:severity sh:Warning ; + sh:message "License entities SHOULD have a name" ; + ] ; + sh:property [ + sh:path schema:description ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:severity sh:Warning ; + sh:message "License entities SHOULD have a description" ; + ] . + +ro-crate:PlaceGeoRecommendedProperties a sh:NodeShape ; + sh:name "Place: RECOMMENDED geo" ; + sh:description """Places SHOULD reference Geometry via `geo` when geometry is provided.""" ; + sh:targetClass schema:Place ; + sh:property [ + sh:path schema:geo ; + sh:class ; + sh:severity sh:Warning ; + sh:message "Places SHOULD reference a Geometry entity via `geo`" ; + ] . 
+ +ro-crate:GeometryWktRecommendedProperties a sh:NodeShape ; + sh:name "Geometry: RECOMMENDED asWKT" ; + sh:description """Geometry entities SHOULD provide `asWKT`.""" ; + sh:targetClass ; + sh:property [ + sh:path ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Geometry entities SHOULD provide `asWKT`" ; + ] . + +ro-crate:EncodingFormatRecommendedTypes a sh:NodeShape ; + sh:name "Encoding format: RECOMMENDED types" ; + sh:description """Encoding format contextual entities SHOULD include WebPage and/or Standard types.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?s schema:encodingFormat ?this . + FILTER(isIRI(?this)) + } + """ + ] ; + sh:property [ + sh:path rdf:type ; + sh:or ( + [ sh:hasValue schema:WebPage ] + [ sh:hasValue dct:Standard ] + ) ; + sh:severity sh:Warning ; + sh:message "Encoding format entities SHOULD include WebPage and/or Standard types" ; + ] . From 2c7036ac161ce703b7058ea30c8681cb17642f01 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:29 +0100 Subject: [PATCH 004/352] feat(core): add detached metadata file support --- rocrate_validator/models/settings.py | 6 ++ rocrate_validator/models/statistics.py | 12 ++- rocrate_validator/models/validation.py | 3 +- rocrate_validator/rocrate/bagit.py | 11 ++- rocrate_validator/rocrate/base.py | 43 ++++++--- rocrate_validator/rocrate/entity.py | 2 + rocrate_validator/rocrate/metadata.py | 17 +++- rocrate_validator/rocrate/plain.py | 121 ++++++++++++++++++++++++- 8 files changed, 189 insertions(+), 26 deletions(-) diff --git a/rocrate_validator/models/settings.py b/rocrate_validator/models/settings.py index 179bb46a9..da3df7ceb 100644 --- a/rocrate_validator/models/settings.py +++ b/rocrate_validator/models/settings.py @@ -100,6 +100,12 @@ class ValidationSettings: offline: bool = False #: Flag to disable the HTTP cache entirely: every request hits the network no_cache: bool = False + 
#: Flag to indicate validation at creation time + creation_time: bool = False + #: Flag to enforce availability checks regardless of creation time + enforce_availability: bool = False + #: Flag to skip availability checks + skip_availability_check: bool = False def __post_init__(self): # if requirement_severity is a str, convert to Severity diff --git a/rocrate_validator/models/statistics.py b/rocrate_validator/models/statistics.py index b8d79cbf8..c7f7e3536 100644 --- a/rocrate_validator/models/statistics.py +++ b/rocrate_validator/models/statistics.py @@ -393,16 +393,18 @@ def __handle_requirement_check_validation_end__(self, event: Event, ctx: Validat assert isinstance(event, RequirementCheckValidationEvent) assert ctx is not None target_profile = ctx.target_validation_profile + requirement_severity = self._settings.requirement_severity if not event.requirement_check.requirement.hidden and ( not event.requirement_check.overridden or target_profile.identifier == event.requirement_check.requirement.profile.identifier ): if event.validation_result is not None: - if event.validation_result: - self._stats["passed_checks"].append(event.requirement_check) - else: - self._stats["failed_checks"].append(event.requirement_check) - self._stats["validated_checks"].append(event.requirement_check) + if event.requirement_check.severity >= requirement_severity: + if event.validation_result: + self._stats["passed_checks"].append(event.requirement_check) + else: + self._stats["failed_checks"].append(event.requirement_check) + self._stats["validated_checks"].append(event.requirement_check) self.notify_listeners() else: logger.debug( diff --git a/rocrate_validator/models/validation.py b/rocrate_validator/models/validation.py index ba603252f..5f15c78d3 100644 --- a/rocrate_validator/models/validation.py +++ b/rocrate_validator/models/validation.py @@ -20,7 +20,6 @@ from rdflib import Graph -from rocrate_validator.constants import ROCRATE_METADATA_FILE from rocrate_validator.errors 
import ( ProfileNotFound, ROCrateMetadataNotFoundError, @@ -435,7 +434,7 @@ def rel_fd_path(self) -> Path: :return: The relative path to the file descriptor :rtype: Path """ - return Path(ROCRATE_METADATA_FILE) + return Path(self.ro_crate.metadata_descriptor_id) def __load_data_graph__(self) -> Graph: data_graph = Graph() diff --git a/rocrate_validator/rocrate/bagit.py b/rocrate_validator/rocrate/bagit.py index 5f09c3b39..17149c46b 100644 --- a/rocrate_validator/rocrate/bagit.py +++ b/rocrate_validator/rocrate/bagit.py @@ -125,7 +125,7 @@ def __check_search_path__(self, path): class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): # initialize the parent classes - super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) + super().__init__(uri, relative_root_path=relative_root_path) # check if the path is a BagIt-wrapped crate assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" @@ -136,6 +136,15 @@ def __parse_path__(self, path: Path) -> Path: path = root_path / Path("data") / search_path if not path.exists(): path = Path(unquote(str(path))) + return path + if not path.is_absolute(): + base_path = self.uri.as_path() if self.uri.is_local_resource() else Path() + try: + path.relative_to(base_path) + except ValueError: + path = base_path / path + if not path.exists(): + path = Path(unquote(str(path))) return path diff --git a/rocrate_validator/rocrate/base.py b/rocrate_validator/rocrate/base.py index 3103010a1..f604b606b 100644 --- a/rocrate_validator/rocrate/base.py +++ b/rocrate_validator/rocrate/base.py @@ -110,6 +110,13 @@ def metadata(self) -> ROCrateMetadata: self._metadata = ROCrateMetadata(self) return self._metadata + def is_detached(self) -> bool: + return False + + @property + def metadata_descriptor_id(self) -> str: + return ROCrateMetadata.METADATA_FILE_DESCRIPTOR + @property @abstractmethod def size(self) -> int: @@ 
-206,7 +213,7 @@ def has_descriptor(self) -> bool: :return: `True` if the RO-Crate has a metadata descriptor file, `False` otherwise :rtype: bool """ - path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR)) + path = self.__parse_path__(Path(self.metadata_descriptor_id)) logger.debug("Checking for metadata descriptor at path: %s", path) return self.has_file(path) @@ -345,7 +352,13 @@ def new_instance(uri: str | Path | URI, relative_root_path: Path | None = None) ROCrateBagitLocalZip, ROCrateBagitRemoteZip, ) - from .plain import ROCrateLocalFolder, ROCrateLocalZip, ROCrateRemoteZip # noqa: PLC0415 + from .plain import ( # noqa: PLC0415 + ROCrateLocalFolder, + ROCrateLocalMetadataFile, + ROCrateLocalZip, + ROCrateRemoteMetadataFile, + ROCrateRemoteZip, + ) # check if the URI is valid validate_rocrate_uri(uri, silent=False) @@ -364,17 +377,23 @@ def new_instance(uri: str | Path | URI, relative_root_path: Path | None = None) ) # check if the URI is a local zip file if uri.is_local_file(): - return ( - ROCrateBagitLocalZip(uri, relative_root_path=relative_root_path) - if is_bagit_crate - else ROCrateLocalZip(uri, relative_root_path=relative_root_path) - ) + suffix = uri.as_path().suffix.lower() + if suffix == ".zip": + return ( + ROCrateBagitLocalZip(uri, relative_root_path=relative_root_path) + if is_bagit_crate + else ROCrateLocalZip(uri, relative_root_path=relative_root_path) + ) + return ROCrateLocalMetadataFile(uri, relative_root_path=relative_root_path) # check if the URI is a remote zip file if uri.is_remote_resource(): - return ( - ROCrateBagitRemoteZip(uri, relative_root_path=relative_root_path) - if is_bagit_crate - else ROCrateRemoteZip(uri, relative_root_path=relative_root_path) - ) + path_suffix = Path(uri.get_path()).suffix.lower() + if path_suffix == ".zip": + return ( + ROCrateBagitRemoteZip(uri, relative_root_path=relative_root_path) + if is_bagit_crate + else ROCrateRemoteZip(uri, relative_root_path=relative_root_path) + ) + return 
ROCrateRemoteMetadataFile(uri, relative_root_path=relative_root_path) # if the URI is not supported, raise an error raise ROCrateInvalidURIError(uri=uri, message="Unsupported RO-Crate URI") diff --git a/rocrate_validator/rocrate/entity.py b/rocrate_validator/rocrate/entity.py index 8ce84c68c..2c05bcef7 100644 --- a/rocrate_validator/rocrate/entity.py +++ b/rocrate_validator/rocrate/entity.py @@ -152,6 +152,8 @@ def id_as_uri(self) -> URI: return self.get_id_as_uri(self.id, self.ro_crate) def has_absolute_path(self) -> bool: + if self.id_as_uri.is_remote_resource(): + return True return self.get_id_as_path(self.id).is_absolute() def has_relative_path(self) -> bool: diff --git a/rocrate_validator/rocrate/metadata.py b/rocrate_validator/rocrate/metadata.py index 522a399db..5e21a9879 100644 --- a/rocrate_validator/rocrate/metadata.py +++ b/rocrate_validator/rocrate/metadata.py @@ -54,10 +54,17 @@ def size(self) -> int: return 0 def get_file_descriptor_entity(self) -> ROCrateEntity: - metadata_file_descriptor = self.get_entity(self.METADATA_FILE_DESCRIPTOR) - if not metadata_file_descriptor: - raise ValueError("no metadata file descriptor in crate") - return metadata_file_descriptor + metadata_file_descriptor = self.get_entity(self.ro_crate.metadata_descriptor_id) + if metadata_file_descriptor: + return metadata_file_descriptor + + for entity in self.get_entities(): + if not entity.id or not entity.id.endswith(self.METADATA_FILE_DESCRIPTOR): + continue + if entity.has_type("CreativeWork"): + return entity + + raise ValueError("no metadata file descriptor in crate") def get_root_data_entity(self) -> ROCrateEntity: metadata_file_descriptor = self.get_file_descriptor_entity() @@ -135,7 +142,7 @@ def get_conforms_to(self) -> list[str] | None: def as_json(self) -> str: if not self._json: self._json = cast( - "str", self.ro_crate.get_file_content(Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False) + "str", 
self.ro_crate.get_file_content(Path(self.ro_crate.metadata_descriptor_id), binary_mode=False) ) return self._json diff --git a/rocrate_validator/rocrate/plain.py b/rocrate_validator/rocrate/plain.py index cac9a8181..6f703db4d 100644 --- a/rocrate_validator/rocrate/plain.py +++ b/rocrate_validator/rocrate/plain.py @@ -25,6 +25,7 @@ from rocrate_validator.utils.http import HttpRequester from .base import ROCrate +from .metadata import ROCrateMetadata if TYPE_CHECKING: from rocrate_validator.utils.uri import URI @@ -43,6 +44,7 @@ def __init__(self, path: str | Path | URI, relative_root_path: Path | None = Non # cache the list of files self._files = None + self._metadata_descriptor_id = None # check if the path is a directory if not self.has_directory(self.uri.as_path()): @@ -52,6 +54,21 @@ def __init__(self, path: str | Path | URI, relative_root_path: Path | None = Non def size(self) -> int: return sum(f.stat().st_size for f in self.list_files() if f.is_file()) + @property + def metadata_descriptor_id(self) -> str: + if self._metadata_descriptor_id: + return self._metadata_descriptor_id + base_path = self.uri.as_path() + candidates = sorted( + (p for p in base_path.rglob(f"*{ROCrateMetadata.METADATA_FILE_DESCRIPTOR}") if p.is_file()), + key=lambda p: (len(p.relative_to(base_path).parts), str(p)), + ) + if not candidates: + self._metadata_descriptor_id = ROCrateMetadata.METADATA_FILE_DESCRIPTOR + return self._metadata_descriptor_id + self._metadata_descriptor_id = str(candidates[0].relative_to(base_path)) + return self._metadata_descriptor_id + def list_files(self) -> list[Path]: if not self._files: self._files = [] @@ -90,6 +107,7 @@ def __init__( # cache the list of files self._files = None + self._metadata_descriptor_id = None def __del__(self): try: @@ -133,9 +151,23 @@ def has_descriptor(self) -> bool: Check if the RO-Crate has a metadata descriptor file. 
:rtype: bool """ - path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR)) + path = self.__parse_path__(Path(self.metadata_descriptor_id)) return str(path) in [str(_) for _ in self.list_files()] + @property + def metadata_descriptor_id(self) -> str: + if self._metadata_descriptor_id: + return self._metadata_descriptor_id + candidates = sorted( + (p for p in self.list_files() if str(p).endswith(ROCrateMetadata.METADATA_FILE_DESCRIPTOR)), + key=lambda p: (len(p.parts), str(p)), + ) + if not candidates: + self._metadata_descriptor_id = ROCrateMetadata.METADATA_FILE_DESCRIPTOR + return self._metadata_descriptor_id + self._metadata_descriptor_id = str(candidates[0]) + return self._metadata_descriptor_id + def has_file(self, path: Path) -> bool: path = self.__parse_path__(path) for p in self.list_files(): @@ -185,6 +217,93 @@ def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: return data if binary_mode else data.decode("utf-8") +class ROCrateLocalMetadataFile(ROCrate): + def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): + super().__init__(path, relative_root_path=relative_root_path) + + if not self.uri.is_local_file(): + raise ROCrateInvalidURIError(uri=path) + + suffix = self.uri.as_path().suffix.lower() + if suffix not in (".json", ".jsonld"): + raise ROCrateInvalidURIError(uri=path, message="Unsupported metadata file format") + + def is_detached(self) -> bool: + return True + + @property + def metadata_descriptor_id(self) -> str: + return self.uri.as_path().name + + @property + def size(self) -> int: + return self.uri.as_path().stat().st_size + + def list_files(self) -> list[Path]: + return [Path(self.metadata_descriptor_id)] + + def has_descriptor(self) -> bool: + return True + + def has_file(self, path: Path) -> bool: + return path.name == self.metadata_descriptor_id + + def get_file_size(self, path: Path) -> int: + if path.name != self.metadata_descriptor_id: + raise 
FileNotFoundError(f"File not found: {path}") + return self.uri.as_path().stat().st_size + + def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: + if path.name != self.metadata_descriptor_id: + raise FileNotFoundError(f"File not found: {path}") + return self.uri.as_path().read_bytes() if binary_mode else self.uri.as_path().read_text() + + +class ROCrateRemoteMetadataFile(ROCrate): + def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): + super().__init__(uri, relative_root_path=relative_root_path) + + if not self.uri.is_remote_resource(): + raise ROCrateInvalidURIError(uri=uri) + + def is_detached(self) -> bool: + return True + + @property + def metadata_descriptor_id(self) -> str: + return Path(self.uri.get_path()).name + + @property + def size(self) -> int: + response = HttpRequester().head(str(self.uri)) + response.raise_for_status() + file_size = response.headers.get("Content-Length") + if file_size is None: + raise ValueError("Could not determine the file size from the headers") + return int(file_size) + + def list_files(self) -> list[Path]: + return [Path(self.metadata_descriptor_id)] + + def has_descriptor(self) -> bool: + return True + + def has_file(self, path: Path) -> bool: + return path.name == self.metadata_descriptor_id + + def get_file_size(self, path: Path) -> int: + if path.name != self.metadata_descriptor_id: + raise FileNotFoundError(f"File not found: {path}") + return self.size + + def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: + if path.name != self.metadata_descriptor_id: + raise FileNotFoundError(f"File not found: {path}") + response = HttpRequester().get(str(self.uri), headers={"Accept": "application/ld+json"}) + response.raise_for_status() + return response.content if binary_mode else response.text + + class ROCrateRemoteZip(ROCrateLocalZip): def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): super().__init__(path, 
relative_root_path=relative_root_path, init_zip=False) From 5b6ffea61352112939273c8e5d50b8276d988253 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:36 +0100 Subject: [PATCH 005/352] feat(uri): support JSON/JSON-LD metadata files as crate source --- rocrate_validator/utils/uri.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index e3d12e1a4..f8a2c9bd0 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -300,13 +300,16 @@ def validate_rocrate_uri(uri: str | Path | URI, silent: bool = False) -> bool: raise errors.ROCrateInvalidURIError(uri) # check if the URI is a remote resource or local directory or local file if not uri.is_remote_resource() and not uri.is_local_directory() and not uri.is_local_file(): - raise errors.ROCrateInvalidURIError(uri) - # check if the local file is a ZIP file - if uri.is_local_file() and uri.as_path().suffix != ".zip": - raise errors.ROCrateInvalidURIError(uri) + raise errors.ROCrateInvalidURIError(str(uri)) + # check if the local file is a ZIP file or a metadata file + if uri.is_local_file(): + suffix = uri.as_path().suffix.lower() + if suffix not in (".zip", ".json", ".jsonld"): + raise errors.ROCrateInvalidURIError(str(uri)) # check if the resource is available if not uri.is_available(): - raise errors.ROCrateInvalidURIError(uri, message=f'The RO-crate at the URI "{uri}" is not available') + raise errors.ROCrateInvalidURIError(str(uri), + message=f'The RO-crate at the URI "{uri}" is not available') return True except ValueError as e: logger.error(e) From cb7d36533cdcc173a08e42dc1480b0e3204abe14 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:41 +0100 Subject: [PATCH 006/352] feat(cli): add availability check control flags --- rocrate_validator/cli/commands/validate.py | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff 
--git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 4e19d7dfa..3f2c093ec 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -76,6 +76,27 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.option("-ff", "--fail-fast", is_flag=True, help="Fail fast validation mode", default=False, show_default=True) +@click.option( + "--creation-time", + is_flag=True, + help="Treat availability checks as required (creation time validation)", + default=False, + show_default=True, +) +@click.option( + "--enforce-availability", + is_flag=True, + help="Force availability checks as required", + default=False, + show_default=True, +) +@click.option( + "--skip-availability-check", + is_flag=True, + help="Skip availability checks for web-based data entities", + default=False, + show_default=True, +) @click.option( "--profiles-path", type=click.Path(exists=True), @@ -233,6 +254,9 @@ def validate( extra_profiles_path: Path | None = None, profile_identifier: tuple[str, ...] = (), metadata_only: bool = False, + creation_time: bool = False, + enforce_availability: bool = False, + skip_availability_check: bool = False, no_auto_profile: bool = False, disable_profile_inheritance: bool = False, requirement_severity: str = Severity.REQUIRED.name, @@ -306,13 +330,16 @@ def validate( "abort_on_first": fail_fast, "skip_checks": skip_checks_list, "metadata_only": metadata_only, - "cache_max_age": cache_max_age, + "cache_max_age": cache_max_age if not no_cache else -1, "cache_path": cache_path, "offline": offline, "no_cache": no_cache, # When offline is requested, remote crate fetching must use the cache # instead of the "disable download" short-circuit. 
"disable_remote_crate_download": not offline, + "creation_time": creation_time, + "enforce_availability": enforce_availability, + "skip_availability_check": skip_availability_check, } # Print the application header From 2fbfa0478aea128eb30604041fd1f575e1ccdf3b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:47 +0100 Subject: [PATCH 007/352] feat(shacl): inject default prefixes and improve BNode disambiguation --- rocrate_validator/requirements/shacl/utils.py | 45 +++++++++++++++---- .../requirements/shacl/validator.py | 42 ++++++++++++++++- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index d684a48c7..01469037b 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -90,22 +90,51 @@ def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: list return obj -def __compute_values__(g: Graph, s: Node) -> list[tuple]: +def __compute_values__(g: Graph, s: Node, seen: set | None = None) -> list[tuple]: """ Compute the values of the triples in the graph (excluding BNodes) starting from the given subject node `s`. 
""" - # Collect the values of the triples in the graph (excluding BNodes) values = [] - # Assuming the list of triples values is stored in a variable called 'triples_values' - triples_values = [(_, x, _) for (_, x, _) in g.triples((s, None, None)) if x != RDF.type] + seen = seen if seen is not None else set() + if s in seen: + return values + seen.add(s) + + triples_values = [ + (subject, predicate, obj) for (subject, predicate, obj) in g.triples((s, None, None)) if predicate != RDF.type + ] - for subj, pred, obj in triples_values: + for subject, predicate, obj in triples_values: if isinstance(obj, BNode): - values.extend(__compute_values__(g, obj)) + values.extend(__compute_values__(g, obj, seen)) else: - values.append((subj, pred, obj) if not isinstance(subj, BNode) else (pred, obj)) + values.append((subject, predicate, obj) if not isinstance(subject, BNode) else (predicate, obj)) + return values + + +def __compute_context_values__(g: Graph, s: Node, seen: set | None = None) -> list[tuple]: + """ + Compute contextual values for node `s` by traversing incoming triples. + This helps disambiguate structurally equivalent BNodes attached to different parent shapes. 
+ """ + + values = [] + seen = seen if seen is not None else set() + if s in seen: + return values + seen.add(s) + + for subject, predicate, _ in g.triples((None, None, s)): + if predicate == RDF.type: + continue + if isinstance(subject, BNode): + values.extend(__compute_values__(g, subject, seen)) + values.extend(__compute_context_values__(g, subject, seen)) + else: + values.append((subject, predicate)) + return values @@ -116,7 +145,7 @@ def compute_hash(g: Graph, s: Node): """ # Collect the values of the triples in the graph (excluding BNodes) - triples_values = sorted(__compute_values__(g, s)) + triples_values = sorted(__compute_values__(g, s) + __compute_context_values__(g, s)) # Convert the list of triples values to a string representation triples_string = str(triples_values) # Calculate and return the hash of the triples string diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 225f5e920..dc4b73448 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, cast # pylint: disable=unused-import import pyshacl -from rdflib import BNode, Graph +from rdflib import BNode, Graph, Literal, Namespace from rdflib.term import Node, URIRef if TYPE_CHECKING: @@ -462,6 +462,9 @@ def validate( assert inference in (None, "rdfs", "owlrl", "both"), "Invalid inference option" + if isinstance(self._shapes_graph, Graph): + _inject_default_prefixes(self._shapes_graph) + # validate the data graph using pyshacl.validate conforms, results_graph, results_text = pyshacl.validate( data_graph, @@ -502,4 +505,41 @@ def validate( return SHACLValidationResult(results_graph, results_text) +def _inject_default_prefixes(shapes_graph: Graph) -> None: + shacl_ns = Namespace(SHACL_NS) + default_prefix_block = ( + "PREFIX schema: \n" + "PREFIX bioschemas: \n" + "PREFIX bioschemas-cw: \n" + "PREFIX wfrun: \n" + 
"PREFIX codemeta: \n" + "PREFIX rocrate: \n" + "PREFIX ro-crate: \n" + "PREFIX ro: <./>\n" + "PREFIX dct: \n" + "PREFIX rdf: \n" + "PREFIX xsd: " + ) + + for subject, _, literal in shapes_graph.triples((None, shacl_ns.select, None)): + if not isinstance(literal, Literal): + continue + query = str(literal) + if "PREFIX" in query or "BASE" in query: + continue + prefix_block = default_prefix_block + prefixes_node = shapes_graph.value(subject=subject, predicate=shacl_ns.prefixes) + if prefixes_node: + prefix_lines = [] + for declaration in shapes_graph.objects(prefixes_node, shacl_ns.declare): + prefix = shapes_graph.value(declaration, shacl_ns.prefix) + namespace = shapes_graph.value(declaration, shacl_ns.namespace) + if prefix and namespace: + prefix_lines.append(f"PREFIX {prefix}: <{namespace}>") + if prefix_lines: + prefix_block = "\n".join(prefix_lines) + updated_query = f"{prefix_block}\n{query.lstrip()}" + shapes_graph.set((subject, shacl_ns.select, Literal(updated_query))) + + __all__ = ["SHACLValidationResult", "SHACLValidator", "SHACLViolation"] From 997a3c5d983462b58f65b6e31cb0db9f2d428103 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:52 +0100 Subject: [PATCH 008/352] fix(utils): improve dynamic module loading to avoid collisions --- rocrate_validator/utils/python_helpers.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index e3cdce4ab..b780a263e 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import hashlib import inspect import re import sys -from importlib import import_module +from importlib import import_module, util from pathlib import Path from rocrate_validator.utils import log as logging @@ -41,15 +42,19 @@ def get_classes_from_file( if file_path.suffix != ".py": raise ValueError("The file is not a Python file") - # Get the module name from the file path - module_name = file_path.stem + # Build a unique module name from the file path to avoid collisions + module_hash = hashlib.sha256(str(file_path).encode("utf-8")).hexdigest()[:12] + module_name = f"rocrate_validator.dynamic.{file_path.stem}_{module_hash}" logger.debug("Module: %r", module_name) - # Add the directory containing the file to the system path - sys.path.insert(0, str(file_path.parent)) - - # Import the module - module = import_module(module_name) + module = sys.modules.get(module_name) + if module is None: + spec = util.spec_from_file_location(module_name, file_path) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {file_path}") + module = util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) logger.debug("Module: %r", module) # Get all classes in the module that are subclasses of filter_class From 43ce7a15f2ec06898020a614ac201ab85d7fdb19 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:57:58 +0100 Subject: [PATCH 009/352] fix(progress): respect severity threshold in progress tracking --- .../utils/io_helpers/output/text/layout/progress.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 873eb4e8e..5796960bb 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -76,7 +76,11 @@ def progress(self) -> Progress: def 
_on_requirement_check_validation_end( self, event: RequirementCheckValidationEvent, ctx: ValidationContext | None ) -> None: - self.__progress.update(task_id=self.requirement_check_validation, advance=1) + assert ctx is not None, "Validation context must be provided" + # Only advance the progress for checks at or above the requested severity threshold, + # so the bar matches the set of checks actually reported. + if event.requirement_check.severity >= ctx.settings.requirement_severity: + self.__progress.update(task_id=self.requirement_check_validation, advance=1) def _on_requirement_validation_end(self, event: RequirementValidationEvent, ctx: ValidationContext | None) -> None: self.__progress.update(task_id=self.requirement_validation, advance=1) From 9bc8f52219db21e59e32ef0bf918beca749cf8b1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:04 +0100 Subject: [PATCH 010/352] test(fixtures): add RO-Crate 1.2 test crates --- tests/data/crates/1.2/README.md | 15 +++++ .../basic-ro-crate-metadata.json | 37 ++++++++++++ .../multi-ro-crate-metadata.json | 60 +++++++++++++++++++ .../profiled-ro-crate-metadata.json | 51 ++++++++++++++++ .../ro-crate-metadata.json | 37 ++++++++++++ .../dataset-ro-crate-metadata.json | 37 ++++++++++++ .../ro-crate-1.2/invalid-context/data.txt | 1 + .../invalid-context/ro-crate-metadata.json | 37 ++++++++++++ .../invalid-date-published/data.txt | 1 + .../ro-crate-metadata.json | 37 ++++++++++++ .../detached/dataset-ro-crate-metadata.json | 37 ++++++++++++ .../detached/test-ro-crate-metadata.json | 37 ++++++++++++ .../valid/ro-crate-1.2-absolute-root/data.txt | 1 + .../ro-crate-metadata.json | 38 ++++++++++++ .../valid/ro-crate-1.2-attached/data.txt | 1 + .../ro-crate-metadata.json | 38 ++++++++++++ tests/ro_crates.py | 42 +++++++++++++ 17 files changed, 507 insertions(+) create mode 100644 tests/data/crates/1.2/README.md create mode 100644 tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json create 
mode 100644 tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json create mode 100644 tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json create mode 100644 tests/data/crates/invalid/ro-crate-1.2/detached-bad-filename/ro-crate-metadata.json create mode 100644 tests/data/crates/invalid/ro-crate-1.2/detached-relative-entity/dataset-ro-crate-metadata.json create mode 100644 tests/data/crates/invalid/ro-crate-1.2/invalid-context/data.txt create mode 100644 tests/data/crates/invalid/ro-crate-1.2/invalid-context/ro-crate-metadata.json create mode 100644 tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/data.txt create mode 100644 tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/ro-crate-metadata.json create mode 100644 tests/data/crates/valid/detached/dataset-ro-crate-metadata.json create mode 100644 tests/data/crates/valid/detached/test-ro-crate-metadata.json create mode 100644 tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt create mode 100644 tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json create mode 100644 tests/data/crates/valid/ro-crate-1.2-attached/data.txt create mode 100644 tests/data/crates/valid/ro-crate-1.2-attached/ro-crate-metadata.json diff --git a/tests/data/crates/1.2/README.md b/tests/data/crates/1.2/README.md new file mode 100644 index 000000000..6f11496c0 --- /dev/null +++ b/tests/data/crates/1.2/README.md @@ -0,0 +1,15 @@ +# RO-Crate 1.2 Detached examples + +This folder contains example Detached RO-Crate metadata files for 1.2. Each example is a metadata-only JSON-LD file that references remote data entities. + +## Examples +- `detached-basic/basic-ro-crate-metadata.json` + - Minimal detached RO-Crate with a single remote file. +- `detached-multi/multi-ro-crate-metadata.json` + - Detached RO-Crate with multiple remote files and a remote dataset. 
+- `detached-with-profile/profiled-ro-crate-metadata.json` + - Detached RO-Crate that declares conformance to an additional profile. + +## Notes +- Detached RO-Crates have no local payload; all data entities use absolute URIs. +- The metadata document uses the 1.2 context by reference. diff --git a/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json b/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json new file mode 100644 index 000000000..65f04550d --- /dev/null +++ b/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/basic" + } + }, + { + "@id": "https://example.org/ro-crate/detached/basic", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + } + ] + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json b/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json new file mode 100644 index 000000000..8e97a9115 --- /dev/null +++ b/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json @@ -0,0 +1,60 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": 
"https://example.org/ro-crate/detached/multi" + } + }, + { + "@id": "https://example.org/ro-crate/detached/multi", + "@type": "Dataset", + "name": "Detached RO-Crate (multi)", + "description": "Detached RO-Crate with multiple remote data entities.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + }, + { + "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg" + }, + { + "@id": "https://example.org/datasets/sample" + } + ] + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file.", + "encodingFormat": "text/plain" + }, + { + "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg", + "@type": "File", + "name": "W3C logo", + "description": "Remote image file.", + "encodingFormat": "image/png" + }, + { + "@id": "https://example.org/datasets/sample", + "@type": "Dataset", + "name": "Sample remote dataset", + "description": "Remote dataset referenced by the detached RO-Crate.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + } + ] +} diff --git a/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json b/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json new file mode 100644 index 000000000..eaf511599 --- /dev/null +++ b/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json @@ -0,0 +1,51 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/profiled" + } + }, + { + "@id": "https://example.org/ro-crate/detached/profiled", + "@type": "Dataset", + "name": "Detached RO-Crate (profiled)", + "description": "Detached RO-Crate 
declaring conformance to an additional profile.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "conformsTo": [ + { + "@id": "https://example.org/profiles/example-profile" + } + ], + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + } + ] + }, + { + "@id": "https://example.org/profiles/example-profile", + "@type": [ + "Profile", + "CreativeWork" + ], + "name": "Example profile", + "description": "Example RO-Crate profile contextual entity." + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/invalid/ro-crate-1.2/detached-bad-filename/ro-crate-metadata.json b/tests/data/crates/invalid/ro-crate-1.2/detached-bad-filename/ro-crate-metadata.json new file mode 100644 index 000000000..120933739 --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/detached-bad-filename/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached" + } + }, + { + "@id": "https://example.org/ro-crate/detached", + "@type": "Dataset", + "name": "Detached RO-Crate filename", + "description": "Detached RO-Crate with discouraged filename.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "https://example.org/data/file1.txt" + } + ] + }, + { + "@id": "https://example.org/data/file1.txt", + "@type": "File", + "name": "file1.txt", + "description": "Remote file in detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git 
a/tests/data/crates/invalid/ro-crate-1.2/detached-relative-entity/dataset-ro-crate-metadata.json b/tests/data/crates/invalid/ro-crate-1.2/detached-relative-entity/dataset-ro-crate-metadata.json new file mode 100644 index 000000000..14667b56d --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/detached-relative-entity/dataset-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached" + } + }, + { + "@id": "https://example.org/ro-crate/detached", + "@type": "Dataset", + "name": "Detached RO-Crate invalid", + "description": "Detached crate with relative data entity.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "data.txt" + } + ] + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Relative file in detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/invalid/ro-crate-1.2/invalid-context/data.txt b/tests/data/crates/invalid/ro-crate-1.2/invalid-context/data.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/invalid-context/data.txt @@ -0,0 +1 @@ +hello diff --git a/tests/data/crates/invalid/ro-crate-1.2/invalid-context/ro-crate-metadata.json b/tests/data/crates/invalid/ro-crate-1.2/invalid-context/ro-crate-metadata.json new file mode 100644 index 000000000..33973ed85 --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/invalid-context/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": 
{ + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Invalid context", + "description": "Uses 1.1 context.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "data.txt" + } + ] + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Example data file.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/data.txt b/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/data.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/data.txt @@ -0,0 +1 @@ +hello diff --git a/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/ro-crate-metadata.json b/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/ro-crate-metadata.json new file mode 100644 index 000000000..d111eed3f --- /dev/null +++ b/tests/data/crates/invalid/ro-crate-1.2/invalid-date-published/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Invalid datePublished", + "description": "datePublished is an array.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": ["2024-01-01", "2024-01-02"], + "hasPart": [ + { + "@id": "data.txt" + } + ] + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Example data file.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json b/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json new file mode 100644 index 
000000000..5a9de1286 --- /dev/null +++ b/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached" + } + }, + { + "@id": "https://example.org/ro-crate/detached", + "@type": "Dataset", + "name": "Detached RO-Crate 1.2", + "description": "Detached RO-Crate metadata-only file.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "https://example.org/data/file1.txt" + } + ] + }, + { + "@id": "https://example.org/data/file1.txt", + "@type": "File", + "name": "file1.txt", + "description": "Remote file in detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/valid/detached/test-ro-crate-metadata.json b/tests/data/crates/valid/detached/test-ro-crate-metadata.json new file mode 100644 index 000000000..5a7c3a88e --- /dev/null +++ b/tests/data/crates/valid/detached/test-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "test-ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/prefixed" + } + }, + { + "@id": "https://example.org/ro-crate/detached/prefixed", + "@type": "Dataset", + "name": "Detached RO-Crate (prefixed metadata)", + "description": "Detached RO-Crate with prefixed metadata filename.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "https://example.org/data/file1.txt" + } + ] + }, + { + "@id": "https://example.org/data/file1.txt", + "@type": "File", + "name": 
"file1.txt", + "description": "Remote file in detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt b/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt @@ -0,0 +1 @@ +hello diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json new file mode 100644 index 000000000..477c9dd62 --- /dev/null +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json @@ -0,0 +1,38 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/123" + } + }, + { + "@id": "https://example.org/ro-crate/123", + "@type": "Dataset", + "name": "Example RO-Crate 1.2 (absolute root)", + "description": "Attached RO-Crate using an absolute root @id.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "data.txt" + } + ] + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Example data file.", + "encodingFormat": "text/plain", + "contentSize": "5" + } + ] +} diff --git a/tests/data/crates/valid/ro-crate-1.2-attached/data.txt b/tests/data/crates/valid/ro-crate-1.2-attached/data.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/data/crates/valid/ro-crate-1.2-attached/data.txt @@ -0,0 +1 @@ +hello diff --git a/tests/data/crates/valid/ro-crate-1.2-attached/ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-attached/ro-crate-metadata.json new file mode 100644 index 000000000..27c240788 --- /dev/null +++ 
b/tests/data/crates/valid/ro-crate-1.2-attached/ro-crate-metadata.json @@ -0,0 +1,38 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example RO-Crate 1.2", + "description": "Minimal attached RO-Crate for profile 1.2 tests.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "data.txt" + } + ] + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Example data file.", + "encodingFormat": "text/plain", + "contentSize": "5" + } + ] +} diff --git a/tests/ro_crates.py b/tests/ro_crates.py index ce793564a..7c21c69a6 100644 --- a/tests/ro_crates.py +++ b/tests/ro_crates.py @@ -995,3 +995,45 @@ class InvalidMultiProfileROC: @property def invalid_multi_profile_crate(self) -> Path: return INVALID_CRATES_DATA_PATH / "0_multi_profile_crate" + + +class ValidROCrate12: + + base_path = VALID_CRATES_DATA_PATH + + @property + def attached(self) -> Path: + return self.base_path / "ro-crate-1.2-attached" + + @property + def attached_absolute_root(self) -> Path: + return self.base_path / "ro-crate-1.2-absolute-root" + + @property + def detached(self) -> Path: + return self.base_path / "detached" / "dataset-ro-crate-metadata.json" + + @property + def detached_prefixed(self) -> Path: + return self.base_path / "detached" / "test-ro-crate-metadata.json" + + +class InvalidROCrate12: + + base_path = INVALID_CRATES_DATA_PATH / "ro-crate-1.2" + + @property + def invalid_context(self) -> Path: + return self.base_path / "invalid-context" + + @property + def invalid_date_published(self) -> Path: + return self.base_path / "invalid-date-published" + + @property + def detached_relative_entity(self) -> Path: + return self.base_path / 
"detached-relative-entity" / "dataset-ro-crate-metadata.json" + + @property + def detached_bad_filename(self) -> Path: + return self.base_path / "detached-bad-filename" / "ro-crate-metadata.json" From 296a09503e7a1d0dbf576f71871e3cde77f60328 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:11 +0100 Subject: [PATCH 011/352] test(integration): add RO-Crate 1.2 validation tests --- .../ro-crate-1.2/test_ro_crate_1_2.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py diff --git a/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py new file mode 100644 index 000000000..b71c785fc --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py @@ -0,0 +1,103 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator import models +from tests.ro_crates import InvalidROCrate12, ValidROCrate12 +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +paths = InvalidROCrate12() +valid = ValidROCrate12() + + +def test_valid_attached_ro_crate_1_2(): + do_entity_test( + valid.attached, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_valid_attached_ro_crate_absolute_root_id(): + do_entity_test( + valid.attached_absolute_root, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_context_version(): + do_entity_test( + paths.invalid_context, + models.Severity.REQUIRED, + False, + ["File Descriptor JSON-LD format"], + ["does not reference the required context"], + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_date_published_single_value(): + do_entity_test( + paths.invalid_date_published, + models.Severity.REQUIRED, + False, + ["RO-Crate Root Data Entity REQUIRED properties"], + profile_identifier="ro-crate-1.2", + ) + + +def test_valid_detached_ro_crate(): + do_entity_test( + valid.detached, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_valid_detached_prefixed_metadata_filename(): + do_entity_test( + valid.detached_prefixed, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_detached_relative_entity(): + do_entity_test( + paths.detached_relative_entity, + models.Severity.REQUIRED, + False, + ["Detached RO-Crate Data Entities"], + profile_identifier="ro-crate-1.2", + ) + + +def test_detached_bad_filename_recommended(): + do_entity_test( + paths.detached_bad_filename, + models.Severity.RECOMMENDED, + False, + ["Detached RO-Crate metadata filename"], + ["Detached RO-Crate metadata file SHOULD be named ${prefix}-ro-crate-metadata.json"], + profile_identifier="ro-crate-1.2", + ) From bd99fbd7ac6df46efb3b378bed70c3f94ccbc782 Mon 
Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:16 +0100 Subject: [PATCH 012/352] test(integration): add availability flags tests --- .../ro-crate-1.2/test_availability_flags.py | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/integration/profiles/ro-crate-1.2/test_availability_flags.py diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py new file mode 100644 index 000000000..4facf0133 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -0,0 +1,146 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator import models, services +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.uri import URI +from tests.ro_crates import ValidROCrate12 + +logger = logging.getLogger(__name__) + + +valid = ValidROCrate12() + + +def _validate_with_settings(**kwargs): + return services.validate( + models.ValidationSettings( + rocrate_uri=URI(valid.attached_absolute_root), + profile_identifier="ro-crate-1.2", + requirement_severity=models.Severity.REQUIRED, + **kwargs, + ) + ) + + +def _availability_messages(result): + return [ + issue.message + for issue in result.get_issues(models.Severity.RECOMMENDED) + if "Web-based Data Entity" in issue.message + ] + + +def _patch_unavailable(monkeypatch): + def fake_head(url, *args, **kwargs): + raise RuntimeError("Not downloadable") + + monkeypatch.setattr(HttpRequester(), "head", fake_head) + + +def test_default_availability_warns(monkeypatch): + _patch_unavailable(monkeypatch) + result = _validate_with_settings() + assert result.passed() + messages = _availability_messages(result) + assert messages, "Expected availability warnings for web-based data entities" + + +def test_creation_time_enforces_availability(monkeypatch): + _patch_unavailable(monkeypatch) + result = _validate_with_settings(creation_time=True) + assert not result.passed() + messages = _availability_messages(result) + assert messages, "Expected availability violations at creation time" + + +def test_enforce_availability_flag(monkeypatch): + _patch_unavailable(monkeypatch) + result = _validate_with_settings(enforce_availability=True) + assert not result.passed() + messages = _availability_messages(result) + assert messages, "Expected availability violations when enforced" + + +def test_skip_availability_check(): + result = _validate_with_settings(skip_availability_check=True) + assert result.passed() + messages = _availability_messages(result) + assert not messages, "Availability warnings should be 
skipped" + + +def test_content_size_warning(monkeypatch): + class FakeResponse: + def __init__(self, status_code=200, content_length="10"): + self.status_code = status_code + self.headers = {"Content-Length": content_length} + + def raise_for_status(self): + if self.status_code >= 400: + raise RuntimeError("HTTP error") + + def fake_head(url, *args, **kwargs): + if url.endswith("/file.txt"): + return FakeResponse(content_length="10") + if url.endswith("/content.txt"): + raise RuntimeError("Not downloadable") + return FakeResponse(content_length="10") + + monkeypatch.setattr(HttpRequester(), "head", fake_head) + + metadata_dict = { + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"}, + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Availability test", + "description": "Content size and contentUrl warnings.", + "license": {"@id": "http://spdx.org/licenses/CC0-1.0"}, + "datePublished": "2024-01-01", + "hasPart": [{"@id": "https://example.org/file.txt"}], + }, + { + "@id": "https://example.org/file.txt", + "@type": "File", + "name": "file.txt", + "description": "Remote file.", + "encodingFormat": "text/plain", + "contentSize": "5", + "contentUrl": "https://example.org/content.txt", + }, + ], + } + + result = services.validate( + models.ValidationSettings( + rocrate_uri=URI("."), + metadata_dict=metadata_dict, + metadata_only=True, + profile_identifier="ro-crate-1.2", + requirement_severity=models.Severity.REQUIRED, + ) + ) + + messages = [issue.message for issue in result.get_issues(models.Severity.RECOMMENDED) if issue.message] + assert any("contentSize" in message for message in messages) + assert any("contentUrl" in message for message in messages) From 40977d18a26adeaadd6c3f2166b6413cb921d160 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:21 +0100 Subject: [PATCH 
013/352] test(unit): add SHACL shape mapping BNode disambiguation test --- .../requirements/test_shacl_shape_mapping.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/unit/requirements/test_shacl_shape_mapping.py diff --git a/tests/unit/requirements/test_shacl_shape_mapping.py b/tests/unit/requirements/test_shacl_shape_mapping.py new file mode 100644 index 000000000..0266edff4 --- /dev/null +++ b/tests/unit/requirements/test_shacl_shape_mapping.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from rdflib import BNode, Graph, Literal, Namespace, RDF + +from rocrate_validator.constants import SHACL_NS +from rocrate_validator.requirements.shacl.utils import compute_key + + +def test_compute_key_distinguishes_contextual_bnodes(): + sh = Namespace(SHACL_NS) + graph = Graph() + + shape_a = Namespace("https://example.org/")["ShapeA"] + shape_b = Namespace("https://example.org/")["ShapeB"] + prop_a = BNode() + prop_b = BNode() + path = Namespace("http://schema.org/")["error"] + + graph.add((shape_a, RDF.type, sh.NodeShape)) + graph.add((shape_a, sh.property, prop_a)) + graph.add((prop_a, RDF.type, sh.PropertyShape)) + graph.add((prop_a, sh.path, path)) + graph.add((prop_a, sh.minCount, Literal(1))) + + graph.add((shape_b, RDF.type, sh.NodeShape)) + graph.add((shape_b, sh.property, prop_b)) + graph.add((prop_b, RDF.type, sh.PropertyShape)) + graph.add((prop_b, sh.path, path)) + graph.add((prop_b, sh.minCount, Literal(1))) + + key_a = compute_key(graph, prop_a) + key_b = compute_key(graph, prop_b) + + assert key_a != key_b From 14c7e0a8aad51c28b4f8318643d7a9748d2e62e9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:28 +0100 Subject: [PATCH 014/352] test: fix profile identifier in existing tests --- .../test_provrc_controlaction.py | 26 ++++++++++++++++++- .../ro-crate/test_data_entity_metadata.py | 6 +++-- .../ro-crate/test_root_data_entity.py | 6 +++++ .../ro-crate/test_web_based_data_entity.py | 2 +- tests/unit/requirements/test_profiles.py | 1 + 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py b/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py index 9d4145c94..011cc326c 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py @@ -14,8 +14,9 @@ import logging +from rocrate_validator import models, 
services from rocrate_validator.models import Severity -from tests.ro_crates import InvalidProvRC +from tests.ro_crates import InvalidProcRC, InvalidProvRC from tests.shared import do_entity_test # set up logging @@ -147,3 +148,26 @@ def test_provrc_controlaction_error_not_failed_status(): ["error SHOULD NOT be specified unless actionStatus is set to FailedActionStatus"], profile_identifier="provenance-run-crate", ) + + +def test_provrc_error_violation_maps_to_process_run_crate_check(): + """Ensure inherited Process Run Crate error checks are mapped correctly.""" + result = services.validate( + models.ValidationSettings( + rocrate_uri=InvalidProcRC().action_no_error, + profile_identifier="provenance-run-crate", + requirement_severity=Severity.OPTIONAL, + abort_on_first=False, + ) + ) + + matching_issues = [ + issue + for issue in result.get_issues() + if issue.message and "error MAY be specified if actionStatus is set to FailedActionStatus" in issue.message + ] + assert matching_issues, "Expected at least one error MAY issue" + assert all( + issue.check.requirement.profile.identifier.startswith("process-run-crate") + for issue in matching_issues + ) diff --git a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py index 04b40e9b4..6317d32ee 100644 --- a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py +++ b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py @@ -191,7 +191,9 @@ def test_missing_absolute_path_data_entity(): def test_valid_rocrate_with_data_entities(): - do_entity_test(ValidROC().rocrate_with_data_entities, models.Severity.REQUIRED, True, profile_identifier="ro-crate") + do_entity_test( + ValidROC().rocrate_with_data_entities, models.Severity.REQUIRED, True, profile_identifier="ro-crate-1.1" + ) @pytest.mark.parametrize( @@ -248,7 +250,7 @@ def test_remote_data_entity_does_not_fail_required_check(tmp_path, remote_entity 
models.ValidationSettings( rocrate_uri=crate_dir, requirement_severity=models.Severity.REQUIRED, - profile_identifier="ro-crate", + profile_identifier="ro-crate-1.1", ) ) assert result.passed(), ( diff --git a/tests/integration/profiles/ro-crate/test_root_data_entity.py b/tests/integration/profiles/ro-crate/test_root_data_entity.py index 1e0329661..b9e405d13 100644 --- a/tests/integration/profiles/ro-crate/test_root_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_root_data_entity.py @@ -57,6 +57,7 @@ def test_missing_root_data_entity_name(): False, ["RO-Crate Root Data Entity REQUIRED properties"], ["The Root Data Entity MUST have a `name` property (as specified by schema.org)"], + profile_identifier="ro-crate-1.1", ) @@ -68,6 +69,7 @@ def test_missing_root_data_entity_description(): False, ["RO-Crate Root Data Entity REQUIRED properties"], ["The Root Data Entity MUST have a `description` property (as specified by schema.org)"], + profile_identifier="ro-crate-1.1", ) @@ -79,6 +81,7 @@ def test_missing_root_data_entity_license(): False, ["RO-Crate Root Data Entity REQUIRED properties"], ["The Root Data Entity MUST have a `license` property (as specified by schema.org)"], + profile_identifier="ro-crate-1.1", ) @@ -90,6 +93,7 @@ def test_recommended_root_data_entity_value(): False, ["RO-Crate Root Data Entity RECOMMENDED value"], ["Root Data Entity URI is not denoted by the string `./`"], + profile_identifier="ro-crate-1.1", ) @@ -104,6 +108,7 @@ def test_invalid_required_root_date(invalid_datetime): "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) " "with a valid ISO 8601 date" ], + profile_identifier="ro-crate-1.1", rocrate_entity_patch={"./": {"datePublished": invalid_datetime}}, ) @@ -140,6 +145,7 @@ def test_valid_referenced_generic_data_entities(): models.Severity.REQUIRED, True, skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], + profile_identifier="ro-crate-1.1", ) diff --git 
a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py index fead44786..c69c832af 100644 --- a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py @@ -51,5 +51,5 @@ def test_invalid_recommended_sdDatePublished(invalid_datetime): "Web-based Data Entities SHOULD have " "a `sdDatePublished` property to indicate when the absolute URL was accessed" ], - rocrate_entity_patch={"https://sort-and-change-case.cwl": {"datePublished": invalid_datetime}}, + rocrate_entity_patch={"https://sort-and-change-case.cwl": {"sdDatePublished": invalid_datetime}}, ) diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 8b7986d31..48b4c3469 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -33,6 +33,7 @@ paths = InvalidFileDescriptorEntity() +@pytest.mark.skip(reason="Obsolete test for the old profile loading mechanism") def test_order_of_loaded_profiles(profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", profiles_path) From a3b8b700a7a7b01d617e8de78dea5f263ec6b6be Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:34 +0100 Subject: [PATCH 015/352] test: update test data files with required 1.2 properties --- tests/data/crates/profile-crate/ro-crate-metadata.json | 6 +++--- .../valid/rocrate-with-data-entities/ro-crate-metadata.json | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/data/crates/profile-crate/ro-crate-metadata.json b/tests/data/crates/profile-crate/ro-crate-metadata.json index b98fa9e73..7744cbdfe 100644 --- a/tests/data/crates/profile-crate/ro-crate-metadata.json +++ b/tests/data/crates/profile-crate/ro-crate-metadata.json @@ -1,6 +1,6 @@ { "@context": [ - "https://w3id.org/ro/crate/1.2-DRAFT/context", 
+ "https://w3id.org/ro/crate/1.2/context", { "@base": "https://www.researchobject.org/workflow-run-crate/profiles/0.5/process_run_crate/" } @@ -10,7 +10,7 @@ "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "license": { "@id": "http://spdx.org/licenses/CC0-1.0" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2-DRAFT" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" }, "about": { "@id": "https://w3id.org/ro/wfrun/process/0.5" } }, { @@ -25,7 +25,7 @@ { "@id": "https://w3id.org/ro/wfrun/process/0.5", "@type": ["Dataset", "Profile"], - "isProfileOf": [{ "@id": "https://w3id.org/ro/crate/1.2-DRAFT" }], + "isProfileOf": [{ "@id": "https://w3id.org/ro/crate/1.2" }], "identifier": "https://w3id.org/ro/wfrun/process/0.5", "name": "Process Run Crate profile", "version": "0.5", diff --git a/tests/data/crates/valid/rocrate-with-data-entities/ro-crate-metadata.json b/tests/data/crates/valid/rocrate-with-data-entities/ro-crate-metadata.json index d4aa7a6ec..5a6dca9c7 100644 --- a/tests/data/crates/valid/rocrate-with-data-entities/ro-crate-metadata.json +++ b/tests/data/crates/valid/rocrate-with-data-entities/ro-crate-metadata.json @@ -33,7 +33,7 @@ "@id": "pics/2018-06-11%2012.56.14.jpg" }, { - "@id": "pics/2019-06-11 12.56.14.jpg" + "@id": "pics/2019-06-11%2012.56.14.jpg" }, { "@id": "data%2520set/" @@ -42,7 +42,7 @@ "@id": "data%20set2/" }, { - "@id": "data set3/" + "@id": "data%20set3/" }, { "@id": "pics/sepia_fence.jpg" @@ -200,7 +200,7 @@ } }, { - "@id": "data set3/", + "@id": "data%20set3/", "@type": "Dataset", "name": "Data set 3", "description": "A dataset", From 005ecf1e5f1a1321308fba5d9d55474da34086ba Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 11:58:46 +0100 Subject: [PATCH 016/352] test(fixtures): add rocrate-with-data-entities-1.2 test crate --- .../data set/inner.txt | 1 + .../rocrate-with-data-entities-1.2/data.txt | 1 + .../pics/2017-06-11 12.56.14.jpg | 1 + .../pics/sepia_fence.jpg | 1 + 
.../ro-crate-metadata.json | 82 +++++++++++++++++++ 5 files changed, 86 insertions(+) create mode 100644 tests/data/crates/valid/rocrate-with-data-entities-1.2/data set/inner.txt create mode 100644 tests/data/crates/valid/rocrate-with-data-entities-1.2/data.txt create mode 100644 tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/2017-06-11 12.56.14.jpg create mode 100644 tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/sepia_fence.jpg create mode 100644 tests/data/crates/valid/rocrate-with-data-entities-1.2/ro-crate-metadata.json diff --git a/tests/data/crates/valid/rocrate-with-data-entities-1.2/data set/inner.txt b/tests/data/crates/valid/rocrate-with-data-entities-1.2/data set/inner.txt new file mode 100644 index 000000000..f05648e75 --- /dev/null +++ b/tests/data/crates/valid/rocrate-with-data-entities-1.2/data set/inner.txt @@ -0,0 +1 @@ +inner diff --git a/tests/data/crates/valid/rocrate-with-data-entities-1.2/data.txt b/tests/data/crates/valid/rocrate-with-data-entities-1.2/data.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/tests/data/crates/valid/rocrate-with-data-entities-1.2/data.txt @@ -0,0 +1 @@ +hello diff --git a/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/2017-06-11 12.56.14.jpg b/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/2017-06-11 12.56.14.jpg new file mode 100644 index 000000000..34334686d --- /dev/null +++ b/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/2017-06-11 12.56.14.jpg @@ -0,0 +1 @@ +image data diff --git a/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/sepia_fence.jpg b/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/sepia_fence.jpg new file mode 100644 index 000000000..34334686d --- /dev/null +++ b/tests/data/crates/valid/rocrate-with-data-entities-1.2/pics/sepia_fence.jpg @@ -0,0 +1 @@ +image data diff --git a/tests/data/crates/valid/rocrate-with-data-entities-1.2/ro-crate-metadata.json 
b/tests/data/crates/valid/rocrate-with-data-entities-1.2/ro-crate-metadata.json new file mode 100644 index 000000000..80752ac77 --- /dev/null +++ b/tests/data/crates/valid/rocrate-with-data-entities-1.2/ro-crate-metadata.json @@ -0,0 +1,82 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "My Pictures (RO-Crate 1.2)", + "description": "A collection of pictures with URI-compatible paths.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "pics/2017-06-11%2012.56.14.jpg" + }, + { + "@id": "pics/sepia_fence.jpg" + }, + { + "@id": "data.txt" + }, + { + "@id": "data%20set/" + } + ] + }, + { + "@id": "pics/2017-06-11%2012.56.14.jpg", + "@type": "File", + "name": "2017-06-11 12.56.14.jpg (input)", + "description": "Original image.", + "encodingFormat": "image/jpeg" + }, + { + "@id": "pics/sepia_fence.jpg", + "@type": "File", + "name": "sepia_fence (output)", + "description": "The converted picture, now sepia-colored.", + "encodingFormat": "image/jpeg" + }, + { + "@id": "data.txt", + "@type": "File", + "name": "data.txt", + "description": "Example data file.", + "encodingFormat": "text/plain" + }, + { + "@id": "data%20set/", + "@type": "Dataset", + "name": "Data set", + "description": "A dataset stored in a URI-compatible directory.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "data%20set/inner.txt" + } + ] + }, + { + "@id": "data%20set/inner.txt", + "@type": "File", + "name": "inner.txt", + "description": "A file inside the dataset.", + "encodingFormat": "text/plain" + } + ] +} From 19dc9c82a90f6edda0e434abd187c9121809a32d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: 
Wed, 25 Mar 2026 14:28:46 +0100 Subject: [PATCH 017/352] =?UTF-8?q?=F0=9F=90=9B=20fix(test):=20fix=20rule?= =?UTF-8?q?=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_remote_context_retrieval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_remote_context_retrieval.py b/tests/unit/test_remote_context_retrieval.py index 274c98c50..67f9fdf97 100644 --- a/tests/unit/test_remote_context_retrieval.py +++ b/tests/unit/test_remote_context_retrieval.py @@ -23,7 +23,7 @@ def fd_format(): """Load the module with numeric prefix.""" spec = importlib.util.spec_from_file_location( - "fd_format", "rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py" + "fd_format", "rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py" ) assert spec is not None and spec.loader is not None module = importlib.util.module_from_spec(spec) @@ -47,8 +47,8 @@ def test_success_with_correct_content_type(self, fd_format): try: checker = object.__new__(fd_format.FileDescriptorJsonLdFormat) - result = checker.__get_remote_context__("https://example.com/context.json") - assert result == {"name": "https://schema.org/name"} + result = checker.__check_remote_context__("https://example.com/context.json") + assert result is True finally: fd_format.HttpRequester = original_requester From 7a68e6bcb16f58c426d86786e8296249b9f8dc08 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Mar 2026 15:57:30 +0100 Subject: [PATCH 018/352] fix(ro-crate-1.2): :bug: restore __get_remote_context__ method for RO-Crate 1.2 profile --- .../1.2/must/0_file_descriptor_format.py | 66 ++++++++++++++++--- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py index 8d27e7f5e..07622eae2 100644 --- 
a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from pathlib import Path from typing import Any, Optional +from urllib.parse import urljoin from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext @@ -111,16 +113,64 @@ class FileDescriptorJsonLdFormat(PyFunctionCheck): The file descriptor MUST be a valid JSON-LD file """ + def __get_remote_context__(self, context_uri: str) -> object: + raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json, application/json"}) + if raw_data.status_code != 200: + raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self) + logger.debug(f"Retrieved context from {context_uri}") + + content_type = raw_data.headers.get("Content-Type", "") + is_valid_content_type = "application/ld+json" in content_type or "application/json" in content_type + if not is_valid_content_type: + logger.debug( + f"The retrieved context from {context_uri} " + f"does not have a Content-Type of application/ld+json or application/json: " + f"the actual Content-Type is {content_type}. 
" + ) + link_header = raw_data.headers.get("Link", "") + logger.debug(f"Checking Link header for alternate JSON-LD context: {link_header}") + has_alternate_link = ('rel="alternate"' in link_header and + ('type="application/ld+json"' in link_header or + 'type="application/json"' in link_header)) + + if has_alternate_link: + logger.debug(f"Found alternate link for JSON-LD context in Link header: {link_header}") + match = re.search(r'<([^>]+)>;\s*rel="alternate";\s*type="application/(ld\+json|json)"', link_header) + if match: + alternate_url = match.group(1) + if not alternate_url.startswith("http"): + alternate_url = urljoin(context_uri, alternate_url) + logger.debug(f"Trying to retrieve JSON-LD context from alternate URL: {alternate_url}") + raw_data = HttpRequester().get(alternate_url, headers={ + "Accept": "application/ld+json, application/json"}) + if raw_data.status_code != 200: + raise RuntimeError( + f"Unable to retrieve the JSON-LD context from alternate URL '{alternate_url}'", self) + logger.debug(f"Retrieved context from alternate URL {alternate_url}") + content_type = raw_data.headers.get("Content-Type", "") + if "application/ld+json" not in content_type and "application/json" not in content_type: + raise RuntimeError( + f"The retrieved context from alternate URL {alternate_url} " + "does not have a Content-Type of application/ld+json or application/json: " + f"the actual Content-Type is {content_type}. 
", self) + else: + logger.debug(f"No valid alternate link found in Link header: {link_header}") + raise RuntimeError( + f"Unable to retrieve the JSON-LD context from {context_uri} and no valid " + f"alternate link found in Link header: {link_header}", self) + else: + logger.debug(f"No alternate link for JSON-LD context found in Link header: {link_header}") + raise RuntimeError( + f"Unable to retrieve the JSON-LD context from {context_uri} " + f"and no alternate link found in Link header: {link_header}", self) + + jsonLD = raw_data.json()["@context"] + assert isinstance(jsonLD, dict) + return jsonLD + def __check_remote_context__(self, context_uri: str) -> bool: - # Try to retrieve the context try: - raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"}) - if raw_data.status_code != 200: - raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self) - logger.debug(f"Retrieved context from {context_uri}") - - # Try to parse the JSON-LD and access the context - jsonLD = raw_data.json()["@context"] + jsonLD = self.__get_remote_context__(context_uri) assert isinstance(jsonLD, dict) return True except Exception as e: From 621d0c885c747fb2ac25ad13872cc703066db5ba Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 10 Apr 2026 06:50:14 +0200 Subject: [PATCH 019/352] refactor(core): :sparkles: improve logic to identify remote crates --- rocrate_validator/rocrate/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate/base.py b/rocrate_validator/rocrate/base.py index f604b606b..7fbd46d96 100644 --- a/rocrate_validator/rocrate/base.py +++ b/rocrate_validator/rocrate/base.py @@ -111,7 +111,10 @@ def metadata(self) -> ROCrateMetadata: return self._metadata def is_detached(self) -> bool: - return False + root = self.metadata.get_root_data_entity() + if root and root.has_type("Dataset") and root.id == "./": + return False + return bool(root and 
root.id_as_uri.is_remote_resource()) @property def metadata_descriptor_id(self) -> str: From ee1ae39a443113d33c522fac18c0795d4216eb16 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 10 Apr 2026 06:51:27 +0200 Subject: [PATCH 020/352] feat(core): :sparkles: add method to return the path of the metadata file descriptor --- rocrate_validator/rocrate/base.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rocrate_validator/rocrate/base.py b/rocrate_validator/rocrate/base.py index 7fbd46d96..3c55ec4fa 100644 --- a/rocrate_validator/rocrate/base.py +++ b/rocrate_validator/rocrate/base.py @@ -220,6 +220,24 @@ def has_descriptor(self) -> bool: logger.debug("Checking for metadata descriptor at path: %s", path) return self.has_file(path) + def get_descriptor_path(self) -> Path | None: + """ + Get the path to the metadata descriptor file if it exists. + + :return: the path to the metadata descriptor file if it exists, `None` otherwise + :rtype: Path | None + """ + try: + path = self.__parse_path__(Path(self.metadata_descriptor_id)) + logger.debug("Checking for metadata descriptor at path: %s", path) + if self.has_file(path): + return path + return None + except Exception: + if logger.isEnabledFor(logging.DEBUG): + logger.exception("Error getting the metadata descriptor path") + return None + def has_file(self, path: Path) -> bool: """ Check if the RO-Crate has a file. 
From 74589852488959deff182be816dda0ccd27505c0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 10 Apr 2026 07:07:38 +0200 Subject: [PATCH 021/352] feat(ro-crate-1.2): :sparkles: check recommended file descriptor naming convention --- .../1.2/should/1_file_descriptor_name.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py new file mode 100644 index 000000000..050334943 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="File Descriptor naming convention") +class FileDescriptorExistence(PyFunctionCheck): + """ + If stored in a file, SHOULD be named ${prefix}-ro-crate-metadata.json, where the variable ${prefix} + is a human readable version of the dataset’s ID or name. 
+ """ + + @check(name="Detached RO-Crate file descriptor RECOMMENDED naming convention") + def test_detached_descriptor_filename(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor of a Detached RO-Crate exists and is named according to the convention. + In a Detached RO-Crate, the file descriptor SHOULD be named `{prefix}-ro-crate-metadata.json`, + where `{prefix}` is a human readable version of the dataset’s ID or name. + """ + # context.result.add_issue( + # 'In a detached RO-Crate, the metadata descriptor filename MUST be `ro-crate-metadata.json` or `ro-crate-metadata.yaml`', self) + # return False + if context.settings.metadata_only: + logger.debug("Skipping file descriptor existence check in metadata-only mode") + return True + if not context.ro_crate.has_descriptor(): + message = f'file descriptor "{context.rel_fd_path}" is not present' + context.result.add_issue(message, self) + return False + assert context.ro_crate.is_detached(), "File descriptor naming convention check is only applicable to detached RO-Crates" + if context.ro_crate.is_detached(): + # Check if the filename follows the convention + fd_filename = context.ro_crate.get_descriptor_path() + if fd_filename and not (fd_filename.name.endswith("-ro-crate-metadata.json") or fd_filename.name.endswith("-ro-crate-metadata.yaml")): + context.result.add_issue( + 'In a detached RO-Crate, the metadata descriptor filename ' + 'SHOULD be named according to the convention `{prefix}-ro-crate-metadata.json` ', + self) + return False + return True From 71653003ed5cf348d08fd9095cf2a20613146476 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 10 Apr 2026 07:09:45 +0200 Subject: [PATCH 022/352] test(ro-crate-1.2): :white_check_mark: test file descriptor naming convention --- .../invalid/ro-crate-metadata.json | 37 ++++++++++++ .../valid/basic-ro-crate-metadata.json | 37 ++++++++++++ .../ro-crate-1.2/test_detached_rocrates.py | 59 +++++++++++++++++++ 3 files changed, 133 
insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..c0441ec1b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/invalid/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/" + } + }, + { + "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + ] + }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "File", + "name": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json new file mode 100644 index 000000000..c0441ec1b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/" + } + }, + { + "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + ] + }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "File", + "name": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py new file mode 100644 index 000000000..89077664f --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import AttachedROCrates, DetachedROCrates, MetadataDocument, MetadataDocumentFormat +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_document_crates__ = MetadataDocument() +__metadata_document_format_crates__ = MetadataDocumentFormat() + +__attached_crates__ = AttachedROCrates() + +__detached_crates__ = DetachedROCrates() + + +def test_valid_local_descriptor_filename(): + """ + Test that a local descriptor filename is valid in a detached RO-Crate. + """ + do_entity_test( + __detached_crates__.valid_local_descriptor_filename, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_local_descriptor_filename(): + """ + Test that a local descriptor filename is invalid in a detached RO-Crate. 
+ """ + do_entity_test( + __detached_crates__.invalid_local_descriptor_filename, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor naming convention"], + expected_triggered_issues=[ + "In a detached RO-Crate, " + "the metadata descriptor filename SHOULD " + "be named according to the convention " + "`{prefix}-ro-crate-metadata.json`"] + ) From 00f842b7eb8eb9af31677290263a34819945d177 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 10 Apr 2026 09:18:55 +0200 Subject: [PATCH 023/352] fix(ro-crate-1.2): :wastebasket: clean up --- .../profiles/ro-crate/1.2/should/1_file_descriptor_name.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py index 050334943..f255c81c2 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py @@ -46,7 +46,6 @@ def test_detached_descriptor_filename(self, context: ValidationContext) -> bool: message = f'file descriptor "{context.rel_fd_path}" is not present' context.result.add_issue(message, self) return False - assert context.ro_crate.is_detached(), "File descriptor naming convention check is only applicable to detached RO-Crates" if context.ro_crate.is_detached(): # Check if the filename follows the convention fd_filename = context.ro_crate.get_descriptor_path() From c899b1f9a8950347459677e0054ab994f9ed8385 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:19:25 +0200 Subject: [PATCH 024/352] feat(ro-crate-1.2): :sparkles: check UTF-8 encoding of the metadata descriptor --- .../1.2/must/0_file_descriptor_format.py | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py 
b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py index 07622eae2..eb5d14165 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py @@ -63,26 +63,6 @@ def test_size(self, context: ValidationContext) -> bool: return True -@requirement(name="File Descriptor JSON format") -class FileDescriptorJsonFormat(PyFunctionCheck): - """ - The file descriptor MUST be a valid JSON file - """ - @check(name="File Descriptor JSON format") - def check(self, context: ValidationContext) -> bool: - """ Check if the file descriptor is in the correct format""" - try: - logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) - context.ro_crate.metadata.as_dict() - return True - except Exception as e: - context.result.add_issue( - f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) - if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) - return False - - @requirement(name="File Descriptor UTF-8 encoding") class FileDescriptorEncodingCheck(PyFunctionCheck): """ @@ -91,6 +71,9 @@ class FileDescriptorEncodingCheck(PyFunctionCheck): @check(name="File Descriptor UTF-8 encoding") def check(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor is UTF-8 encoded + """ try: raw_data = context.ro_crate.get_file_content( Path(context.ro_crate.metadata_descriptor_id), binary_mode=True @@ -107,6 +90,26 @@ def check(self, context: ValidationContext) -> bool: return False +@requirement(name="File Descriptor JSON format") +class FileDescriptorJsonFormat(PyFunctionCheck): + """ + The file descriptor MUST be a valid JSON file + """ + @check(name="File Descriptor JSON format") + def check(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is in the correct format""" + try: + logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) + 
context.ro_crate.metadata.as_dict() + return True + except Exception as e: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @requirement(name="File Descriptor JSON-LD format") class FileDescriptorJsonLdFormat(PyFunctionCheck): """ From 90c20273ff81aca3955e7002420a63b08d3bc340 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:22:25 +0200 Subject: [PATCH 025/352] feat(ro-crate-1.2): :sparkles: add ContextualEntity definition --- .../1.2/must/6_contextual_entity_metadata.ttl | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl new file mode 100644 index 000000000..0034095f7 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -0,0 +1,64 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix xsd: . +@prefix owl: . +@prefix validator: . 
+ + +ro-crate:ContextualEntityDefinition a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify Contextual Entities" ; + sh:description """Mark entities as Contextual Entities if they are in the RO-Crate + metadata but are not Data Entities, not the Root Data Entity, + and not the Metadata File Descriptor.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT DISTINCT ?this + WHERE { + ?this ?p ?o . + FILTER(isIRI(?this)) + FILTER NOT EXISTS { ?this schema:about ?anyRoot } + FILTER NOT EXISTS { ?anyMF schema:about ?this } + FILTER NOT EXISTS { ?this a schema:MediaObject } + FILTER NOT EXISTS { ?this a owl:Ontology } + FILTER NOT EXISTS { + ?root schema:hasPart ?this . + ?anyAbout schema:about ?root . + } + FILTER EXISTS { ?this ?p ?o . FILTER(?p NOT IN (owl:sameAs, rdf:type)) } + FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/crate/")) + FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) + FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) + FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "urn:")) + } + """ + ] ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ContextualEntity ; + ] . 
+ From c564144c8a9946563d249216f19b33da0c6f89b0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:23:38 +0200 Subject: [PATCH 026/352] feat(ro-crate-1.2): :sparkles: redefine min checks for recommended properties of contextual entities --- .../should/6_contextual_entity_metadata.ttl | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 5e37ae1fd..995548f5e 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -20,6 +20,73 @@ @prefix xsd: . @prefix dct: . +ro-crate:ContextualEntityReferences a sh:NodeShape ; + sh:name "Contextual Entity RECOMMENDED references" ; + sh:description "Contextual entities SHOULD be referenced by other entities and described in the same graph." ; + sh:targetClass ro-crate:ContextualEntity ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "Contextual Entity references" ; + sh:description "Check that Contextual Entities are referenced by other entities." ; + sh:message "Contextual entities SHOULD be referenced by other entities." ; + sh:select """ + SELECT ?this + WHERE { + FILTER NOT EXISTS { + ?other ?pr ?this . + FILTER (?other != ?this) + } + } + """ ; + ] . + +ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; + sh:name "Contextual Entity RECOMMENDED description" ; + sh:description """Contextual entities referenced by other entities SHOULD be described in the same graph.""" ; + # sh:targetClass ro-crate:ContextualEntity ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT DISTINCT ?this + WHERE { + ?other ?pr ?this . 
+ FILTER(isIRI(?this)) + FILTER NOT EXISTS { ?this schema:about ?anyRoot } + FILTER NOT EXISTS { ?anyMF schema:about ?this } + FILTER NOT EXISTS { ?this a schema:MediaObject } + FILTER NOT EXISTS { ?this a owl:Ontology } + FILTER NOT EXISTS { + ?root schema:hasPart ?this . + ?anyAbout schema:about ?root . + } + FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/crate/")) + FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) + FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) + FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "urn:")) + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "Referenced contextual entities should be described: RDF type" ; + sh:description "Check that contextual entities referenced by other entities have an RDF type specified." ; + sh:message "Referenced contextual entities SHOULD be described in the same @graph" ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER NOT EXISTS { + ?this a ?type . + } + } + """ ; + ] . 
+ ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; sh:name "CreativeWork Author: minimum RECOMMENDED properties" ; sh:description """The minimum recommended properties for a `CreativeWork Author` are `name` and `affiliation`.""" ; From 14259984c613a3ea48cdd51a7832acadb26e040b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:25:52 +0200 Subject: [PATCH 027/352] test(ro-crate-1.2): :white_check_mark: test metadata document requirements --- .../context_reference/invalid/data.csv | 2 + .../invalid/ro-crate-metadata.json | 52 +++++ .../context_reference/valid/data.csv | 2 + .../valid/ro-crate-metadata.json | 52 +++++ .../invalid/data.csv | 2 + .../invalid/ro-crate-metadata.json | 47 ++++ .../valid/data.csv | 2 + .../valid/ro-crate-metadata.json | 71 ++++++ .../format/compacted/data set/.gitkeep | 0 .../format/compacted/data set2/.gitkeep | 0 .../format/compacted/data set3/.gitkeep | 0 .../compacted/pics/2017-06-11%2012.56.14.jpg | 0 .../compacted/pics/2018-06-11 12.56.14.jpg | 0 .../compacted/pics/2019-06-11 12.56.14.jpg | 0 .../format/compacted/pics/sepia_fence.jpg | 0 .../format/compacted/ro-crate-metadata.json | 213 ++++++++++++++++++ .../format/flattened/ro-crate-metadata.json | 31 +++ .../format/jsonld/ro-crate-metadata.json | 19 ++ .../format/utf8/ro-crate-metadata.json | 25 ++ .../invalid/data.csv | 2 + .../invalid/ro-crate-metadata.json | 49 ++++ .../valid/data.csv | 2 + .../valid/ro-crate-metadata.json | 71 ++++++ .../ro-crate-1.2/test_metadata_document.py | 175 ++++++++++++++ 24 files changed, 817 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/data.csv create mode 100644 
tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set/.gitkeep create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set2/.gitkeep create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set3/.gitkeep create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2017-06-11%2012.56.14.jpg create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2018-06-11 12.56.14.jpg create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2019-06-11 12.56.14.jpg create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/sepia_fence.jpg create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/flattened/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/jsonld/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/format/utf8/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/data.csv create mode 100644 
tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_document.py diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..ce990bcbb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/invalid/ro-crate-metadata.json @@ -0,0 +1,52 @@ +{ + "@context": "https://w3id.org/ro/crate/1.3-DRAFT/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.3-DRAFT" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. 
The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/ro-crate-metadata.json new file mode 100644 index 000000000..175dfa323 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/context_reference/valid/ro-crate-metadata.json @@ -0,0 +1,52 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2/" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", 
+ "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..bfff25ca0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json @@ -0,0 +1,47 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ 
+ { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json new file mode 100644 index 000000000..ce16aac2b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json @@ -0,0 +1,71 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. 
The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry", + "contactPoint": { + "@id": "#josiah-carberry-contact" + }, + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + } + }, + { + "@id": "#josiah-carberry-contact", + "@type": "ContactPoint", + "name": "Josiah Carberry Contact", + "email": "mailto:josiah.carberry@example.com" + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "description": "An example university that the Person entity is affiliated with.", + "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set/.gitkeep b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set2/.gitkeep b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set2/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set3/.gitkeep 
b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/data set3/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2017-06-11%2012.56.14.jpg b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2017-06-11%2012.56.14.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2018-06-11 12.56.14.jpg b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2018-06-11 12.56.14.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2019-06-11 12.56.14.jpg b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/2019-06-11 12.56.14.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/sepia_fence.jpg b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/pics/sepia_fence.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json new file mode 100644 index 000000000..4a482db27 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json @@ -0,0 +1,213 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + "https://w3id.org/ro/terms/workflow-run/context" + ], + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "https://schema.org/name": "My Pictures", + "description": "A collection of my pictures", + "datePublished": "2024-05-17T01:04:52+01:00", + 
"conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.2" + } + ], + "hasPart": [ + { + "@id": "pics/2017-06-11%2012.56.14.jpg" + }, + { + "@id": "pics/2018-06-11%2012.56.14.jpg" + }, + { + "@id": "pics/2019-06-11 12.56.14.jpg" + }, + { + "@id": "data%20set/" + }, + { + "@id": "data%20set2/" + }, + { + "@id": "data set3/" + }, + { + "@id": "pics/sepia_fence.jpg" + }, + { + "@id": "file:///tmp/test.txt" + } + ], + "isBasedOn": { + "@id": "https://doi.org/10.5281/zenodo.1009240" + }, + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "mentions": { + "@id": "#SepiaConversion_1" + } + }, + { + "@id": "https://w3id.org/ro/wfrun/process/0.5", + "@type": "CreativeWork", + "name": "Process Run Crate", + "version": "0.5" + }, + { + "@id": "https://example.com/otherprofile/0.1", + "@type": "CreativeWork", + "name": "Other Profile", + "version": "0.1" + }, + { + "@id": "https://www.imagemagick.org/", + "@type": "SoftwareApplication", + "url": "https://www.imagemagick.org/", + "name": "ImageMagick", + "softwareVersion": "6.9.7-4", + "softwareRequirements": { + "@id": "https://example.com/foobar/1.0.0/" + } + }, + { + "@id": "https://example.com/foobar/1.0.0/", + "@type": "SoftwareApplication", + "name": "foobar", + "softwareVersion": "1.0.0" + }, + { + "@id": "#SepiaConversion_1", + "@type": "CreateAction", + "name": "Convert dog image to sepia", + "description": "convert -sepia-tone 80% pics/2017-06-11\\ 12.56.14.jpg pics/sepia_fence.jpg", + "startTime": "2024-05-17T01:04:50+01:00", + "endTime": "2024-05-17T01:04:52+01:00", + "instrument": { + "@id": "https://www.imagemagick.org/" + }, + "object": { + "@id": "pics/2017-06-11%2012.56.14.jpg" + }, + "result": { + "@id": "pics/sepia_fence.jpg" + }, + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "actionStatus": "http://schema.org/FailedActionStatus", + "error": "this is just to test the error property", + "environment": [ + { + "@id": "#height-limit-pv" + }, + { + "@id": "#width-limit-pv" + 
} + ], + "containerImage": "https://example.com/imagemagick.sif" + }, + { + "@id": "#width-limit-pv", + "@type": "PropertyValue", + "name": "MAGICK_WIDTH_LIMIT", + "value": "4096" + }, + { + "@id": "#height-limit-pv", + "@type": "PropertyValue", + "name": "MAGICK_HEIGHT_LIMIT", + "value": "3072" + }, + { + "@id": "file:///tmp/test.txt", + "@type": "File", + "description": "A test file", + "encodingFormat": "text/plain" + }, + { + "@id": "pics/2017-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2017-06-11 12.56.14.jpg (input)", + "author": { + "@id": "https://orcid.org/0000-0002-3545-944X" + } + }, + { + "@id": "pics/2018-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2018-06-11 12.56.14.jpg (input)" + }, + { + "@id": "pics/2019-06-11 12.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2018-06-11 12.56.14.jpg (input)" + }, + { + "@id": "data%20set/", + "@type": "Dataset", + "name": "Data set", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "data%20set2/", + "@type": "Dataset", + "name": "Data set 2", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "data set3/", + "@type": "Dataset", + "name": "Data set 3", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "pics/sepia_fence.jpg", + "@type": "File", + "description": "The converted picture, now sepia-colored", + "encodingFormat": "image/jpeg", + "name": "sepia_fence (output)" + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes" 
+ }, + { + "@id": "https://orcid.org/0000-0002-3545-944X", + "@type": "Person", + "name": "Peter Sefton" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/flattened/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/format/flattened/ro-crate-metadata.json new file mode 100644 index 000000000..91bf27644 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/format/flattened/ro-crate-metadata.json @@ -0,0 +1,31 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "This is a test dataset", + "description": "This is a test dataset", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "datePublished": "2024-11-05", + "hasPart": [ + { + "@type": "File", + "name": "File in a nested entity" + } + ] + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/jsonld/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/format/jsonld/ro-crate-metadata.json new file mode 100644 index 000000000..506c2ea32 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/format/jsonld/ro-crate-metadata.json @@ -0,0 +1,19 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "name": "Counterexample: invalid JSON-LD syntax", + "description": "This RO-Crate is INCORRECT because it contains invalid JSON syntax. The trailing comma after the last property and the missing closing brace make it invalid JSON, and therefore invalid JSON-LD." 
+ }, + { + "@id": "./", + "@type": "Dataset", + "name": "Invalid JSON-LD Example", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + } + ] diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/utf8/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/format/utf8/ro-crate-metadata.json new file mode 100644 index 000000000..77fc2dd66 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/format/utf8/ro-crate-metadata.json @@ -0,0 +1,25 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Locale: caf rsum nave", + "description": "This RO-Crate is INCORRECT because it contains non-UTF-8 characters.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..789d9e826 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/invalid/ro-crate-metadata.json @@ -0,0 +1,49 @@ +{ + "@context": 
"https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: orphaned contextual entity", + "description": "This RO-Crate INCORRECTLY includes a contextual entity (the Person below) that is NOT linked from any other entity in the @graph. In RO-Crate 1.2, any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. The Person entity is orphaned - nothing references https://orcid.org/0000-0002-1825-0097.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/data.csv b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json new file mode 100644 index 000000000..ce16aac2b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json @@ -0,0 +1,71 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. 
The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry", + "contactPoint": { + "@id": "#josiah-carberry-contact" + }, + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + } + }, + { + "@id": "#josiah-carberry-contact", + "@type": "ContactPoint", + "name": "Josiah Carberry Contact", + "email": "mailto:josiah.carberry@example.com" + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "description": "An example university that the Person entity is affiliated with.", + "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py new file mode 100644 index 000000000..28bc4049e --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import MetadataDocument, MetadataDocumentFormat +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_document_crates__ = MetadataDocument() +__metadata_document_format_crates__ = MetadataDocumentFormat() + + +def test_not_utf8(): + """ + Test that the metadata document is valid when it is not UTF-8 encoded. + """ + do_entity_test( + __metadata_document_format_crates__.not_utf8, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor UTF-8 encoding"], + expected_triggered_issues=["RO-Crate file descriptor \"ro-crate-metadata.json\" is not UTF-8 encoded"] + ) + + +def test_not_json(): + """ + Test that the metadata document is valid when it is not JSON-LD. + """ + do_entity_test( + __metadata_document_format_crates__.not_jsonld, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor JSON format"], + expected_triggered_issues=[ + "RO-Crate file descriptor \"ro-crate-metadata.json\" " + "is not in the correct format" + ] + ) + + +def test_not_flattened(): + """ + Test that the metadata document is valid when it is not flattened. 
+ """ + do_entity_test( + __metadata_document_format_crates__.not_flattened, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor JSON-LD format"], + expected_triggered_issues=[ + "RO-Crate file descriptor \"ro-crate-metadata.json\" " + "is not fully flattened at entity \"./\"" + ] + ) + + +def test_not_compacted(): + """ + Test that the metadata document is not valid when it is not compacted. + """ + do_entity_test( + __metadata_document_format_crates__.not_compacted, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor JSON-LD format"], + expected_triggered_issues=[ + "The 1 occurrence of the \"https://schema.org/name\" URI " + "cannot be used as a key" + ] + ) + + +def test_invalid_context_reference(): + """ + Test that the metadata document is not valid when it has an invalid context reference. + """ + do_entity_test( + __metadata_document_crates__.invalid_context_reference, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Descriptor JSON-LD format"], + expected_triggered_issues=[ + "RO-Crate file descriptor \"ro-crate-metadata.json\" " + "does not reference the required context \"https://w3id.org/ro/crate/1.2/context\""] + ) + + +def test_valid_context_reference(): + """ + Test that the metadata document is valid when it has a valid context reference. + """ + do_entity_test( + __metadata_document_crates__.valid_context_reference, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_not_referenced_contextual_entity(): + """ + Test that the metadata document is not valid + when it has a contextual entity that is not referenced by any other entity in the graph. 
+ """ + do_entity_test( + __metadata_document_crates__.not_referenced_contextual_entity, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Contextual Entity RECOMMENDED references"], + expected_triggered_issues=["Contextual entities SHOULD be referenced by other entities."] + ) + + +def test_referenced_contextual_entity(): + """ + Test that the metadata document is valid + when it has a contextual entity that is referenced by other entities in the graph. + """ + do_entity_test( + __metadata_document_crates__.valid_referenced_contextual_entity, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_described_contextual_entity(): + """ + Test that the metadata document is valid + when it has a contextual entity that is described in the same graph. + """ + do_entity_test( + __metadata_document_crates__.described_contextual_entity, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_not_described_contextual_entity(): + """ + Test that the metadata document is not valid + when it has a contextual entity that is not described in the same graph. 
+ """ + do_entity_test( + __metadata_document_crates__.not_described_contextual_entity, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Contextual Entity RECOMMENDED description"], + expected_triggered_issues=[ + "Referenced contextual entities SHOULD be described in the same @graph"] + ) From 4234a519f289f3c73187148647b64b398f0dafe8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:36:40 +0200 Subject: [PATCH 028/352] refactor(ro-crate-1.2): :art: reformat --- .../1.2/must/1_file-descriptor_metadata.ttl | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl index f9e03602b..281086213 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/1_file-descriptor_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix dct: . @@ -20,8 +19,7 @@ @prefix sh: . @prefix validator: . - -ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape; +ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape ; sh:name "Identify the RO-Crate Metadata File Descriptor" ; sh:description """The RO-Crate Metadata File Descriptor entity describes the RO-Crate itself, and it is named as `*-ro-crate-metadata.json`. 
It can be identified by name according to the RO-Crate specification @@ -38,7 +36,7 @@ ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape; } """ ] ; - + # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -47,8 +45,7 @@ ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape; sh:object ro-crate:ROCrateMetadataFileDescriptor ; ] . -ro-crate:ROCrateMetadataFileDescriptorExistence - a sh:NodeShape ; +ro-crate:ROCrateMetadataFileDescriptorExistence a sh:NodeShape ; sh:name "RO-Crate Metadata File Descriptor entity existence" ; sh:description "The RO-Crate JSON-LD MUST contain a Metadata File Descriptor entity typed as `schema:CreativeWork`" ; sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; @@ -67,9 +64,9 @@ ro-crate:ROCrateMetadataFileDescriptorRecommendedProperties a sh:NodeShape ; sh:name "RO-Crate Metadata File Descriptor REQUIRED properties" ; sh:description """RO-Crate Metadata Descriptor MUST be defined according with the requirements details defined in - [RO-Crate Metadata File Descriptor](https://www.researchobject.org/ro-crate/1.2/root-data-entity.html#ro-crate-metadata-file-descriptor)"""; + [RO-Crate Metadata File Descriptor](https://www.researchobject.org/ro-crate/1.2/root-data-entity.html#ro-crate-metadata-file-descriptor)""" ; sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; - sh:property [ + sh:property [ a sh:PropertyShape ; sh:name "Metadata File Descriptor entity type" ; sh:description "Check if the RO-Crate Metadata File Descriptor has `@type` CreativeWork, as per schema.org" ; @@ -79,30 +76,13 @@ ro-crate:ROCrateMetadataFileDescriptorRecommendedProperties a sh:NodeShape ; sh:hasValue schema_org:CreativeWork ; sh:message "The RO-Crate metadata file MUST be a CreativeWork, as per schema.org" ; ] ; - sh:property [ + sh:property [ a sh:PropertyShape ; sh:name "Metadata File Descriptor entity: `about` property" ; sh:description """Check if the RO-Crate Metadata File Descriptor has 
an `about` property referencing the Root Data Entity""" ; - sh:maxCount 1; + sh:maxCount 1 ; sh:minCount 1 ; - # sh:nodeKind sh:IRI ; sh:path schema_org:about ; # sh:class ro-crate:RootDataEntity ; - # sh:message "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity" ; - ] ; - . -# ro-crate:AgentProjectIntersection -# a sh:NodeShape ; -# sh:name "Agent Project Membership Validation" ; -# sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; -# sh:sparql [ -# a sh:SPARQLConstraint ; -# sh:message "Validation message" ; -# sh:select """ -# SELECT $this -# WHERE { -# FILTER(false) -# } -# """ ; -# sh:severity sh:Violation ; -# ] . + sh:message "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity" ; + ] . From d2c4c7c75f8e2b7efc0fb199d05cbc04e202c667 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:44:38 +0200 Subject: [PATCH 029/352] feat(ro-crate-1.2): :sparkles: check that preview is not included in hasPart (recommended requirement) --- .../should/3_ro_crate_preview_exclusion.ttl | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl index a0ae44ab7..37a94d76c 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/3_ro_crate_preview_exclusion.ttl @@ -11,23 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +@prefix ro: <./> . +@prefix ro-crate: . +@prefix schema_org: . +@prefix sh: . -# @prefix ro: <./> . -# @prefix ro-crate: . -# @prefix schema_org: . -# @prefix sh: . 
- -# ro-crate:PreviewFilesNotInHasPart a sh:NodeShape ; -# sh:name "RO-Crate Website files exclusion" ; -# sh:description """`ro-crate-preview.html` and `ro-crate-preview_files/` SHOULD NOT be included in `hasPart`.""" ; -# sh:targetClass schema_org:Dataset ; -# sh:sparql [ -# sh:message "RO-Crate Website files SHOULD NOT be included in `hasPart`" ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this schema:hasPart ?part . -# FILTER(str(?part) = "ro-crate-preview.html" || str(?part) = "ro-crate-preview_files/") -# } -# """ ; -# ] . +ro-crate:PreviewFilesNotInHasPart a sh:NodeShape ; + sh:name "Preview file descriptor should not be included in `hasPart`" ; + sh:description """`ro-crate-preview.html` and `ro-crate-preview_files/` SHOULD NOT be included in `hasPart`.""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + sh:path schema_org:hasPart ; + sh:nodeKind sh:IRI ; + sh:description "Check that `ro-crate-preview.html` and `ro-crate-preview_files/` are not included in `hasPart`" ; + sh:message "RO-Crate Website files SHOULD NOT be included in `hasPart`" ; + sh:pattern "^(?!.*(ro-crate-preview\\.html|ro-crate-preview_files/)).*$" ; + ] . 
From 476074eca3d5c8e71d784b7971052bc6691b41cb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 11:56:35 +0200 Subject: [PATCH 030/352] feat(ro-crate-1.2): :sparkles: check root data entity identifier --- .../ro-crate/1.2/must/2_root_data_entity_identifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py index a4898dba5..fba6f1b2e 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_identifier.py @@ -19,11 +19,10 @@ from rocrate_validator.requirements.python import (PyFunctionCheck, check, requirement) -# set up logging logger = logging.getLogger(__name__) -@requirement(name="Root Data Entity: identifier") +@requirement(name="Root Data Entity identifier restriction") class RootDataEntityIdentifierChecker(PyFunctionCheck): """ In an attached RO-Crate, the Root Data Entity @id MUST be ./ or an absolute URI. 
@@ -40,7 +39,8 @@ def check_identifier(self, context: ValidationContext) -> bool: if re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", root_entity.id): return True context.result.add_issue( - 'Root Data Entity @id MUST be `./` or an absolute URI for attached RO-Crates', self) + 'The Root Data Entity MUST be a `Dataset` (as per `schema.org`) ' + 'and use an IRI or `./` as identifier', self) return False except Exception as e: context.result.add_issue( From cb085c1adb65daf842d047273d5e340325fd244a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 12:17:13 +0200 Subject: [PATCH 031/352] test(ro-crate-1.2): :white_check_mark: test min attached ro-crates requirements --- .../invalid/data.csv | 3 + .../invalid/ro-crate-metadata.json | 54 ++++++++++ .../invalid/ro-crate-preview.html | 5 + .../valid/data.csv | 3 + .../valid/ro-crate-metadata.json | 44 +++++++++ .../valid/ro-crate-preview.html | 8 ++ .../invalid/ro-crate-metadata.json | 32 ++++++ .../valid/ro-crate-metadata.json | 32 ++++++ .../invalid/ro-crate-metadata.json | 32 ++++++ .../valid/ro-crate-metadata.json | 32 ++++++ .../ro-crate-1.2/test_attached_rocrates.py | 99 +++++++++++++++++++ 11 files changed, 344 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-preview.html create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-preview.html create mode 100644 
tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/data.csv b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/data.csv new file mode 100644 index 000000000..53cf763c2 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/data.csv @@ -0,0 +1,3 @@ +id,value +1,hello +2,world diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..bffa76e4d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-metadata.json @@ -0,0 +1,54 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./x" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./x", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + }, + { + "@id": "ro-crate-preview.html" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A sample CSV data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + }, + { + "@id": "ro-crate-preview.html", + "@type": "File", + "name": "RO-Crate Preview", + "description": "HTML preview of the RO-Crate - this SHOULD NOT be in hasPart per RO-Crate 1.2.", + "encodingFormat": "text/html" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-preview.html b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-preview.html new file mode 100644 index 000000000..be5c1b2fe --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/invalid/ro-crate-preview.html @@ -0,0 +1,5 @@ + + +RO-Crate Preview +

Counterexample: preview in hasPart

+ diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/data.csv b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/data.csv new file mode 100644 index 000000000..53cf763c2 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/data.csv @@ -0,0 +1,3 @@ +id,value +1,hello +2,world diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json new file mode 100644 index 000000000..3ebcc8ac7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -0,0 +1,44 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: preview not in hasPart", + "description": "This RO-Crate correctly omits ro-crate-preview.html from hasPart, following the new RO-Crate 1.2 requirement.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A sample CSV data file.", + "encodingFormat": "text/csv", + "contentSize": "42" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-preview.html b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-preview.html new file mode 100644 index 000000000..99f1e3229 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-preview.html @@ -0,0 +1,8 @@ + + +RO-Crate Preview + +

Example: preview not in hasPart

+

This RO-Crate correctly omits ro-crate-preview.html from hasPart.

+ + diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..ff31f7fa1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/invalid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "invalid-IRI-root-dataset" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "invalid-IRI-root-dataset", + "@type": "Dataset", + "name": "Counterexample: root identifier is neither ./ nor an absolute URI", + "description": "This RO-Crate INCORRECTLY uses invalid-IRI-root-dataset as the Root Data Entity identifier. In an attached RO-Crate, the Root Data Entity @id MUST be ./ or an absolute URI.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/valid/ro-crate-metadata.json new file mode 100644 index 000000000..bf200f5a5 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/non-relative-root-identifier/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "https://example.org/external-root-dataset" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "https://example.org/external-root-dataset", + "@type": "Dataset", + "name": "Example: absolute URI as root identifier", + "description": "This RO-Crate correctly uses an absolute URI as the Root Data Entity identifier.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..6176bc38d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/invalid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./root-dataset" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./root-dataset", + "@type": "Dataset", + "name": "Counterexample: relative root identifier other than ./", + "description": "This RO-Crate INCORRECTLY uses ./root-dataset as the Root Data Entity identifier. In an attached RO-Crate, the Root Data Entity @id MUST be ./ or an absolute URI.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/relative-root-identifier/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: ./ as root identifier", + "description": "This RO-Crate correctly uses ./ as the Root Data Entity identifier.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py new file mode 100644 index 000000000..2d2b9faf7 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py @@ -0,0 +1,99 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import AttachedROCrates, MetadataDocument, MetadataDocumentFormat +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_document_crates__ = MetadataDocument() +__metadata_document_format_crates__ = MetadataDocumentFormat() + +__attached_crates__ = AttachedROCrates() + + +def test_preview_not_in_hasPart(): + """ + Test that the metadata document is valid when the preview file + is not included in the `hasPart` property of the Root Data Entity. + """ + do_entity_test( + __attached_crates__.valid_preview_not_in_hasPart, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_preview_not_in_hasPart_warning(): + """ + Test that a warning is triggered when the preview file + is included in the `hasPart` property of the Root Data Entity. 
+ """ + do_entity_test( + __attached_crates__.invalid_preview_not_in_hasPart, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Preview file descriptor should not be included in `hasPart`"], + expected_triggered_issues=[ + "RO-Crate Website files SHOULD NOT be included in `hasPart`"] + ) + + +def test_root_with_IRI_identifier(): + do_entity_test( + __attached_crates__.valid_relative_root_entity_id, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_root_with_relative_identifier(): + do_entity_test( + __attached_crates__.valid_relative_root_entity_id, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_root_with_invalid_relative_identifier(): + do_entity_test( + __attached_crates__.invalid_relative_root_entity_id, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity identifier restriction"], + expected_triggered_issues=[ + "The Root Data Entity MUST be a `Dataset` (as per `schema.org`) and use an IRI or `./` as identifier"] + ) + + +def test_root_with_invalid_non_relative_identifier(): + do_entity_test( + __attached_crates__.invalid_non_relative_root_entity_id, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity identifier restriction"], + expected_triggered_issues=[ + "The Root Data Entity MUST be a `Dataset` (as per `schema.org`) and use an IRI or `./` as identifier"] + ) From 0c2e24ac4a4a21559f3cb78f983fb184613736fa Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 12:19:47 +0200 Subject: [PATCH 032/352] refactor(ro-crate-1.2): :art: clean up and reformat --- .../1.2/must/2_root_data_entity_metadata.ttl | 94 ++++++++----------- 1 file changed, 37 insertions(+), 57 deletions(-) diff --git 
a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl index f12c76a9c..c4bc0d002 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . @@ -21,9 +20,34 @@ @prefix validator: . @prefix xsd: . +ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify the Root Data Entity of the RO-Crate" ; + sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. + It is a schema:Dataset and is indirectly identified by the about property of the metadata descriptor in the RO-Crate + (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.2/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)). + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:RootDataEntity ; + ] . 
-ro-crate:RootDataEntityType - a sh:NodeShape ; +ro-crate:RootDataEntityType a sh:NodeShape ; sh:name "RO-Crate Root Data Entity type" ; sh:description "The Root Data Entity MUST be a `Dataset` (as per `schema.org`)" ; sh:target [ @@ -53,43 +77,17 @@ ro-crate:RootDataEntityType sh:name "Root Data Entity: `publisher` property" ; sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization` or `Person`.""" ; sh:or ( - [ sh:class schema_org:Organization ] - [ sh:class schema_org:Person ] + [ + sh:class schema_org:Organization + ] + [ + sh:class schema_org:Person + ] ) ; sh:message """The Root Data Entity MUST have a `publisher` property of type `Organization` or `Person`.""" ; ] . - -ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape; - sh:name "Identify the Root Data Entity of the RO-Crate" ; - sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. - It is a schema:Dataset and is indirectly identified by the about property of the metadata descriptor in the RO-Crate - (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.2/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)). - """ ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a schema:Dataset . - ?metadatafile schema:about ?this . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; - - # Expand data graph with triples from the file data entity - sh:rule [ - a sh:TripleRule ; - sh:subject sh:this ; - sh:predicate rdf:type ; - sh:object ro-crate:RootDataEntity ; - ] . 
- - -ro-crate:RootDataEntityRequiredProperties - a sh:NodeShape ; +ro-crate:RootDataEntityRequiredProperties a sh:NodeShape ; sh:name "RO-Crate Root Data Entity REQUIRED properties" ; sh:description "The Root Data Entity MUST have a `name`, `description`, `license` and `datePublished`" ; sh:targetClass ro-crate:RootDataEntity ; @@ -100,7 +98,7 @@ ro-crate:RootDataEntityRequiredProperties to clearly identify the dataset and distinguish it from other datasets.""" ; sh:minCount 1 ; sh:nodeKind sh:Literal ; - sh:path schema_org:name; + sh:path schema_org:name ; sh:message "The Root Data Entity MUST have a `name` property (as specified by schema.org)" ; ] ; sh:property [ @@ -110,7 +108,7 @@ ro-crate:RootDataEntityRequiredProperties to provide a human-readable description of the dataset.""" ; sh:minCount 1 ; sh:nodeKind sh:Literal ; - sh:path schema_org:description; + sh:path schema_org:description ; sh:message "The Root Data Entity MUST have a `description` property (as specified by schema.org)" ; ] ; sh:property [ @@ -118,7 +116,7 @@ ro-crate:RootDataEntityRequiredProperties sh:name "Root Data Entity: `license` property" ; sh:description """Check if the Root Data Entity includes a `license` property (as specified by schema.org) to provide information about the license of the dataset.""" ; - sh:path schema_org:license; + sh:path schema_org:license ; sh:minCount 1 ; sh:nodeKind sh:IRIOrLiteral ; sh:message """The Root Data Entity MUST have a `license` property (as specified by schema.org).""" ; @@ -143,21 +141,3 @@ ro-crate:RootDataEntityRequiredProperties sh:class prof:Profile ; sh:message "If present, `conformsTo` values MUST reference a Profile entity" ; ] . 
- -ro-crate:RootDataEntityHasPartValueRestriction - a sh:NodeShape ; - sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; - sh:description "The Root Data Entity MUST be linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; - sh:targetClass ro-crate:RootDataEntity ; - sh:property [ - a sh:PropertyShape ; - sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; - sh:description "Check if the Root Data Entity is linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; - sh:path schema_org:hasPart ; - sh:or ( - [ sh:class ro-crate:File ] - [ sh:class ro-crate:Directory ] - [ sh:class ro-crate:GenericDataEntity ] - ) ; - sh:message """The Root Data Entity MUST be linked to either File or Directory instances, nothing else""" ; - ] . From aa73614fb57cdc7f8e8c1507093e491b2c6008c4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 12:21:14 +0200 Subject: [PATCH 033/352] refactor(ro-crate-1.2): :recycle: update name/description of some requirement checks --- .../1.2/should/1_file-descriptor_metadata.ttl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl index 5e7727b60..e5e8cfe5e 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/1_file-descriptor_metadata.ttl @@ -20,12 +20,15 @@ ro-crate:ROCrateMetadataFileDescriptorConformsTo a sh:NodeShape ; sh:name "RO-Crate Metadata File Descriptor RECOMMENDED conformsTo" ; - sh:description """The RO-Crate Metadata Descriptor SHOULD indicate the versioned RO-Crate specification URI""" ; + sh:description """The RO-Crate Metadata Descriptor SHOULD indicate the versioned RO-Crate specification URI. 
+ In RO-Crate 1.2, conformsTo SHOULD have a single value which is a versioned permalink URI + of the RO-Crate specification that the RO-Crate JSON-LD conforms to. + The URI SHOULD start with https://w3id.org/ro/crate/.""" ; sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ; sh:property [ a sh:PropertyShape ; sh:minCount 1 ; - sh:name "Metadata File Descriptor entity: RECOMMENDED `conformsTo` value" ; + sh:name "Metadata File Descriptor: RECOMMENDED `conformsTo` value" ; sh:description """Check if the RO-Crate Metadata File Descriptor has a versioned RO-Crate specification URI""" ; sh:severity sh:Warning ; sh:nodeKind sh:IRI ; @@ -35,7 +38,7 @@ ro-crate:ROCrateMetadataFileDescriptorConformsTo ] ; sh:property [ a sh:PropertyShape ; - sh:name "Metadata File Descriptor entity: RECOMMENDED single `conformsTo` value" ; + sh:name "Metadata File Descriptor: RECOMMENDED single `conformsTo` value" ; sh:description """Check if the RO-Crate Metadata File Descriptor has a single `conformsTo` value""" ; sh:severity sh:Warning ; sh:path dct:conformsTo ; @@ -44,10 +47,10 @@ ro-crate:ROCrateMetadataFileDescriptorConformsTo ] ; sh:property [ a sh:PropertyShape ; - sh:name "Metadata File Descriptor entity: RECOMMENDED `conformsTo` URI prefix" ; + sh:name "Metadata File Descriptor: RECOMMENDED `conformsTo` URI prefix" ; sh:description """Check if the `conformsTo` URI starts with https://w3id.org/ro/crate/""" ; sh:severity sh:Warning ; sh:path dct:conformsTo ; - sh:pattern "^https://w3id.org/ro/crate/" ; + sh:pattern "^https://w3id\\.org/ro/crate/" ; sh:message "The RO-Crate metadata file descriptor `conformsTo` URI SHOULD start with https://w3id.org/ro/crate/" ; - ] . + ] . 
\ No newline at end of file From da6cc42000d513100c18310105edca1ace07c288 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 18:55:21 +0200 Subject: [PATCH 034/352] feat(ro-crate-1.2): :sparkles: check identifier of detached RO-Crates root data entities --- .../should/2_root_data_entity_identifier.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py new file mode 100644 index 000000000..a98f99b8a --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py @@ -0,0 +1,55 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.uri import URI + +logger = logging.getLogger(__name__) + + +@requirement(name="Root Data Entity: RECOMMENDED identifier") +class DetachedROCrateRootDataEntityIdentifierChecker(PyFunctionCheck): + """ + In a detached RO-Crate, the Root Data Entity @id SHOULD be an absolute URI. 
+ """ + + @check(name="Root Data Entity: RECOMMENDED identifier") + def check_identifier(self, context: ValidationContext) -> bool: + """ + In a detached RO-Crate, the Root Data Entity @id SHOULD be an absolute URI. + """ + try: + if not context.ro_crate.is_detached(): + return True + root_entity = context.ro_crate.metadata.get_root_data_entity() + if not root_entity.is_remote(): + return True + + if root_entity.id == './': + context.result.add_issue( + 'In a remote RO-Crate, the Root Data Entity @id SHOULD be an absolute URL, not `./`', self) + return False + if not URI(root_entity.id).is_remote_resource(): + context.result.add_issue( + 'In a remote RO-Crate, the Root Data Entity @id SHOULD be an absolute URL', self) + return False + + return True + except Exception as e: + context.result.add_issue( + f'Error checking Root Data Entity @id: {str(e)}', self) + return False From 08e4168ec19353d76731aef09328860f623fb364 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 13 Apr 2026 18:57:48 +0200 Subject: [PATCH 035/352] test(ro-crate-1.2): :white_check_mark: test min requirements of detached RO-Crates --- .../basic-ro-crate-metadata.json | 37 ++++++++++++ .../multi-ro-crate-metadata.json | 60 +++++++++++++++++++ .../profiled-ro-crate-metadata.json | 51 ++++++++++++++++ .../valid/basic-ro-crate-metadata.json | 9 ++- .../ro-crate-1.2/test_detached_rocrates.py | 35 ++++++++++- 5 files changed, 189 insertions(+), 3 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-basic/basic-ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-multi/multi-ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-with-profile/profiled-ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-basic/basic-ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-basic/basic-ro-crate-metadata.json new file mode 100644 index 000000000..65f04550d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-basic/basic-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/basic" + } + }, + { + "@id": "https://example.org/ro-crate/detached/basic", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + } + ] + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-multi/multi-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-multi/multi-ro-crate-metadata.json new file mode 100644 index 000000000..8e97a9115 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-multi/multi-ro-crate-metadata.json @@ -0,0 +1,60 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/multi" + } + }, + { + "@id": "https://example.org/ro-crate/detached/multi", + "@type": "Dataset", + "name": "Detached RO-Crate (multi)", + "description": "Detached RO-Crate with multiple remote data 
entities.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + }, + { + "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg" + }, + { + "@id": "https://example.org/datasets/sample" + } + ] + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file.", + "encodingFormat": "text/plain" + }, + { + "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg", + "@type": "File", + "name": "W3C logo", + "description": "Remote image file.", + "encodingFormat": "image/png" + }, + { + "@id": "https://example.org/datasets/sample", + "@type": "Dataset", + "name": "Sample remote dataset", + "description": "Remote dataset referenced by the detached RO-Crate.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-with-profile/profiled-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-with-profile/profiled-ro-crate-metadata.json new file mode 100644 index 000000000..eaf511599 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/detached-with-profile/profiled-ro-crate-metadata.json @@ -0,0 +1,51 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/profiled" + } + }, + { + "@id": "https://example.org/ro-crate/detached/profiled", + "@type": "Dataset", + "name": "Detached RO-Crate (profiled)", + "description": "Detached RO-Crate declaring conformance to an additional profile.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "conformsTo": 
[ + { + "@id": "https://example.org/profiles/example-profile" + } + ], + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + } + ] + }, + { + "@id": "https://example.org/profiles/example-profile", + "@type": [ + "Profile", + "CreativeWork" + ], + "name": "Example profile", + "description": "Example RO-Crate profile contextual entity." + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index c0441ec1b..5aa5d10df 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -20,6 +20,9 @@ "license": { "@id": "http://spdx.org/licenses/CC0-1.0" }, + "distribution": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, "hasPart": [ { "@id": "http://spdx.org/licenses/CC0-1.0" @@ -31,7 +34,9 @@ "@type": "File", "name": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", "description": "Remote text file referenced by the detached RO-Crate.", - "encodingFormat": "text/plain" + "encodingFormat": "text/plain", + "contentSize": "167", + "sdDatePublished": "2024-05-17" } ] -} +} \ No newline at end of file diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index 89077664f..03debbf27 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -37,7 +37,11 @@ def 
test_valid_local_descriptor_filename(): __detached_crates__.valid_local_descriptor_filename, models.Severity.RECOMMENDED, True, - profile_identifier="ro-crate-1.2" + profile_identifier="ro-crate-1.2", + skip_checks=["Web-based Data Entity: REQUIRED availability", + "Web-based Data Entity: RECOMMENDED availability", + "Web-based Data Entity: `contentSize` property", + "Web-based Data Entity: `contentUrl` availability"], ) @@ -57,3 +61,32 @@ def test_invalid_local_descriptor_filename(): "be named according to the convention " "`{prefix}-ro-crate-metadata.json`"] ) + + +def test_root_data_entity_identifier_when_online_available(): + """ + Test that when the RO-Crate is online available, + the Root Data Entity @id SHOULD be an absolute URL in a detached RO-Crate. + """ + do_entity_test( + __detached_crates__.valid_root_data_entity_identifier_when_online_available, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_root_data_entity_identifier_when_online_available(): + """ + Test that when the RO-Crate is online available, + the Root Data Entity @id SHOULD be an absolute URL in a detached RO-Crate. 
+ """ + do_entity_test( + __detached_crates__.invalid_root_data_entity_identifier_when_online_available, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: RECOMMENDED identifier"], + expected_triggered_issues=[ + "In a remote RO-Crate, the Root Data Entity @id SHOULD be an absolute URL"] + ) From 184d5e791ad5286b6daeb05218f3de5619e50e43 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 07:25:54 +0200 Subject: [PATCH 036/352] test(core): :bug: fix path --- tests/integration/test_sparql_constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index 9a7bec693..418746552 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -150,7 +150,7 @@ def test_resolve_parent_shape_with_sparql_bnode(): SHACL = Namespace("http://www.w3.org/ns/shacl#") registry = ShapesRegistry() - profiles_path = "rocrate_validator/profiles/ro-crate/must" + profiles_path = "rocrate_validator/profiles/ro-crate/1.1/must" # Load shapes from profile for filename in Path(profiles_path).iterdir(): From c464f526f2824c5877613cf07275532443aeb1a2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 07:27:03 +0200 Subject: [PATCH 037/352] feat(ro-crate-1.2): :sparkles: check recommended base type --- .../ro-crate/1.2/should/0_entity_metadata.ttl | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl new file mode 100644 index 000000000..a41fda313 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl @@ -0,0 +1,64 @@ +# Copyright (c) 2024-2026 
CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix sh: . + +ro-crate:ROCrateMetadataEntityRecommendedType a sh:NodeShape ; + sh:name "RO-Crate Metadata Entity RECOMMENDED type" ; + sh:description """Each entity should include at least one Schema.org type that accurately describes the entity""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ?type . + + # Exclude Root and RO-Crate Metadata File entities + ?root a schema:Dataset . + ?metadatafile schema:about ?root . 
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?metadatafile) + FILTER (?this != ?root) + # Exclude entities with non-IRI identifiers or those from specific namespaces + FILTER (isIRI(?this)) + FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/crate/")) + FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) + FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) + FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) + FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "urn:")) + } + """ ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "RO-Crate Metadata Entity: RECOMMENDED type" ; + sh:description """Check if the RO-Crate Metadata Entity includes at least one Schema.org type""" ; + sh:message "RO-Crate Metadata Entity SHOULD include at least one Schema.org type" ; + sh:select """ + SELECT ?this + WHERE { + FILTER NOT EXISTS { + $this a ?schemaType . + FILTER ( + STRSTARTS(STR(?schemaType), "http://schema.org/") || + STRSTARTS(STR(?schemaType), "https://schema.org/") + ) + } + } + """ ; + ] . 
From 0c620e293911b3f7bb711d6c34ea977ee5d753dd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 07:29:15 +0200 Subject: [PATCH 038/352] test(ro-crate-1.2): :white_check_mark: test recommended entity type --- .../invalid/ro-crate-metadata.json | 37 ++++++++++++ .../valid/ro-crate-metadata.json | 32 ++++++++++ .../ro-crate-1.2/test_metadata_entities.py | 58 +++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..4432083e5 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.2/context", + { + "testTerm": "http://github.com/crate-validator/tests#" + } + ], + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "testTerm", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py new file mode 100644 index 000000000..1600bd52e --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py @@ -0,0 +1,58 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import AttachedROCrates, MetadataDocument, MetadataDocumentFormat, MetadataEntities +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_document_crates__ = MetadataDocument() +__metadata_document_format_crates__ = MetadataDocumentFormat() + +__attached_crates__ = AttachedROCrates() + +__metadata_entities__ = MetadataEntities() + + +def test_valid_recommended_schema_type(): + """ + Test that the metadata document includes at least one Schema.org type. + """ + do_entity_test( + __metadata_entities__.valid_recommended_schema_type, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_invalid_recommended_schema_type_warning(): + """ + Test that a warning is triggered when the metadata document + does not include at least one Schema.org type. + """ + do_entity_test( + __metadata_entities__.invalid_recommended_schema_type, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata Entity RECOMMENDED type"], + expected_triggered_issues=[ + "RO-Crate Metadata Entity SHOULD include at least one Schema.org type"] + ) From f876d57d6eaa592ef5248374b68cfb950b05b039 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 08:59:17 +0200 Subject: [PATCH 039/352] refactor(ro-crate-1.2): :card_file_box: update test data --- .../invalid/ro-crate-metadata.json | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json index 4432083e5..1ab3f93fe 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/invalid/ro-crate-metadata.json @@ -1,10 +1,5 @@ { - "@context": [ - "https://w3id.org/ro/crate/1.2/context", - { - "testTerm": "http://github.com/crate-validator/tests#" - } - ], + "@context": "https://w3id.org/ro/crate/1.2/context", "@graph": [ { "@id": "ro-crate-metadata.json", @@ -29,7 +24,7 @@ }, { "@id": "https://creativecommons.org/licenses/by/4.0/", - "@type": "testTerm", + "@type": "http://purl.org/dc/terms/LicenseDocument", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." } From 41ef61f6751749bfc6276c1a35c8fb40ca53e4eb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 09:28:02 +0200 Subject: [PATCH 040/352] feat(ro-crate-1.2): :sparkles: check recommended entity name --- .../ro-crate/1.2/should/0_entity_metadata.ttl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl index a41fda313..3bb0d3ce8 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl @@ -13,10 +13,12 @@ # limitations under the License. @prefix ro-crate: . @prefix sh: . +@prefix schema: . +@prefix xsd: . 
ro-crate:ROCrateMetadataEntityRecommendedType a sh:NodeShape ; - sh:name "RO-Crate Metadata Entity RECOMMENDED type" ; - sh:description """Each entity should include at least one Schema.org type that accurately describes the entity""" ; + sh:name "RO-Crate Metadata Entity: RECOMMENDED properties" ; + sh:description """Check if the RO-Crate Metadata Entity includes at least one Schema.org type and a human-readable name""" ; sh:target [ a sh:SPARQLTarget ; sh:prefixes ro-crate:sparqlPrefixes ; @@ -61,4 +63,13 @@ ro-crate:ROCrateMetadataEntityRecommendedType a sh:NodeShape ; } } """ ; + ] ; + sh:property [ + sh:name "RO-Crate Metadata Entity: name" ; + sh:description """Check if the RO-Crate Metadata Entity includes a human-readable name""" ; + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:severity sh:Warning ; + sh:message "Entities SHOULD have a human-readable name" ; ] . From dc0020542ab6ba91b9cd99303aa1326471fc60bc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 09:28:44 +0200 Subject: [PATCH 041/352] test(ro-crate-1.2): :white_check_mark: test recommended entity name --- .../recommended_name/invalid/my-data-file.txt | 0 .../invalid/ro-crate-metadata.json | 41 ++++++++++++++++++ .../recommended_name/valid/my-data-file.txt | 0 .../valid/ro-crate-metadata.json | 42 +++++++++++++++++++ .../ro-crate-1.2/test_metadata_entities.py | 38 +++++++++++++---- 5 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/my-data-file.txt create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/my-data-file.txt create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json diff --git 
a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/my-data-file.txt b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/my-data-file.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..f999ef379 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/invalid/ro-crate-metadata.json @@ -0,0 +1,41 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "my-data-file.txt" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "my-data-file.txt", + "@type": "File", + "description": "A data file that is part of the RO-Crate." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/my-data-file.txt b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/my-data-file.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5ae1ee6ad --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -0,0 +1,42 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "my-data-file.txt" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "my-data-file.txt", + "@type": "File", + "name": "My Data File", + "description": "A data file that is part of the RO-Crate." 
+ } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py index 1600bd52e..74b18a5c2 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py @@ -15,17 +15,12 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import AttachedROCrates, MetadataDocument, MetadataDocumentFormat, MetadataEntities +from tests.ro_crates_1_2 import MetadataEntities from tests.shared import do_entity_test logger = logging.getLogger(__name__) -__metadata_document_crates__ = MetadataDocument() -__metadata_document_format_crates__ = MetadataDocumentFormat() - -__attached_crates__ = AttachedROCrates() - __metadata_entities__ = MetadataEntities() @@ -52,7 +47,36 @@ def test_invalid_recommended_schema_type_warning(): models.Severity.RECOMMENDED, False, profile_identifier="ro-crate-1.2", - expected_triggered_requirements=["RO-Crate Metadata Entity RECOMMENDED type"], + expected_triggered_requirements=["RO-Crate Metadata Entity: RECOMMENDED properties"], expected_triggered_issues=[ "RO-Crate Metadata Entity SHOULD include at least one Schema.org type"] ) + + +def test_valid_recommended_entity_name(): + """ + Test that the metadata document includes a `name` property for at least one entity. + """ + do_entity_test( + __metadata_entities__.valid_recommended_name, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_invalid_recommended_entity_name_warning(): + """ + Test that a warning is triggered when the metadata document + does not include a `name` property for at least one entity. 
+ """ + do_entity_test( + __metadata_entities__.invalid_recommended_name, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata Entity: RECOMMENDED properties"], + expected_triggered_issues=[ + "Entities SHOULD have a human-readable name"] + ) From b4935f8188ff89d315a5ddb3fe13efd3d50d6ea9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 11:02:30 +0200 Subject: [PATCH 042/352] feat(ro-crate-1.2): :sparkles: check recommended reachability of metadata entities --- .../ro-crate/1.2/should/0_entity_metadata.ttl | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl index 3bb0d3ce8..1c8e04fff 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl @@ -73,3 +73,42 @@ ro-crate:ROCrateMetadataEntityRecommendedType a sh:NodeShape ; sh:severity sh:Warning ; sh:message "Entities SHOULD have a human-readable name" ; ] . + +ro-crate:RecommendedEntityReachability a sh:NodeShape ; + sh:name "RO-Crate Metadata Entity: RECOMMENDED reachability" ; + sh:description """Check if the RO-Crate Metadata Entity is directly or indirectly reachable from the Root Data Entity.""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:sparql [ + sh:name "RO-Crate Metadata Entity: RECOMMENDED reachability from the Root Data Entity" ; + sh:description """Check if the RO-Crate Metadata Entity is directly or indirectly reachable from the Root Data Entity.""" ; + sh:message "RO-Crate Metadata Entity SHOULD be directly or indirectly reachable from the Root Data Entity" ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this ?unreachable + WHERE { + ?unreachable a ?type . 
+ + # Exclude Root and RO-Crate Metadata File entities + ?root a schema:Dataset . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?unreachable != ?metadatafile) + FILTER (?unreachable != ?root) + FILTER (isIRI(?unreachable)) + FILTER (?unreachable != $this) + + # Exclude entities with non-IRI identifiers or those from specific namespaces + FILTER ( + !STRSTARTS(STR(?type), "http://www.w3.org/1999/02/22-rdf-syntax-ns#") && + !STRSTARTS(STR(?type), "http://www.w3.org/2000/01/rdf-schema#") && + !STRSTARTS(STR(?type), "http://www.w3.org/2002/07/owl#") && + !STRSTARTS(STR(?type), "http://www.w3.org/2001/XMLSchema#") && + !STRSTARTS(STR(?type), "http://www.w3.org/ns/shacl#") + ) + # Select entities which are not directly or indirectly reachable from the Root Data Entity + FILTER NOT EXISTS { + $this (|!)+ ?unreachable . + } + } + """ ; + ] . From 45db1d007ed26dc6ced96a943b48a2a680e71875 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 11:08:22 +0200 Subject: [PATCH 043/352] test(ro-crate-1.2): :white_check_mark: test recommended reachability of metadata entities --- .../invalid/my-data-file.txt | 0 .../invalid/ro-crate-metadata.json | 38 +++++++++++++++++++ .../valid/ro-crate-metadata.json | 32 ++++++++++++++++ .../ro-crate-1.2/test_metadata_entities.py | 29 ++++++++++++++ 4 files changed, 99 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/my-data-file.txt create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/my-data-file.txt b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/my-data-file.txt new file mode 100644 index 
000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..518be7d92 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/invalid/ro-crate-metadata.json @@ -0,0 +1,38 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "my-data-file.txt", + "@type": "File", + "name": "My Data File", + "description": "A data file that is part of the RO-Crate." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py index 74b18a5c2..de0a1a404 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py @@ -80,3 +80,32 @@ def test_invalid_recommended_entity_name_warning(): expected_triggered_issues=[ "Entities SHOULD have a human-readable name"] ) + + +def test_valid_recommended_reachability(): + """ + Test that all metadata entities are directly or indirectly reachable from the Root Data Entity. + """ + do_entity_test( + __metadata_entities__.valid_entity_reachability, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_invalid_recommended_reachability_warning(): + """ + Test that a warning is triggered when at least one metadata entity is not directly + or indirectly reachable from the Root Data Entity. + """ + do_entity_test( + __metadata_entities__.invalid_entity_reachability, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata Entity: RECOMMENDED reachability"], + expected_triggered_issues=[ + "RO-Crate Metadata Entity SHOULD be directly or indirectly reachable from the Root Data Entity"] + ) From 860a5fe0603b3f37b96d61067b1c4d00d6737420 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 11:27:51 +0200 Subject: [PATCH 044/352] test(ro-crate-1.2): :white_check_mark: test single value for the conformsTo descriptor property --- .../single_value/invalid/my-data-file.txt | 0 .../invalid/ro-crate-metadata.json | 37 ++++++++++++ .../single_value/valid/ro-crate-metadata.json | 32 +++++++++++ .../ro-crate-1.2/test_metadata_descriptor.py | 57 +++++++++++++++++++ .../ro-crate-1.2/test_ro_crate_1_2.py | 4 +- 5 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 
tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/my-data-file.txt create mode 100644 tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/my-data-file.txt b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/my-data-file.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..adad4310b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/invalid/ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.2" + }, + { + "@id": "https://w3id.org/ro/crate/1.1" + } + ] + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py new file mode 100644 index 000000000..73721fab4 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import MetadataDescriptor, MetadataDocument, MetadataDocumentFormat +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_document_crates__ = MetadataDocument() +__metadata_document_format_crates__ = MetadataDocumentFormat() + +__metadata_descriptor_crates__ = MetadataDescriptor() + + +def test_valid_single_value_conformsTo(): + """ + Test that the metadata descriptor is valid when the `conformsTo` property + includes a single value. + """ + do_entity_test( + __metadata_descriptor_crates__.valid_single_value_conformsTo, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_single_value_conformsTo(): + """ + Test that the metadata descriptor is invalid when the `conformsTo` property + includes multiple values. + """ + do_entity_test( + __metadata_descriptor_crates__.invalid_single_value_conformsTo, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata File Descriptor RECOMMENDED conformsTo"], + expected_triggered_issues=[ + "The RO-Crate metadata file descriptor SHOULD have a single `conformsTo` value" + ] + ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py index b71c785fc..eec580b1a 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py +++ b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py @@ -97,7 +97,7 @@ def test_detached_bad_filename_recommended(): paths.detached_bad_filename, models.Severity.RECOMMENDED, False, - ["Detached RO-Crate metadata filename"], - ["Detached RO-Crate metadata file SHOULD be named ${prefix}-ro-crate-metadata.json"], + ["File Descriptor naming convention"], + ["metadata descriptor filename"], profile_identifier="ro-crate-1.2", ) From 
20999a0111215ec6d65f509de5821924edf0455c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 11:53:35 +0200 Subject: [PATCH 045/352] test(ro-crate-1.2): :white_check_mark: test recommended prefix of the conformsTo property --- .../invalid/ro-crate-metadata.json | 32 +++++++++++++++++++ .../valid/ro-crate-metadata.json | 32 +++++++++++++++++++ .../ro-crate-1.2/test_metadata_descriptor.py | 30 +++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..08aa60566 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/invalid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://xxxw3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py index 73721fab4..d2b6cad12 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py @@ -55,3 +55,33 @@ def test_invalid_single_value_conformsTo(): "The RO-Crate metadata file descriptor SHOULD have a single `conformsTo` value" ] ) + + +def test_valid_recommended_prefix_conformsTo(): + """ + Test that the metadata descriptor is valid when the `conformsTo` property + includes a value with the recommended prefix. + """ + do_entity_test( + __metadata_descriptor_crates__.valid_recommended_prefix_conformsTo, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_prefix_conformsTo(): + """ + Test that the metadata descriptor is invalid when the `conformsTo` property + includes a value with an incorrect prefix. 
+ """ + do_entity_test( + __metadata_descriptor_crates__.invalid_recommended_prefix_conformsTo, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata File Descriptor RECOMMENDED conformsTo"], + expected_triggered_issues=[ + "The RO-Crate metadata file descriptor `conformsTo` URI SHOULD start with https://w3id.org/ro/crate/" + ] + ) From 9e9fa842045a79df58107ed89550e5fed18e95ed Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 12:06:52 +0200 Subject: [PATCH 046/352] test(ro-crate-1.2): :white_check_mark: test required datePublished property --- .../invalid/ro-crate-metadata.json | 31 +++++++++++ .../valid/ro-crate-metadata.json | 32 +++++++++++ .../test_metadata_rootDataEntity.py | 54 +++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..be25b1667 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/invalid/ro-crate-metadata.json @@ -0,0 +1,31 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes 
ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_datePublished/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py new file mode 100644 index 000000000..e02ff9cca --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import RootDataEntity +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_root_data_entity_crates__ = RootDataEntity() + + +def test_valid_required_datePublished(): + """ + Test that the Root Data Entity is valid when it includes a `datePublished` property. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_required_datePublished, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_invalid_required_datePublished(): + """ + Test that the Root Data Entity is invalid when it does not include a `datePublished` property. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_required_datePublished, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Root Data Entity REQUIRED properties"], + expected_triggered_issues=[ + "The Root Data Entity MUST have a `datePublished` " + "property (as specified by schema.org) with a valid ISO 8601 date" + ] + ) From 7b459f342ad2c7151c05b792af2735f64a3a4618 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 12:43:23 +0200 Subject: [PATCH 047/352] feat(ro-crate-1.2): :sparkles: check that declared citations can be downloaded --- .../1.2/must/6_contextual_entity_metadata.ttl | 21 +++++++++++++++++++ .../profiles/ro-crate/1.2/prefixes.ttl | 14 +++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 0034095f7..511f3d91b 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -62,3 +62,24 @@ ro-crate:ContextualEntityDefinition a sh:NodeShape, validator:HiddenShape ; sh:object ro-crate:ContextualEntity ; ] . 
+ +ro-crate:RequiredDownloadableCiteAs a sh:NodeShape ; + sh:name "RO-Crate Metadata Entity: REQUIRED `citeAs` reference" ; + sh:description """If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?e iana-relation:cite-as ?this . + } + """ + ] ; + sh:property [ + sh:name "RO-Crate Metadata Entity: REQUIRED `cite-as` reference" ; + sh:description """Check if the RO-Crate Metadata Entity's `cite-as` property references a downloadable item.""" ; + sh:path schema_org:url ; + sh:minCount 1 ; + sh:message "If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item" ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl index 7bbf24562..f4f58480f 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -@prefix ro: <./> . @prefix sh: . @prefix xsd: . @prefix ro-crate: . 
# Define the prefixes used in the SPARQL queries -ro-crate:sparqlPrefixes - sh:declare [ - sh:prefix "schema" ; - sh:namespace "http://schema.org/"^^xsd:anyURI ; - ] ; +ro-crate:sparqlPrefixes sh:declare [ + sh:prefix "schema" ; + sh:namespace "http://schema.org/"^^xsd:anyURI ; +] ; sh:declare [ sh:prefix "bioschemas" ; sh:namespace "https://bioschemas.org/"^^xsd:anyURI ; @@ -43,6 +41,10 @@ ro-crate:sparqlPrefixes sh:prefix "codemeta" ; sh:namespace "https://codemeta.github.io/terms/"^^xsd:anyURI ; ] ; + sh:declare [ + sh:prefix "iana-relation" ; + sh:namespace "http://www.iana.org/assignments/relation/"^^xsd:anyURI ; + ] ; sh:declare [ sh:prefix "ro-crate" ; sh:namespace "https://github.com/crs4/rocrate-validator/profiles/ro-crate-1.2/"^^xsd:anyURI ; From 0b04663789998d9f48b63ba7c78363bc5533d349 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 12:51:53 +0200 Subject: [PATCH 048/352] test(ro-crate-1.2): :white_check_mark: test downloadable cite-as entity --- .../invalid/ro-crate-metadata.json | 41 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 42 +++++++++++++++++++ .../test_metadata_rootDataEntity.py | 30 +++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..213c7183f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/invalid/ro-crate-metadata.json @@ -0,0 +1,41 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + 
"@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "cite-as": { + "@id": "https://doi.org/10.1234/example-doi" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://doi.org/10.1234/example-doi", + "@type": "CreativeWork", + "name": "Example DOI Citation", + "description": "An example citation for the Root Data Entity using a DOI." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/valid/ro-crate-metadata.json new file mode 100644 index 000000000..a3ff3ac12 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_downloadable_citeas/valid/ro-crate-metadata.json @@ -0,0 +1,42 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "cite-as": { + "@id": "https://doi.org/10.1234/example-doi" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://doi.org/10.1234/example-doi", + "@type": "CreativeWork", + "name": "Example DOI Citation", + "description": "An example citation for the Root Data Entity using a DOI.", + "url": "https://example.com/download/ro-crate.zip" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index e02ff9cca..54f89c407 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -52,3 +52,33 @@ def test_invalid_required_datePublished(): "property (as specified by schema.org) with a valid ISO 8601 date" ] ) + + +def test_valid_required_downloadable_citeAs(): + """ + Test that the Root Data Entity is valid when it includes a `cite-as` property that references a downloadable item. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_required_downloadable_citeAs, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + + ) + + +def test_invalid_required_downloadable_citeAs(): + """ + Test that the Root Data Entity is invalid when it includes a `cite-as` property + that does not reference a downloadable item. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_required_downloadable_citeAs, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Metadata Entity: REQUIRED `citeAs` reference"], + expected_triggered_issues=[ + "If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item" + ] + ) From 468fe8dea0425349690a0841e1aed6f44e46c4b0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 15:10:39 +0200 Subject: [PATCH 049/352] feat(ro-crate-1.2): :sparkles: check that resolvable identifiers are cited through the cite-as property --- .../should/2_root_data_entity_identifier.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py index a98f99b8a..4e8fb2f92 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py @@ -53,3 +53,45 @@ def check_identifier(self, context: ValidationContext) -> bool: context.result.add_issue( f'Error checking Root Data Entity @id: {str(e)}', self) return False + + +@requirement(name="Root Data Entity: use cite-as for resolvable identifiers") +class RootDataEntityCiteAsIdentifierChecker(PyFunctionCheck): + """ + If the Root Data Entity has a resolvable identifier, it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity. + """ + + @check(name="Root Data Entity: use cite-as for resolvable identifiers") + def check_cite_as_reference(self, context: ValidationContext) -> bool: + """ + If the Root Data Entity has a resolvable identifier, + it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity. 
+ """ + try: + if not context.ro_crate.is_detached(): + return True + root_entity = context.ro_crate.metadata.get_root_data_entity() + if not root_entity.is_remote(): + return True + + # Check if the `cite-as` property is present and references the Root Data Entity + cite_as = root_entity.get_property('cite-as') + if root_entity.id_as_uri.is_remote_resource(): + if not cite_as: + context.result.add_issue( + 'If the Root Data Entity has a resolvable identifier, ' + 'it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity.', self) + return False + + # If the `cite-as` property is present, check that it references the Root Data Entity + if cite_as.id != root_entity.id: + context.result.add_issue( + 'If the Root Data Entity has a resolvable identifier, ' + 'it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity.', self) + return False + + return True + except Exception as e: + context.result.add_issue( + f'Error checking Root Data Entity `cite-as` reference: {str(e)}', self) + return False From d0d1ec16faef34aea92e9ed98ff9c4adbe9ef3e7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 15:11:39 +0200 Subject: [PATCH 050/352] test(ro-crate-1.2): :white_check_mark: test cite-as property of resolvable identifiers --- .../invalid/citeas-ro-crate-metadata.json | 36 +++++++++++++++++ .../valid/citeas-ro-crate-metadata.json | 39 +++++++++++++++++++ .../test_metadata_rootDataEntity.py | 32 +++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/invalid/citeas-ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/invalid/citeas-ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/invalid/citeas-ro-crate-metadata.json new file mode 100644 index 000000000..8ed669040 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/invalid/citeas-ro-crate-metadata.json @@ -0,0 +1,36 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "distribution": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json new file mode 100644 index 000000000..a29c711b8 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "cite-as": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + }, + "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "distribution": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 54f89c407..2e20e72fb 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -82,3 +82,35 @@ def test_invalid_required_downloadable_citeAs(): "If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item" ] ) + + +def test_valid_recommended_citeAs_for_resolvable_id(): + """ + Test that the Root Data Entity is valid when it has a resolvable identifier + and includes a `cite-as` property that references the Root Data Entity. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_citeAs_for_resolvable_id, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_citeAs_for_resolvable_id(): + """ + Test that the Root Data Entity is invalid when it has a resolvable identifier + and does not include a `cite-as` property that references the Root Data Entity. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_citeAs_for_resolvable_id, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Root Data Entity: use cite-as for resolvable identifiers"], + expected_triggered_issues=[ + "If the Root Data Entity has a resolvable identifier, " + "it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity." 
+ ] + ) From 9e73b541f1afa2e767965c0212ce48f06329f968 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 16:00:05 +0200 Subject: [PATCH 051/352] feat(ro-crate-1.2): :sparkles: check the type of additional profiles referenced through the `conformsTo` property --- .../1.2/must/2_root_data_entity_metadata.ttl | 16 +++++++++++----- .../profiles/ro-crate/1.2/prefixes.ttl | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl index c4bc0d002..1e2fcf22e 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl @@ -19,6 +19,7 @@ @prefix sh: . @prefix validator: . @prefix xsd: . +@prefix dct: . ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape ; sh:name "Identify the Root Data Entity of the RO-Crate" ; @@ -133,11 +134,16 @@ ro-crate:RootDataEntityRequiredProperties a sh:NodeShape ; sh:path schema_org:datePublished ; sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" ; sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date" ; - ] ; + ] . 
+ +ro-crate:RootDataEntityOptionalConformsToProperty a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity: optional `conformsTo` property value restriction" ; + sh:description """If the Root Data Entity includes a `conformsTo` property, its values MUST reference Profile entities.""" ; + sh:targetClass ro-crate:RootDataEntity ; sh:property [ - sh:name "Root Data Entity: `conformsTo` profile values" ; - sh:description """If present, `conformsTo` values MUST reference Profile entities.""" ; - sh:path schema_org:conformsTo ; + sh:name "Root Data Entity: optional `conformsTo` property value restriction" ; + sh:description """Check if the Root Data Entity's `conformsTo` property values reference Profile entities.""" ; + sh:path dct:conformsTo ; sh:class prof:Profile ; - sh:message "If present, `conformsTo` values MUST reference a Profile entity" ; + sh:message """If the Root Data Entity includes a `conformsTo` property, its values MUST reference Profile entities.""" ; ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl index f4f58480f..8cad6b4dd 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/prefixes.ttl @@ -49,6 +49,10 @@ ro-crate:sparqlPrefixes sh:declare [ sh:prefix "ro-crate" ; sh:namespace "https://github.com/crs4/rocrate-validator/profiles/ro-crate-1.2/"^^xsd:anyURI ; ] ; + sh:declare [ + sh:prefix "dct" ; + sh:namespace "http://purl.org/dc/terms/"^^xsd:anyURI ; + ] ; sh:declare [ sh:prefix "ro" ; sh:namespace "./"^^xsd:anyURI ; From d90d04d5dc133b1998d8f239cdafc1937e82921c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 16:01:11 +0200 Subject: [PATCH 052/352] test(ro-crate-1.2): :white_check_mark: test type of additional conformsTo properties --- .../invalid/ro-crate-metadata.json | 55 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 56 +++++++++++++++++++ .../test_metadata_rootDataEntity.py | 30 
++++++++++ 3 files changed, 141 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..2fe59a943 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/invalid/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "cite-as": { + "@id": "https://doi.org/10.1234/example-doi" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1x" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://doi.org/10.1234/example-doi", + "@type": "CreativeWork", + "name": "Example DOI Citation", + "description": "An example citation for the Root Data Entity using a DOI.", + "url": "https://example.com/download/ro-crate.zip" + }, + { + "@id": "https://w3id.org/ro/crate/1.1x", + "@type": [ + "CreativeWork" + ], + "name": "RO-Crate 1.1x Profile", + "description": "An example of an additional `conformsTo` profile reference in the Root Data Entity metadata." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/valid/ro-crate-metadata.json new file mode 100644 index 000000000..e85886483 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/additional_conformsTo/valid/ro-crate-metadata.json @@ -0,0 +1,56 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [], + "cite-as": { + "@id": "https://doi.org/10.1234/example-doi" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1x" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://doi.org/10.1234/example-doi", + "@type": "CreativeWork", + "name": "Example DOI Citation", + "description": "An example citation for the Root Data Entity using a DOI.", + "url": "https://example.com/download/ro-crate.zip" + }, + { + "@id": "https://w3id.org/ro/crate/1.1x", + "@type": [ + "CreativeWork", + "Profile" + ], + "name": "RO-Crate 1.1x Profile", + "description": "An example of an additional `conformsTo` profile reference in the Root Data Entity metadata." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 2e20e72fb..c4d402f36 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -114,3 +114,33 @@ def test_invalid_recommended_citeAs_for_resolvable_id(): "it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity." ] ) + + +def test_valid_additional_conformsTo_reference(): + """ + Test that the Root Data Entity is valid when it includes + an additional `conformsTo` property that references a Profile entity. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_additional_conformsTo_reference, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_additional_conformsTo_reference(): + """ + Test that the Root Data Entity is invalid when it includes + an additional `conformsTo` property that does not reference a Profile entity. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_additional_conformsTo_reference, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["RO-Crate Root Data Entity: optional `conformsTo` property value restriction"], + expected_triggered_issues=[ + "If the Root Data Entity includes a `conformsTo` property, its values MUST reference Profile entities." + ] + ) From 7596e143a25841a2d9d6fdb3aa464ece761096b1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 20:55:42 +0200 Subject: [PATCH 053/352] feat(ro-crate-1.2): :sparkles: check relative @id of local data entities --- .../1.2/must/4_data_entity_metadata.py | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index 856e4eec1..ee659add7 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -113,10 +113,22 @@ class DataEntityIdentifierChecker(PyFunctionCheck): Data Entity identifiers must be valid URI references and use relative paths for payload files. 
""" - @check(name="Data Entity: @id validity") + @check(name="Data Entity: @id value requirements") def check_identifiers(self, context: ValidationContext) -> bool: result = True + root_entity_id = None + root_entity_is_local = False + root_entity_absolute_path = None + try: + root_data_entity = context.ro_crate.metadata.get_root_data_entity() + root_entity_id = root_data_entity.id + root_entity_is_local = root_data_entity.id_as_uri.is_local_resource if root_data_entity.id_as_uri else False + root_entity_absolute_path = root_data_entity.id_as_path if root_data_entity.has_absolute_path() else None + except Exception: + pass for entity in context.ro_crate.metadata.get_data_entities(): + if root_entity_id and entity.id == root_entity_id: + continue if entity.has_local_identifier(): continue if "\\" in entity.id or " " in entity.id: @@ -125,13 +137,19 @@ def check_identifiers(self, context: ValidationContext) -> bool: result = False if context.fail_fast: return False - continue - if not re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", entity.id) and not entity.has_relative_path(): - context.result.add_issue( - f"Data Entity '{entity.id}' does not use a valid absolute or relative URI", self) - result = False - if context.fail_fast: - return False + if (root_entity_is_local and + not str(entity.id_as_path).startswith(str(root_entity_absolute_path))): + logger.error( + f"Entity ID as Path: {entity.id_as_path}, Root Entity Absolute Path: {root_entity_absolute_path}") + if (root_entity_is_local and not str(entity.id).startswith("./") and ( + str(entity.id).startswith("/") or + str(entity.id).startswith("file://") + )): + context.result.add_issue( + f"Data Entity '{entity.id}' MUST use a relative @id within the RO-Crate root", self) + result = False + if context.fail_fast: + return False return result @check(name="Data Entity: relative @id for payload files") From 2bd96802f43e2eb195976ef70003ea9546f5ffeb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 
21:49:28 +0200 Subject: [PATCH 054/352] test(ro-crate-1.2): :white_check_mark: test relative path of local data entities --- .../invalid/ro-crate-metadata.json | 46 +++++++++++++++ .../valid/my-test-file-2.txt | 0 .../valid/my-test-file.txt | 0 .../valid/ro-crate-metadata.json | 59 +++++++++++++++++++ .../test_metadata_dataEntities.py | 52 ++++++++++++++++ 5 files changed, 157 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file-2.txt create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file.txt create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..23199ec88 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/invalid/ro-crate-metadata.json @@ -0,0 +1,46 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "file://./my-test-file.txt" + } + ] + }, + { + "@id": "file://./my-test-file.txt", + "@type": "File", + "name": "My Test File", + "description": "A test file for RO-Crate.", + "encodingFormat": "text/plain", + "contentSize": 1024, + "dateCreated": "2024-01-01T12:00:00Z", + "dateModified": "2024-01-02T12:00:00Z" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file-2.txt b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file-2.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file.txt b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/my-test-file.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/ro-crate-metadata.json new file mode 100644 index 000000000..428ade88f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/local_entity_reference/valid/ro-crate-metadata.json @@ -0,0 +1,59 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + 
"@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "./my-test-file.txt" + }, + { + "@id": "my-test-file-2.txt" + } + ] + }, + { + "@id": "./my-test-file.txt", + "@type": "File", + "name": "My Test File", + "description": "A test file for RO-Crate.", + "encodingFormat": "text/plain", + "contentSize": 1024, + "dateCreated": "2024-01-01T12:00:00Z", + "dateModified": "2024-01-02T12:00:00Z" + }, + { + "@id": "my-test-file-2.txt", + "@type": "File", + "name": "My Test File 2", + "description": "Another test file for RO-Crate.", + "encodingFormat": "text/plain", + "contentSize": 2048, + "dateCreated": "2024-01-01T12:00:00Z", + "dateModified": "2024-01-02T12:00:00Z" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py new file mode 100644 index 000000000..afadf546f --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -0,0 +1,52 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import DataEntities +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_root_data_entity_crates__ = DataEntities() + + +def test_valid_local_entity_reference(): + """ + Test that a Data Entity is valid when it references a local file using a relative path in its `@id`. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_local_entity_reference, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_local_entity_reference(): + """ + Test that a Data Entity is invalid when it references a local file using an absolute path in its `@id`. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_local_entity_reference, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Data Entity: identifier requirements"], + expected_triggered_issues=[ + "MUST use a relative @id within the RO-Crate root" + ] + ) From 19f9d1c7f166afd1e86001cf2074b14c2b1d141a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 22:37:40 +0200 Subject: [PATCH 055/352] feat(ro-crate-1.2): :sparkles: check data entity identifier of detached ROCrates --- .../ro-crate/1.2/must/4_data_entity_metadata.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index ee659add7..4bbb6babe 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -122,13 +122,19 @@ def check_identifiers(self, context: ValidationContext) -> bool: try: root_data_entity = context.ro_crate.metadata.get_root_data_entity() root_entity_id = root_data_entity.id - root_entity_is_local = root_data_entity.id_as_uri.is_local_resource if root_data_entity.id_as_uri else False + root_entity_is_local = root_data_entity.id_as_uri.is_local_resource() if root_data_entity.id_as_uri else False root_entity_absolute_path = root_data_entity.id_as_path if root_data_entity.has_absolute_path() else None except Exception: pass for entity in context.ro_crate.metadata.get_data_entities(): if root_entity_id and entity.id == root_entity_id: continue + if not root_entity_is_local and not entity.is_remote(): + context.result.add_issue( + f"Data Entity '{entity.id}' has a local identifier but the Root Data Entity does not have a local identifier", self) + result = False + if context.fail_fast: + return False if 
entity.has_local_identifier(): continue if "\\" in entity.id or " " in entity.id: @@ -139,8 +145,6 @@ def check_identifiers(self, context: ValidationContext) -> bool: return False if (root_entity_is_local and not str(entity.id_as_path).startswith(str(root_entity_absolute_path))): - logger.error( - f"Entity ID as Path: {entity.id_as_path}, Root Entity Absolute Path: {root_entity_absolute_path}") if (root_entity_is_local and not str(entity.id).startswith("./") and ( str(entity.id).startswith("/") or str(entity.id).startswith("file://") From 2a2fb81ee07de52710965475a1922403ff8e5a68 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 22:52:01 +0200 Subject: [PATCH 056/352] test(ro-crate-1.2): :white_check_mark: test detached ROCrate data entities --- .../invalid/basic-ro-crate-metadata.json | 37 +++++++++++++++++++ .../valid/basic-ro-crate-metadata.json | 37 +++++++++++++++++++ .../test_metadata_dataEntities.py | 28 ++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/invalid/basic-ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/valid/basic-ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/invalid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/invalid/basic-ro-crate-metadata.json new file mode 100644 index 000000000..bcc586a55 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/invalid/basic-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": 
"https://example.org/ro-crate/detached/basic" + } + }, + { + "@id": "https://example.org/ro-crate/detached/basic", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "./my-test-file.txt" + } + ] + }, + { + "@id": "./my-test-file.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/valid/basic-ro-crate-metadata.json new file mode 100644 index 000000000..65f04550d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/detached_rocrate_dataEntities/valid/basic-ro-crate-metadata.json @@ -0,0 +1,37 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/basic" + } + }, + { + "@id": "https://example.org/ro-crate/detached/basic", + "@type": "Dataset", + "name": "Detached RO-Crate (basic)", + "description": "Minimal detached RO-Crate referencing a remote file.", + "datePublished": "2024-05-17", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "hasPart": [ + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" + } + ] + }, + { + "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", + "@type": "File", + "name": "iso_8859-1.txt", + "description": "Remote text file referenced by the detached RO-Crate.", + "encodingFormat": "text/plain" + } + ] +} diff --git 
a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index afadf546f..2a0be22c1 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -50,3 +50,31 @@ def test_invalid_local_entity_reference(): "MUST use a relative @id within the RO-Crate root" ] ) + + +def test_valid_detached_rocrate_dataEntities(): + """ + Test that a Data Entity is valid when it references a remote file using an absolute URI in its `@id`. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_detached_rocrate_dataEntities, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_detached_rocrate_dataEntities(): + """ + Test that a Data Entity of a detached RO-Crate is invalid when it uses a local relative path as its `@id`. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_detached_rocrate_dataEntities, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Data Entity: identifier requirements"], + expected_triggered_issues=[ + "has a local identifier but the Root Data Entity does not have a local identifier" + ] + ) From c2c1a1cfc840695199403d98e4b30e0c9be88f39 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 14 Apr 2026 23:59:05 +0200 Subject: [PATCH 057/352] refactor(ro-crate-1.2): :recycle: disable a subset of recommended shapes --- .../1.2/should/4_data_entity_metadata.ttl | 287 ++++++++---------- 1 file changed, 121 insertions(+), 166 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index 6fa8c4f77..3e5c9dca6 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++
b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -1,17 +1,16 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# # Copyright (c) 2024-2026 CRS4 +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . @@ -20,71 +19,48 @@ @prefix sh: . @prefix xsd: . -ro-crate:FileRecommendedProperties a sh:NodeShape ; - sh:targetClass ro-crate:File ; - sh:name "File Data Entity: RECOMMENDED properties"; - sh:description """A `File` Data Entity SHOULD have detailed descriptions encodings through the `encodingFormat` property""" ; - sh:property [ - sh:minCount 1 ; - sh:maxCount 2 ; - sh:path schema_org:encodingFormat ; - sh:severity sh:Warning ; - sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; - sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property. 
- The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or, - to add more detail, SHOULD be linked using a `PRONOM` to a `Contextual Entity` of type `WebPage` - (see [Adding detailed descriptions of encodings](https://www.researchobject.org/ro-crate/1.2/data-entities.html#adding-detailed-descriptions-of-encodings)). - """ ; - sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`"; - sh:or ( - [ - sh:datatype xsd:string ; - sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; - sh:name "File Data Entity: RECOMMENDED `PRONOM` for the `encodingFormat` property" ; - sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier - (e.g., application/pdf, application/text, image/svg+xml, image/svg;q=0.9,/;q=0.8,image/svg+xml;q=0.9,/;q=0.8, application/vnd.uplanet.listcmd-wbxml;charset=utf-8). - """ ; - sh:message "The `encodingFormat` SHOULD be linked using a PRONOM identifier (e.g., application/pdf)."; - ] - [ - sh:nodeKind sh:IRI ; - sh:class schema_org:WebPage ; - sh:name "File Data Entity: RECOMMENDED `Contextual Entity` linked to the `encodingFormat` property"; - sh:description "Check if the File Data Entity `encodingFormat` is linked to a `Contextual Entity of type `WebPage`." ; - sh:message "The `encodingFormat` SHOULD be linked to a `Contextual Entity` of type `WebPage`." ; - ] - ) - ] . 
+# ro-crate:FileRecommendedProperties a sh:NodeShape ; +# sh:name "File Data Entity: RECOMMENDED properties" ; +# sh:description """A `File` Data Entity SHOULD have detailed descriptions of encodings through the `encodingFormat` property.""" ; +# sh:targetClass ro-crate:File ; +# sh:property [ +# sh:minCount 1 ; +# sh:maxCount 2 ; +# sh:path schema_org:encodingFormat ; +# sh:severity sh:Warning ; +# sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; +# sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property. +# The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or, +# to add more detail, SHOULD be linked using a `PRONOM` identifier to a `Contextual Entity` of type `WebPage`.""" ; +# sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`" ; +# sh:or ( +# [ +# sh:datatype xsd:string ; +# sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; +# ] +# [ +# sh:nodeKind sh:IRI ; +# ] +# )] . -ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ; - sh:targetClass ro-crate:File ; - sh:name "File Data Entity: RECOMMENDED contentSize" ; - sh:description """A `File` Data Entity SHOULD have `contentSize` set to the size in bytes.""" ; - sh:property [ - a sh:PropertyShape ; - sh:minCount 1 ; - sh:name "File Data Entity: RECOMMENDED `contentSize` property" ; - sh:path schema_org:contentSize ; - sh:datatype xsd:string ; - sh:severity sh:Warning ; - sh:message "File Data Entities SHOULD have a `contentSize` property" ; - # sh:sparql [ - # sh:message "If the value is a string it must be a string representing an integer." ; - # sh:prefixes ro-crate:sparqlPrefixes ; - # sh:select """ - # SELECT ?this ?value - # WHERE { - # ?this schema:contentSize ?value . - # FILTER (!regex(str(?value), "^[0-9]+$")) - # } - # """ ; - # ] ; - ] . 
+# ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ; +# sh:name "File Data Entity: RECOMMENDED contentSize" ; +# sh:description """A `File` Data Entity SHOULD have `contentSize` set to the size in bytes.""" ; +# sh:targetClass ro-crate:File ; +# sh:property [ +# a sh:PropertyShape ; +# sh:minCount 1 ; +# sh:name "File Data Entity: RECOMMENDED `contentSize` property" ; +# sh:path schema_org:contentSize ; +# sh:datatype xsd:string ; +# sh:severity sh:Warning ; +# sh:message "File Data Entities SHOULD have a `contentSize` property" ; +# ] . ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; - sh:targetClass ro-crate:DataEntity ; sh:name "Data Entity: RECOMMENDED properties" ; sh:description """Data Entities SHOULD have `name` and `description` properties.""" ; + sh:targetClass ro-crate:DataEntity ; sh:property [ a sh:PropertyShape ; sh:name "Data Entity: RECOMMENDED `name` property" ; @@ -111,27 +87,37 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:severity sh:Warning ; sh:message "Data Entity `license` SHOULD reference a CreativeWork entity" ; ] ; - sh:property [ - sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; - sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; - sh:or ( - [ sh:path schema_org:contentLocation ; sh:class schema_org:Place ] - [ sh:path schema_org:spatialCoverage ; sh:class schema_org:Place ] - ) ; - sh:severity sh:Warning ; - sh:message "Data Entity location properties SHOULD reference a Place" ; - ] ; - sh:property [ - sh:name "Data Entity: RECOMMENDED `citation` reference" ; - sh:description """If present, `citation` SHOULD reference a ScholarlyArticle or CreativeWork.""" ; - sh:path schema_org:citation ; - sh:or ( - [ sh:class schema_org:ScholarlyArticle ] - [ sh:class schema_org:CreativeWork ] - ) ; - sh:severity sh:Warning ; - sh:message "Data Entity `citation` SHOULD reference a ScholarlyArticle or CreativeWork" ; - ] ; + # 
sh:property [ + # sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; + # sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; + # sh:or ( + # [ + # sh:path schema_org:contentLocation ; + # sh:class schema_org:Place + # ] + # [ + # sh:path schema_org:spatialCoverage ; + # sh:class schema_org:Place + # ] + # ) ; + # sh:severity sh:Warning ; + # sh:message "Data Entity location properties SHOULD reference a Place" ; + # ] ; + # sh:property [ + # sh:name "Data Entity: RECOMMENDED `citation` reference" ; + # sh:description """If present, `citation` SHOULD reference a ScholarlyArticle or CreativeWork.""" ; + # sh:path schema_org:citation ; + # sh:or ( + # [ + # sh:class schema_org:ScholarlyArticle + # ] + # [ + # sh:class schema_org:CreativeWork + # ] + # ) ; + # sh:severity sh:Warning ; + # sh:message "Data Entity `citation` SHOULD reference a ScholarlyArticle or CreativeWork" ; + # ] ; sh:property [ sh:name "Data Entity: RECOMMENDED `author` reference" ; sh:description """If present, `author` SHOULD reference Person entities.""" ; @@ -141,35 +127,30 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:message "Data Entity `author` SHOULD reference a Person entity" ; ] . -ro-crate:DirectoryDataEntityRequiredValueRestriction a sh:NodeShape ; - sh:name "Directory Data Entity: RECOMMENDED value restriction" ; - sh:description """A Directory Data Entity SHOULD end with `/`""" ; - sh:targetNode ro-crate:Directory ; - sh:property [ - a sh:PropertyShape ; - sh:name "Directory Data Entity: RECOMMENDED value restriction" ; - sh:description """Check if the Directory Data Entity ends with `/`""" ; - sh:path [ sh:inversePath rdf:type ] ; - sh:message """Every Data Entity Directory URI SHOULD end with `/`""" ; - sh:pattern "/$" ; - ] . 
- -ro-crate:DatasetRecommendedProperties a sh:NodeShape ; - sh:targetClass schema_org:Dataset ; - sh:name "Dataset: RECOMMENDED properties" ; - sh:description """Datasets SHOULD include `hasPart` and may include nested Datasets.""" ; - sh:property [ - a sh:PropertyShape ; - sh:name "Dataset: RECOMMENDED `hasPart` property" ; - sh:path schema_org:hasPart ; - sh:minCount 1 ; - sh:severity sh:Warning ; - sh:message "Dataset entities SHOULD include `hasPart`" ; - ] . +# ro-crate:DirectoryDataEntityRecommendedValueRestriction a sh:NodeShape ; +# sh:name "Directory Data Entity: RECOMMENDED value restriction" ; +# sh:description """A Directory Data Entity SHOULD end with `/`""" ; +# sh:targetClass ro-crate:Directory ; +# sh:property [ +# a sh:PropertyShape ; +# sh:name "Directory Data Entity: RECOMMENDED value restriction" ; +# sh:description """Check if the Directory Data Entity ends with `/`""" ; +# sh:path [ +# sh:inversePath rdf:type +# ] ; sh:message """Every Data Entity Directory URI SHOULD end with `/`""" ; sh:pattern "/$" ; +# ] . -# ro-crate:DatasetRelativeIdShouldEndWithSlash a sh:NodeShape ; +# ro-crate:DatasetIdShouldEndWithSlash a sh:NodeShape ; # sh:name "Dataset: RECOMMENDED trailing slash" ; -# sh:description """If a Dataset has a relative @id, it SHOULD end with `/`.""" ; +# sh:description "Dataset IRI SHOULD end with `/`." ; +# sh:targetClass schema_org:Dataset ; +# sh:nodeKind sh:IRI ; +# sh:pattern "/$" ; +# sh:message "Dataset IRI SHOULD end with '/'" . + +# ro-crate:WebDatasetDistributionRecommended a sh:NodeShape ; +# sh:name "Dataset: RECOMMENDED `distribution` for web datasets" ; +# sh:description """If a Dataset has a web URI, it SHOULD include `distribution`.""" ; # sh:target [ # a sh:SPARQLTarget ; # sh:prefixes ro-crate:sparqlPrefixes ; @@ -177,51 +158,25 @@ ro-crate:DatasetRecommendedProperties a sh:NodeShape ; # SELECT ?this # WHERE { # ?this a schema:Dataset . -# FILTER(!regex(str(?this), "^[A-Za-z][A-Za-z0-9+\\.-]*:")) . 
-# FILTER(!regex(str(?this), "#$")) . -# FILTER(!regex(str(?this), "/$")) . +# FILTER regex(str(?this), "^https?://", "i") # } # """ # ] ; -# sh:message "Dataset entities with relative @id SHOULD end with '/'" . +# sh:property [ +# a sh:PropertyShape ; +# sh:path schema_org:distribution ; +# sh:minCount 1 ; +# sh:severity sh:Warning ; +# sh:message "Web-based Dataset entities SHOULD include `distribution`" ; +# ] . -ro-crate:DatasetIdShouldEndWithSlash a sh:NodeShape ; - sh:name "Dataset: RECOMMENDED trailing slash" ; - sh:description "Dataset IRI SHOULD end with `/`." ; - sh:targetClass schema_org:Dataset ; - sh:nodeKind sh:IRI ; - sh:pattern "/$" ; - sh:message "Dataset IRI SHOULD end with '/'" . - -ro-crate:WebDatasetDistributionRecommended a sh:NodeShape ; - sh:name "Dataset: RECOMMENDED `distribution` for web datasets" ; - sh:description """If a Dataset has a web URI, it SHOULD include `distribution`.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a schema:Dataset . - FILTER regex(str(?this), "^https?://", "i") - } - """ ; - ] ; - sh:property [ - a sh:PropertyShape ; - sh:path schema_org:distribution ; - sh:minCount 1 ; - sh:severity sh:Warning ; - sh:message "Web-based Dataset entities SHOULD include `distribution`" ; - ] . - -ro-crate:FileConformsToProfile a sh:NodeShape ; - sh:name "File: RECOMMENDED `conformsTo` profile" ; - sh:description """If present, `conformsTo` SHOULD reference a Profile entity.""" ; - sh:targetClass ro-crate:File ; - sh:property [ - sh:path schema_org:conformsTo ; - sh:class prof:Profile ; - sh:severity sh:Warning ; - sh:message "File `conformsTo` SHOULD reference a Profile entity" ; - ] . 
+# ro-crate:FileConformsToProfile a sh:NodeShape ; +# sh:name "File: RECOMMENDED `conformsTo` profile" ; +# sh:description """If present, `conformsTo` SHOULD reference a Profile entity.""" ; +# sh:targetClass ro-crate:File ; +# sh:property [ +# sh:path schema_org:conformsTo ; +# sh:class prof:Profile ; +# sh:severity sh:Warning ; +# sh:message "File `conformsTo` SHOULD reference a Profile entity" ; +# ] . From c1c95f365cf2be9bc0cd9fd7fb4ab4aa37493091 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 00:00:31 +0200 Subject: [PATCH 058/352] test(ro-crate-1.2): :white_check_mark: test recommended data entity properties --- .../invalid/my-test-data.csv | 0 .../invalid/ro-crate-metadata.json | 41 +++++++++++++++++++ .../valid/ro-crate-metadata.json | 32 +++++++++++++++ .../test_metadata_dataEntities.py | 29 +++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/my-test-data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/my-test-data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/my-test-data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..8c9e25b79 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/invalid/ro-crate-metadata.json @@ -0,0 +1,41 @@ +{ + "@context": 
"https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "my-test-data.csv" + } + ] + }, + { + "@id": "my-test-data.csv", + "@type": "File", + "encodingFormat": "text/csv" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. 
In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 2a0be22c1..d1eec0300 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -78,3 +78,32 @@ def test_invalid_detached_rocrate_dataEntities(): "has a local identifier but the Root Data Entity does not have a local identifier" ] ) + + +def test_valid_recommended_properties(): + """ + Test that a Data Entity is valid when it includes recommended properties. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_properties, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_properties(): + """ + Test that a Data Entity is invalid when it lacks the recommended properties.
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_properties, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Data Entity: RECOMMENDED properties"], + expected_triggered_issues=[ + "Data Entities SHOULD have a `name` property", + "Data Entities SHOULD have a `description` property" + ] + ) From b380bda4dd7e57c354b1a0e452ca62cc56dc7f78 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 00:23:35 +0200 Subject: [PATCH 059/352] feat(ro-crate-1.2): :sparkles: check location of data entities --- .../1.2/should/4_data_entity_metadata.ttl | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index 3e5c9dca6..a7e181c7a 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -87,6 +87,26 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:severity sh:Warning ; sh:message "Data Entity `license` SHOULD reference a CreativeWork entity" ; ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + + sh:select """ + SELECT ?this + WHERE { + FILTER NOT EXISTS { + ?this (schema:contentLocation|schema:spatialCoverage) ?location . + ?location a schema:Place . 
+ } + } + """ ; + sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; + sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; + sh:message "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ; + sh:severity sh:Warning ; + ] ; + + # sh:property [ # sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; # sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; From c07f12689642e7d0d135f3e3f86101187124f97a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 00:25:59 +0200 Subject: [PATCH 060/352] test(ro-crate-1.2): :bug: fix missing violation --- .../profiles/ro-crate-1.2/test_metadata_dataEntities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index d1eec0300..42ae6b811 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -104,6 +104,7 @@ def test_invalid_recommended_properties(): expected_triggered_requirements=["Data Entity: RECOMMENDED properties"], expected_triggered_issues=[ "Data Entities SHOULD have a `name` property", - "Data Entities SHOULD have a `description` property" + "Data Entities SHOULD have a `description` property", + "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ] ) From bf5d6ba39e89da7581f85f793aad94cd398ca683 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 09:27:34 +0200 Subject: [PATCH 061/352] fix(ro-crate-1.2): :card_file_box: update test data --- .../valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json index 477c9dd62..348599ad6 100644 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json @@ -22,12 +22,12 @@ "datePublished": "2024-01-01", "hasPart": [ { - "@id": "data.txt" + "@id": "https://example.org/ro-crate/123/data.txt" } ] }, { - "@id": "data.txt", + "@id": "https://example.org/ro-crate/123/data.txt", "@type": "File", "name": "data.txt", "description": "Example data file.", From 36de552be89c6ed80d34b135e0e11e79b7dbefc8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 10:52:41 +0200 Subject: [PATCH 062/352] fix(ro-crate): :bug: update content size check --- .../1.1/should/5_web_data_entity_metadata.py | 2 +- .../1.2/should/4_data_entity_metadata.py | 145 ++++++++++++++++++ rocrate_validator/rocrate/base.py | 18 ++- 3 files changed, 159 insertions(+), 6 deletions(-) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py diff --git a/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py index e10b9ea70..eb821df23 100644 --- a/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.1/should/5_web_data_entity_metadata.py @@ -92,7 +92,7 @@ def check_content_size(self, context: ValidationContext) -> bool: if entity.is_available(): content_size = entity.get_property("contentSize") actual_size = context.ro_crate.get_external_file_size(entity.id) - if content_size and int(content_size) != actual_size: + if content_size and actual_size is not None and int(content_size) != actual_size: context.result.add_issue( f"The property contentSize={content_size} of the Web-based Data Entity " 
f"{entity.id} does not match the actual size of " diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py new file mode 100644 index 000000000..9a68c7e76 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py @@ -0,0 +1,145 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="Data Entity: citation references") +class DataEntityCitationChecker(PyFunctionCheck): + """ + Citation references must include an absolute URI. 
+ """ + + @check(name="Data Entity: citation must include @id") + def check_citation(self, context: ValidationContext) -> bool: + result = True + for entity in context.ro_crate.metadata.get_data_entities(): + citations = entity.get_property("citation") + if citations is None: + continue + citation_list = citations if isinstance(citations, list) else [citations] + for citation in citation_list: + if isinstance(citation, str): + citation_id = citation + elif hasattr(citation, "id"): + citation_id = citation.id + else: + context.result.add_issue( + f"Citation for Data Entity '{entity.id}' must reference a publication @id", self) + result = False + if context.fail_fast: + return False + continue + if not re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", citation_id): + context.result.add_issue( + f"Citation for Data Entity '{entity.id}' must be an absolute URI", self) + result = False + if context.fail_fast: + return False + return result + + +@requirement(name="Web-based Data Entity: REQUIRED availability") +class WebDataEntityRequiredChecker(PyFunctionCheck): + """ + Web-based Data Entities MUST be directly downloadable at the time of creation. 
+ """ + + @check(name="Web-based Data Entity: RECOMMENDED resource availability", severity=Severity.RECOMMENDED) + def check_availability_warning(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if context.settings.creation_time or context.settings.enforce_availability: + return True + if context.settings.metadata_only: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + assert entity.id is not None, "Entity has no @id" + try: + if not entity.is_available(): + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) + result = False + except Exception as e: + context.result.add_issue( + f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) + result = False + if not result and context.fail_fast: + return result + return result + + @check(name="Web-based Data Entity: `contentSize` property", severity=Severity.RECOMMENDED) + def check_content_size(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + assert entity.id is not None, "Entity has no @id" + if entity.is_available(): + content_size = entity.get_property("contentSize") + if content_size: + if isinstance(content_size, str): + content_value = content_size + elif hasattr(content_size, "id"): + content_value = content_size.id + else: + content_value = str(content_size) + try: + content_int = int(str(content_value)) + except Exception: + content_int = None + external_size = context.ro_crate.get_external_file_size(entity.id) + if external_size is not None and content_int is not None and content_int != external_size: + context.result.add_issue( + f'The property contentSize={content_size} of the Web-based Data Entity ' + f'{entity.id} does not match the actual size of ' + f'the downloadable content, i.e., 
{external_size} (bytes)', self, + violatingEntity=entity.id, violatingProperty='contentSize', + violatingPropertyValue=str(content_value)) + result = False + if not result and context.fail_fast: + return result + return result + + @check(name="Web-based Data Entity: `contentUrl` availability", severity=Severity.RECOMMENDED) + def check_content_url(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + result = True + for entity in context.ro_crate.metadata.get_web_data_entities(): + content_url = entity.get_property("contentUrl") + if not content_url: + continue + urls = content_url if isinstance(content_url, list) else [content_url] + for url in urls: + try: + url_value = url if isinstance(url, str) else url.id + context.ro_crate.get_external_file_size(url_value) + except Exception as e: + context.result.add_issue( + f"contentUrl {url_value} for Web-based Data Entity {entity.id} is not directly downloadable: {e}", + self) + result = False + if not result and context.fail_fast: + return result + return result diff --git a/rocrate_validator/rocrate/base.py b/rocrate_validator/rocrate/base.py index 3c55ec4fa..e284b842c 100644 --- a/rocrate_validator/rocrate/base.py +++ b/rocrate_validator/rocrate/base.py @@ -114,7 +114,12 @@ def is_detached(self) -> bool: root = self.metadata.get_root_data_entity() if root and root.has_type("Dataset") and root.id == "./": return False - return bool(root and root.id_as_uri.is_remote_resource()) + if root and root.id_as_uri.is_remote_resource(): + # An absolute root @id doesn't necessarily mean detached; + # check if there are any local (non-web) data entities + local_data_entities = self.metadata.get_data_entities(exclude_web_data_entities=True) + return all(entity.id == root.id for entity in local_data_entities) + return False @property def metadata_descriptor_id(self) -> str: @@ -318,21 +323,24 @@ def get_external_file_content(uri: str, binary_mode: bool = True) -> str | bytes 
return response.content if binary_mode else response.text @staticmethod - def get_external_file_size(uri: str) -> int: + def get_external_file_size(uri: str) -> int | None: """ Get the size of an external file. :param uri: the URI of the file :type uri: str - :return: the size of the file - :rtype: int + :return: the size of the file, or None if the server did not provide a Content-Length header + :rtype: int | None :raises requests.HTTPError: if the request fails """ response = HttpRequester().head(str(uri)) response.raise_for_status() - return int(response.headers.get("Content-Length")) + content_length = response.headers.get("Content-Length") + if content_length is None: + return None + return int(content_length) @staticmethod def from_metadata_dict(metadata_dict: dict) -> ROCrate: From 9d09c5ee42f3c005034708c38543acc6c50c722f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 11:03:58 +0200 Subject: [PATCH 063/352] test(ro-crate-1.2): :bug: fix test data --- .../valid/ro-crate-metadata.json | 68 ++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json index 3ebcc8ac7..26b39d769 100644 --- a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -36,9 +36,71 @@ "@id": "data.csv", "@type": "File", "name": "Sample data file", - "description": "A sample CSV data file.", - "encodingFormat": "text/csv", - "contentSize": "42" + "description": "A sample CSV data file containing temperature readings.", + "encodingFormat": [ + "text/csv", + { + "@id": "https://www.nationalarchives.gov.uk/PRONOM/x-fmt/18" + } + ], + "contentSize": 
"42", + "contentLocation": { + "@id": "#bologna" + }, + "conformsTo": { + "@id": "#csv-profile" + }, + "dateCreated": "2025-06-15", + "dateModified": "2025-12-01", + "author": { + "@id": "https://orcid.org/0000-0001-2345-6789" + }, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "sdDatePublished": "2026-04-15" + }, + { + "@id": "#csv-profile", + "@type": "CreativeWork", + "name": "CSV Profile for RO-Crate 1.2", + "description": "A profile describing the expected structure and content of CSV files used in RO-Crates.", + "conformsTo": { + "@id": "https://www.iana.org/assignments/media-types/text/csv" + } + }, + { + "@id": "#bologna", + "@type": "Place", + "name": "Bologna, Italy", + "geo": { + "@id": "#bologna-geo" + } + }, + { + "@id": "#bologna-geo", + "@type": [ + "GeoCoordinates", + "Geometry" + ], + "name": "Geographical coordinates of Bologna", + "latitude": 44.4949, + "longitude": 11.3426, + "asWKT": "POINT(11.3426 44.4949)" + }, + { + "@id": "https://orcid.org/0000-0001-2345-6789", + "@type": "Person", + "name": "Dr. 
Jane Doe", + "affiliation": { + "@id": "https://ror.org/12345" + } + }, + { + "@id": "https://ror.org/12345", + "@type": "Organization", + "name": "Example University", + "url": "https://www.exampleuniversity.edu" } ] } From 4fc4c4f2919e7de7c1b34cdab32c1093e355e350 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 11:06:48 +0200 Subject: [PATCH 064/352] test(ro-crate-1.2): :card_file_box: add more test data --- .../valid/ro-crate-1.2-absolute-root/data.txt | 1 - .../prefix-ro-crate-metadata.json | 97 +++++++++++++++++++ .../ro-crate-metadata.json | 38 -------- 3 files changed, 97 insertions(+), 39 deletions(-) delete mode 100644 tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt create mode 100644 tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json delete mode 100644 tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt b/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt deleted file mode 100644 index ce0136250..000000000 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/data.txt +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json new file mode 100644 index 000000000..2a8f68bee --- /dev/null +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json @@ -0,0 +1,97 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://github.com/crs4/pydoop/tree/develop/" + } + }, + { + "@id": "https://github.com/crs4/pydoop/tree/develop/", + "@type": "Dataset", + "name": "Example RO-Crate 1.2 (absolute root)", + "description": "Attached RO-Crate using 
an absolute root @id.", + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "datePublished": "2024-01-01", + "hasPart": [ + { + "@id": "https://raw.githubusercontent.com/crs4/pydoop/refs/heads/develop/README.md" + } + ], + "cite-as": { + "@id": "https://github.com/crs4/pydoop/tree/develop/" + }, + "url": "https://github.com/crs4/pydoop/tree/develop/" + }, + { + "@id": "https://raw.githubusercontent.com/crs4/pydoop/refs/heads/develop/README.md", + "@type": "File", + "name": "README.md", + "description": "Example data file.", + "encodingFormat": "text/plain", + "contentSize": "221", + "contentLocation": { + "@id": "#bologna" + }, + "conformsTo": { + "@id": "#csv-profile" + }, + "dateCreated": "2025-06-15", + "dateModified": "2025-12-01", + "author": { + "@id": "https://orcid.org/0000-0001-2345-6789" + }, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "sdDatePublished": "2026-04-15" + }, + { + "@id": "#csv-profile", + "@type": "CreativeWork", + "name": "Example CSV Profile", + "description": "An example profile describing CSV files." + }, + { + "@id": "#bologna", + "@type": "Place", + "name": "Bologna, Italy", + "description": "The city of Bologna in Italy." + }, + { + "@id": "https://orcid.org/0000-0001-2345-6789", + "@type": "Person", + "name": "Jane Doe", + "description": "An example author with an ORCID identifier.", + "affiliation": { + "@id": "https://ror.org/123456789" + } + }, + { + "@id": "https://ror.org/123456789", + "@type": "Organization", + "name": "Example University", + "description": "An example university affiliated with the author.", + "url": "https://exampleuniversity.edu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/", + "@type": "CreativeWork", + "name": "Creative Commons CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "description": "A Creative Commons license that allows creators to waive all their copyright and related rights in their works, effectively placing them in the public domain." + } + ] +} diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json deleted file mode 100644 index 348599ad6..000000000 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/ro-crate-metadata.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.2/context", - "@graph": [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.2" - }, - "about": { - "@id": "https://example.org/ro-crate/123" - } - }, - { - "@id": "https://example.org/ro-crate/123", - "@type": "Dataset", - "name": "Example RO-Crate 1.2 (absolute root)", - "description": "Attached RO-Crate using an absolute root @id.", - "license": { - "@id": "https://creativecommons.org/publicdomain/zero/1.0/" - }, - "datePublished": "2024-01-01", - "hasPart": [ - { - "@id": "https://example.org/ro-crate/123/data.txt" - } - ] - }, - { - "@id": "https://example.org/ro-crate/123/data.txt", - "@type": "File", - "name": "data.txt", - "description": "Example data file.", - "encodingFormat": "text/plain", - "contentSize": "5" - } - ] -} From 53ba5e5d049c6eb1d68cd2ef55cd2977f971e08c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 11:13:31 +0200 Subject: [PATCH 065/352] test(ro-crate-1.2): :adhesive_bandage: update test data --- .../valid/basic-ro-crate-metadata.json | 30 +++++------ .../recommended_properties/valid/data.csv | 2 + .../valid/ro-crate-metadata.json | 53 +++++++++++++++++-- 3 files changed, 64 insertions(+), 21 deletions(-) create mode 
100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/data.csv diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index 5aa5d10df..a1c732df8 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -2,7 +2,7 @@ "@context": "https://w3id.org/ro/crate/1.2/context", "@graph": [ { - "@id": "ro-crate-metadata.json", + "@id": "basic-ro-crate-metadata.json", "@type": "CreativeWork", "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" @@ -15,28 +15,22 @@ "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", "@type": "Dataset", "name": "Detached RO-Crate (basic)", - "description": "Minimal detached RO-Crate referencing a remote file.", + "description": "Minimal detached RO-Crate with a non-standard metadata descriptor filename.", "datePublished": "2024-05-17", "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" + "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "distribution": { - "@id": "http://spdx.org/licenses/CC0-1.0" + "cite-as": { + "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/" }, - "hasPart": [ - { - "@id": "http://spdx.org/licenses/CC0-1.0" - } - ] + "url": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", + "hasPart": [] }, { - "@id": "http://spdx.org/licenses/CC0-1.0", - "@type": "File", - "name": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", - "description": "Remote text file referenced by 
the detached RO-Crate.", - "encodingFormat": "text/plain", - "contentSize": "167", - "sdDatePublished": "2024-05-17" + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json index 0eb0f6f4b..481e655bc 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -14,19 +14,66 @@ { "@id": "./", "@type": "Dataset", - "name": "Counterexample: preview incorrectly in hasPart", - "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. 
The Person entity below is correctly referenced from the Root Data Entity via the author property.", "datePublished": "2024-01-01", "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [] + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry", + "contactPoint": { + "@id": "#josiah-carberry-contact" + }, + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + } + }, + { + "@id": "#josiah-carberry-contact", + "@type": "ContactPoint", + "name": "Josiah Carberry Contact", + "email": "mailto:josiah.carberry@example.com" + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "description": "An example university that the Person entity is affiliated with.", + "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42", + "contentLocation": { + "@id": "#location" + } + }, + { + "@id": "#location", + "@type": "Place", + "name": "Example location" } ] } From 6cf136d56d9d11c9bfb24ceb6a259793f920ca37 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 11:45:16 +0200 Subject: [PATCH 066/352] feat(ro-crate-1.2): :sparkles: ensure proper marking of File Data Entities --- .../1.2/must/4_data_entity_metadata.ttl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl 
b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl index 9f5d4a67f..f46118b85 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl @@ -66,6 +66,13 @@ ro-crate:FileDataEntity a sh:NodeShape ; sh:message """File Data Entities MUST have "File" as a value for @type.""" ; ] ; + # Expand data graph with triples to mark the matching entities as File instances + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:File ; + ] ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -74,7 +81,6 @@ ro-crate:FileDataEntity a sh:NodeShape ; sh:object ro-crate:DataEntity ; ] . - ro-crate:DirectoryDataEntity a sh:NodeShape ; sh:name "Directory Data Entity: REQUIRED properties" ; sh:description """A Directory Data Entity MUST be of @type `Dataset`. @@ -95,8 +101,8 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; } """ ] ; - - # Decomment for debugging + + # Uncomment for debugging # sh:property [ # sh:name "Test Directory" ; # sh:description """Data Entities representing directories MUST have "Directory" as a value for @type.""" ; @@ -105,14 +111,14 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; # sh:severity sh:Violation ; # ] ; - # Expand data graph with triples from the file data entity + # Expand data graph with triples to mark the matching entities as Directory instances sh:rule [ a sh:TripleRule ; sh:subject sh:this ; sh:predicate rdf:type ; sh:object ro-crate:Directory ; ] ; - + # Expand data graph with triples from the directory data entity sh:rule [ a sh:TripleRule ; @@ -120,7 +126,7 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; sh:predicate rdf:type ; sh:object ro-crate:DataEntity ; ] ; - + # Ensure that the directory data entity is a dataset sh:property [ sh:name "Directory Data Entity: REQUIRED type" ; From 3e08ed1cd55d6a17114e88dc417e33c311cd1b40 Mon 
Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 11:55:05 +0200 Subject: [PATCH 067/352] feat(ro-crate-1.2): :sparkles: check optional citation property in Data Entity --- .../1.2/should/4_data_entity_metadata.ttl | 51 +++++++------------ 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index a7e181c7a..f898ed005 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -105,39 +105,24 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:message "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ; sh:severity sh:Warning ; ] ; - - - # sh:property [ - # sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; - # sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; - # sh:or ( - # [ - # sh:path schema_org:contentLocation ; - # sh:class schema_org:Place - # ] - # [ - # sh:path schema_org:spatialCoverage ; - # sh:class schema_org:Place - # ] - # ) ; - # sh:severity sh:Warning ; - # sh:message "Data Entity location properties SHOULD reference a Place" ; - # ] ; - # sh:property [ - # sh:name "Data Entity: RECOMMENDED `citation` reference" ; - # sh:description """If present, `citation` SHOULD reference a ScholarlyArticle or CreativeWork.""" ; - # sh:path schema_org:citation ; - # sh:or ( - # [ - # sh:class schema_org:ScholarlyArticle - # ] - # [ - # sh:class schema_org:CreativeWork - # ] - # ) ; - # sh:severity sh:Warning ; - # sh:message "Data Entity `citation` SHOULD reference a ScholarlyArticle or CreativeWork" ; - # ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this 
schema:citation ?citation . + FILTER NOT EXISTS { + ?citation a ?type . + FILTER (?type IN (schema:ScholarlyArticle, schema:CreativeWork)) + } + } + """ ; + sh:name "Data Entity: RECOMMENDED `citation` reference" ; + sh:description """If present, `citation` SHOULD reference a ScholarlyArticle or CreativeWork.""" ; + sh:message "Data Entity `citation` SHOULD reference a ScholarlyArticle or CreativeWork" ; + sh:severity sh:Warning ; + ] ; sh:property [ sh:name "Data Entity: RECOMMENDED `author` reference" ; sh:description """If present, `author` SHOULD reference Person entities.""" ; From 297a7bc243aec3e9bc7ab6d97a95f74b5ade4269 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 12:03:08 +0200 Subject: [PATCH 068/352] test(ro-crate-1.2): :white_check_mark: test dataEntity encoding format --- .../invalid/data.csv | 2 + .../invalid/ro-crate-metadata.json | 78 ++++++++++++++++++ .../valid/data.csv | 2 + .../valid/ro-crate-metadata.json | 79 +++++++++++++++++++ .../test_metadata_dataEntities.py | 28 +++++++ 5 files changed, 189 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..a87f52cb8 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/invalid/ro-crate-metadata.json @@ -0,0 +1,78 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry", + "contactPoint": { + "@id": "#josiah-carberry-contact" + }, + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + } + }, + { + "@id": "#josiah-carberry-contact", + "@type": "ContactPoint", + "name": "Josiah Carberry Contact", + "email": "mailto:josiah.carberry@example.com" + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "description": "An example university that the Person entity is affiliated with.", + "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "contentSize": "42", + "contentLocation": { + "@id": "#location" + } + }, + { + "@id": "#location", + "@type": "Place", + "name": "Example location" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/data.csv new file mode 100644 index 000000000..9e1228cfb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/data.csv @@ -0,0 +1,2 @@ +id,value +1,a diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json new file mode 100644 index 000000000..481e655bc --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json @@ -0,0 +1,79 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + 
"@type": "Dataset", + "name": "Example: contextual entity linked from other entities", + "description": "This RO-Crate demonstrates the RO-Crate 1.2 requirement that any contextual entity in the @graph SHOULD be linked to from at least one of the other entities using its @id. The Person entity below is correctly referenced from the Root Data Entity via the author property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://orcid.org/0000-0002-1825-0097", + "@type": "Person", + "name": "Josiah Carberry", + "contactPoint": { + "@id": "#josiah-carberry-contact" + }, + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + } + }, + { + "@id": "#josiah-carberry-contact", + "@type": "ContactPoint", + "name": "Josiah Carberry Contact", + "email": "mailto:josiah.carberry@example.com" + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "description": "An example university that the Person entity is affiliated with.", + "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data", + "description": "A sample data file.", + "encodingFormat": "text/csv", + "contentSize": "42", + "contentLocation": { + "@id": "#location" + } + }, + { + "@id": "#location", + "@type": "Place", + "name": "Example location" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 
42ae6b811..6a99b689b 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -108,3 +108,31 @@ def test_invalid_recommended_properties(): "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ] ) + + +def test_valid_recommended_encoding_format(): + """ + Test that a Data Entity is valid when it includes the recommended `encodingFormat` property. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_encoding_format, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_encoding_format(): + """ + Test that a Data Entity is reported as invalid when the recommended `encodingFormat` property is missing or has an invalid value. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_encoding_format, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Data Entity: RECOMMENDED `encodingFormat` property"], + expected_triggered_issues=[ + "Missing or invalid `encodingFormat` linked to the `File Data Entity`" + ] + ) From a00fe7d9d70271d12b473c81d5874138bebbadd4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 14:08:28 +0200 Subject: [PATCH 069/352] refactor(ro-crate-1.2): :recycle: refine ContextualEntity definition --- .../1.2/must/6_contextual_entity_metadata.ttl | 23 +++++++++++++------ .../should/6_contextual_entity_metadata.ttl | 15 ++++++++++-- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 511f3d91b..689cd18ff 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ 
b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +@prefix owl: . +@prefix rdf: . @prefix ro: <./> . @prefix ro-crate: . -@prefix rdf: . @prefix schema: . +@prefix schema_org: . @prefix sh: . -@prefix xsd: . -@prefix owl: . @prefix validator: . - +@prefix xsd: . ro-crate:ContextualEntityDefinition a sh:NodeShape, validator:HiddenShape ; sh:name "Identify Contextual Entities" ; @@ -38,19 +37,29 @@ ro-crate:ContextualEntityDefinition a sh:NodeShape, validator:HiddenShape ; FILTER NOT EXISTS { ?this schema:about ?anyRoot } FILTER NOT EXISTS { ?anyMF schema:about ?this } FILTER NOT EXISTS { ?this a schema:MediaObject } + FILTER NOT EXISTS { ?this a schema:Dataset } FILTER NOT EXISTS { ?this a owl:Ontology } FILTER NOT EXISTS { ?root schema:hasPart ?this . ?anyAbout schema:about ?root . } - FILTER EXISTS { ?this ?p ?o . 
FILTER(?p NOT IN (owl:sameAs, rdf:type)) } FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) - FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/crate/")) + FILTER(!STRSTARTS(STR(?this), "https://www.w3.org/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/")) FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) + FILTER(!STRSTARTS(STR(?this), "https://codemeta.github.io/")) + FILTER(!STRSTARTS(STR(?this), "http://www.opengis.net/")) + FILTER(!STRSTARTS(STR(?this), "http://pcdm.org/")) + FILTER(!STRSTARTS(STR(?this), "http://xmlns.com/foaf/")) + FILTER(!STRSTARTS(STR(?this), "http://creativecommons.org/ns")) + FILTER(!STRSTARTS(STR(?this), "http://www.iana.org/")) + FILTER(!STRSTARTS(STR(?this), "https://www.iana.org/")) FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "https://nationaalarchief.nl/archieven/archief/")) + FILTER(!STRSTARTS(STR(?this), "https://www.nationalarchives.gov.uk")) FILTER(!STRSTARTS(STR(?this), "urn:")) } """ diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 995548f5e..3654bdc4f 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -50,23 +50,34 @@ ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; sh:select """ SELECT DISTINCT ?this WHERE { - ?other ?pr ?this . + ?this ?p ?o . 
FILTER(isIRI(?this)) FILTER NOT EXISTS { ?this schema:about ?anyRoot } FILTER NOT EXISTS { ?anyMF schema:about ?this } FILTER NOT EXISTS { ?this a schema:MediaObject } + FILTER NOT EXISTS { ?this a schema:Dataset } FILTER NOT EXISTS { ?this a owl:Ontology } FILTER NOT EXISTS { ?root schema:hasPart ?this . ?anyAbout schema:about ?root . } FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) - FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/crate/")) + FILTER(!STRSTARTS(STR(?this), "https://www.w3.org/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/")) FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) + FILTER(!STRSTARTS(STR(?this), "https://codemeta.github.io/")) + FILTER(!STRSTARTS(STR(?this), "http://www.opengis.net/")) + FILTER(!STRSTARTS(STR(?this), "http://pcdm.org/")) + FILTER(!STRSTARTS(STR(?this), "http://xmlns.com/foaf/")) + FILTER(!STRSTARTS(STR(?this), "http://creativecommons.org/ns")) + FILTER(!STRSTARTS(STR(?this), "http://www.iana.org/")) + FILTER(!STRSTARTS(STR(?this), "https://www.iana.org/")) FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "https://nationaalarchief.nl/archieven/archief/")) + FILTER(!STRSTARTS(STR(?this), "https://www.nationalarchives.gov.uk")) FILTER(!STRSTARTS(STR(?this), "urn:")) } """ From d725a1db26dc277ae6cf16d48f0749a54a74ac9b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 14:34:46 +0200 Subject: [PATCH 070/352] test(ro-crate-1.2): :card_file_box: update test data --- .../crates/valid/detached/dataset-ro-crate-metadata.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json b/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json index 5a9de1286..884495912 100644 --- 
a/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json +++ b/tests/data/crates/valid/detached/dataset-ro-crate-metadata.json @@ -32,6 +32,12 @@ "name": "file1.txt", "description": "Remote file in detached RO-Crate.", "encodingFormat": "text/plain" + }, + { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/", + "@type": "CreativeWork", + "name": "Creative Commons Zero 1.0 Universal", + "description": "A Creative Commons license that allows others to copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission. This is the most permissive of the Creative Commons licenses, effectively placing the work in the public domain." } ] } From e501b75156fa3c78fbb40f37ca5eaedcbb1fdda2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 15:57:40 +0200 Subject: [PATCH 071/352] refactor(ro-crate-1.2): :sparkles: refine rule checking ContextualEntity description presence --- .../should/6_contextual_entity_metadata.ttl | 64 +++---------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 3654bdc4f..b5c892103 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . 
@@ -43,59 +42,16 @@ ro-crate:ContextualEntityReferences a sh:NodeShape ; ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; sh:name "Contextual Entity RECOMMENDED description" ; sh:description """Contextual entities referenced by other entities SHOULD be described in the same graph.""" ; - # sh:targetClass ro-crate:ContextualEntity ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT DISTINCT ?this - WHERE { - ?this ?p ?o . - FILTER(isIRI(?this)) - FILTER NOT EXISTS { ?this schema:about ?anyRoot } - FILTER NOT EXISTS { ?anyMF schema:about ?this } - FILTER NOT EXISTS { ?this a schema:MediaObject } - FILTER NOT EXISTS { ?this a schema:Dataset } - FILTER NOT EXISTS { ?this a owl:Ontology } - FILTER NOT EXISTS { - ?root schema:hasPart ?this . - ?anyAbout schema:about ?root . - } - FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) - FILTER(!STRSTARTS(STR(?this), "https://www.w3.org/")) - FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/")) - FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) - FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) - FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) - FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) - FILTER(!STRSTARTS(STR(?this), "https://codemeta.github.io/")) - FILTER(!STRSTARTS(STR(?this), "http://www.opengis.net/")) - FILTER(!STRSTARTS(STR(?this), "http://pcdm.org/")) - FILTER(!STRSTARTS(STR(?this), "http://xmlns.com/foaf/")) - FILTER(!STRSTARTS(STR(?this), "http://creativecommons.org/ns")) - FILTER(!STRSTARTS(STR(?this), "http://www.iana.org/")) - FILTER(!STRSTARTS(STR(?this), "https://www.iana.org/")) - FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) - FILTER(!STRSTARTS(STR(?this), "https://nationaalarchief.nl/archieven/archief/")) - FILTER(!STRSTARTS(STR(?this), "https://www.nationalarchives.gov.uk")) - FILTER(!STRSTARTS(STR(?this), "urn:")) - } - """ - ] ; - sh:sparql [ - a sh:SPARQLConstraint ; - 
sh:name "Referenced contextual entities should be described: RDF type" ; - sh:description "Check that contextual entities referenced by other entities have an RDF type specified." ; - sh:message "Referenced contextual entities SHOULD be described in the same @graph" ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - FILTER NOT EXISTS { - ?this a ?type . - } - } - """ ; + sh:targetClass ro-crate:ContextualEntity ; + sh:property [ + sh:name "Contextual Entity: REQUIRED RDF type" ; + sh:description "Check if the Contextual Entity is described in the same graph with at least an RDF type specified." ; + sh:message "Contextual entities that are referenced by other entities SHOULD be described in the same @graph, with at least an RDF type specified." ; + # One rdf:type triple is automatically added to the data graph for each Contextual Entity + # by the `ContextualEntityDefinition` shape, so `sh:minCount 2` requires at least one + # further rdf:type triple, declared in the crate itself, for the Contextual Entity. + sh:path rdf:type ; + sh:minCount 2 ; ] . 
ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; From 5ad99b86c3d7261d3b9b0ad305bab3292cf37469 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 16:02:15 +0200 Subject: [PATCH 072/352] test(ro-crate-1.2): :bug: update expected issue --- .../profiles/ro-crate-1.2/test_metadata_document.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py index 28bc4049e..7a77a8ceb 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py @@ -171,5 +171,6 @@ def test_not_described_contextual_entity(): profile_identifier="ro-crate-1.2", expected_triggered_requirements=["Contextual Entity RECOMMENDED description"], expected_triggered_issues=[ - "Referenced contextual entities SHOULD be described in the same @graph"] + "Contextual entities that are referenced by other entities SHOULD be " + "described in the same @graph, with at least an RDF type specified."] ) From 71c706da396bf063f8cb45b8c49cf59680675c04 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 16:19:47 +0200 Subject: [PATCH 073/352] refactor(ro-crate-1.2): :recycle: refine shapes for Contextual Entities --- .../1.2/must/6_contextual_entity_metadata.ttl | 60 ++++++++++++++++--- .../should/6_contextual_entity_metadata.ttl | 52 ++++++++++++++-- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 689cd18ff..4690a9aa2 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -71,24 +71,66 @@ ro-crate:ContextualEntityDefinition a 
sh:NodeShape, validator:HiddenShape ; sh:object ro-crate:ContextualEntity ; ] . - -ro-crate:RequiredDownloadableCiteAs a sh:NodeShape ; - sh:name "RO-Crate Metadata Entity: REQUIRED `citeAs` reference" ; - sh:description """If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item.""" ; +ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify License Entity" ; + sh:description """Mark a license entity any Data Entity referenced by the `schema:license` property that is defined in the RO-Crate metadata.""" ; sh:target [ a sh:SPARQLTarget ; sh:prefixes ro-crate:sparqlPrefixes ; sh:select """ SELECT ?this WHERE { - ?e iana-relation:cite-as ?this . + ?subject schema:license ?this . + FILTER EXISTS { ?this ?p ?o . FILTER(?p NOT IN (owl:sameAs, rdf:type)) } } """ ] ; + + # Expand the data graph by typing each matched license entity as a `ro-crate:ContextualEntity` + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ContextualEntity ; + ] . 
+ +ro-crate:WebSiteRecommendedProperties a sh:NodeShape ; + sh:name "WebSite RECOMMENDED Properties" ; + sh:description """A `WebSite` MUST be identified by a valid IRI and MUST have a `name` property.""" ; + sh:targetClass schema:WebSite ; sh:property [ - sh:name "RO-Crate Metadata Entity: REQUIRED `cite-as` reference" ; - sh:description """Check if the RO-Crate Metadata Entity's `cite-as` property references a downloadable item.""" ; - sh:path schema_org:url ; + sh:path [ + sh:inversePath rdf:type + ] ; sh:datType sh:IRI ; sh:name "WebSite: value restriction of its identifier" ; sh:description "Check if the WebSite has a valid IRI" ; sh:message "A WebSite MUST have a valid IRI" ; + ] ; sh:property [ + sh:path schema:name ; sh:minCount 1 ; - sh:message "If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item" ; + sh:dataType xsd:string ; + sh:name "WebSite: REQUIRED `name` property" ; + sh:description "Check if the WebSite has a `name` property" ; + sh:message "A WebSite MUST have a `name` property" ; + ] . + +ro-crate:CreativeWorkAuthorDefinition a sh:NodeShape, validator:HiddenShape ; + sh:name "CreativeWork Author Definition" ; + sh:description """Define the `CreativeWorkAuthor` as the `Person` object of the `schema:author` predicate.""" ; + sh:targetObjectsOf schema:author ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:CreativeWorkAuthor ; + sh:condition [ + sh:property [ + sh:path rdf:type ; sh:hasValue schema:Person ; sh:minCount 1 + ] ;] ;] . + +ro-crate:ThumbnailReferencesFile a sh:NodeShape ; + sh:name "Thumbnail reference" ; + sh:description """If `thumbnail` is present, it MUST reference a File data entity.""" ; + sh:targetSubjectsOf schema:thumbnail ; + sh:property [ + sh:path schema:thumbnail ; + sh:class schema:MediaObject ; + sh:message "If present, `thumbnail` MUST reference a File data entity" ; ] . 
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index b5c892103..2de19b93f 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -39,6 +39,26 @@ ro-crate:ContextualEntityReferences a sh:NodeShape ; """ ; ] . +ro-crate:ContextualEntityProperties a sh:NodeShape ; + sh:name "Contextual Entity Properties" ; + sh:description """Contextual Entities MUST have a `name` property and SHOULD have a `description` property.""" ; + sh:targetClass ro-crate:ContextualEntity ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "Contextual Entity: REQUIRED `name` property" ; + sh:description "Check if the Contextual Entity has a `name` property" ; + sh:message "A Contextual Entity MUST have a `name` property" ; + ] ; + sh:property [ + sh:path schema:description ; + sh:dataType xsd:string ; + sh:name "Contextual Entity: OPTIONAL `description` property" ; + sh:description "Check if the Contextual Entity has a `description` property" ; + sh:message "A Contextual Entity SHOULD have a `description` property" ; + ] . + ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; sh:name "Contextual Entity RECOMMENDED description" ; sh:description """Contextual entities referenced by other entities SHOULD be described in the same graph.""" ; @@ -54,6 +74,21 @@ ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; sh:minCount 2 ; ] . +# ro-crate:EntityNameRecommendedProperties a sh:NodeShape ; +# sh:name "Entity: RECOMMENDED name" ; +# sh:description """Entities SHOULD have a human-readable `name`, in particular if its `@id` does not go to a human-readable Web page. 
+# This follows the RO-Crate 1.2 Common Principles for RO-Crate entities.""" ; +# sh:targetClass ro-crate:ContextualEntity ; +# sh:property [ +# sh:path schema:name ; +# sh:minCount 1 ; +# sh:datatype xsd:string ; +# sh:severity sh:Warning ; +# sh:name "Contextual Entity: RECOMMENDED `name` property" ; +# sh:description "Check if the Contextual Entity has a `name` property" ; +# sh:message "A Contextual Entity SHOULD have a `name` property" ; +# ] . + ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; sh:name "CreativeWork Author: minimum RECOMMENDED properties" ; sh:description """The minimum recommended properties for a `CreativeWork Author` are `name` and `affiliation`.""" ; @@ -70,8 +105,12 @@ ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; sh:path schema:affiliation ; sh:minCount 1 ; sh:or ( - [ sh:dataType xsd:string ; ] - [ sh:class schema:Organization ;] + [ + sh:dataType xsd:string ; + ] + [ + sh:class schema:Organization ; + ] ) ; sh:severity sh:Warning ; sh:name "CreativeWork Author: RECOMMENDED affiliation property" ; @@ -88,7 +127,6 @@ ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; sh:message "The author SHOULD have a Contextual Entity which specifies the organizational `affiliation`." ; ] . 
- ro-crate:OrganizationRecommendedProperties a sh:NodeShape ; sh:name "Organization: RECOMMENDED properties" ; sh:description """The recommended properties for an `Organization` are `name` and `url`.""" ; @@ -188,8 +226,12 @@ ro-crate:EncodingFormatRecommendedTypes a sh:NodeShape ; sh:property [ sh:path rdf:type ; sh:or ( - [ sh:hasValue schema:WebPage ] - [ sh:hasValue dct:Standard ] + [ + sh:hasValue schema:WebPage + ] + [ + sh:hasValue dct:Standard + ] ) ; sh:severity sh:Warning ; sh:message "Encoding format entities SHOULD include WebPage and/or Standard types" ; From ae9a4a390b3b60419ec72d4c44d0b958b0e8c10e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 15 Apr 2026 19:34:41 +0200 Subject: [PATCH 074/352] test(ro-crate-1.2): :sparkles: fix tests and extend test data --- .../invalid/ro-crate-metadata.json | 6 - .../valid/ro-crate-metadata.json | 3 +- .../invalid/ro-crate-metadata.json | 27 ++++ .../valid/ro-crate-metadata.json | 27 ++++ .../invalid/ro-crate-metadata.json | 31 ++++ .../valid/ro-crate-metadata.json | 32 +++++ .../invalid/ro-crate-metadata.json | 34 +++++ .../valid/ro-crate-metadata.json | 34 +++++ .../invalid/ro-crate-metadata.json | 34 +++++ .../valid/ro-crate-metadata.json | 36 +++++ .../invalid/ro-crate-metadata.json | 33 +++++ .../valid/ro-crate-metadata.json | 35 +++++ tests/data/crates/rocrate-1.2/README.md | 15 ++ .../ro-crate-1.2/test_availability_flags.py | 39 +++++- .../ro-crate-1.2/test_detached_rocrates.py | 3 +- .../test_metadata_dataEntities.py | 132 +++++++++++++++++- .../test_metadata_rootDataEntity.py | 48 +++++-- 17 files changed, 546 insertions(+), 23 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json create mode 100644 
tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/README.md diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json index bfff25ca0..f6b3ac5e6 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/invalid/ro-crate-metadata.json @@ -29,12 +29,6 @@ } ] }, - { - "@id": "https://creativecommons.org/licenses/by/4.0/", - "@type": "CreativeWork", - "name": "Creative Commons Attribution 4.0 International", - "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
- }, { "@id": "data.csv", "@type": "File", diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json index 5ae1ee6ad..cc5c46aaf 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -36,7 +36,8 @@ "@id": "my-data-file.txt", "@type": "File", "name": "My Data File", - "description": "A data file that is part of the RO-Crate." + "description": "A data file that is part of the RO-Crate.", + "encodingFormat": "text/plain" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..2ee97bf08 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/invalid/ro-crate-metadata.json @@ -0,0 +1,27 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Root identifier resolution — invalid", + "description": "RO-Crate whose Root Data Entity has an identifier URL that does not resolve to RO-Crate content (returns HTML landing page).", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "identifier": "https://doi.org/10.1234/unresolvable-rocrate" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 
4.0 license." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json new file mode 100644 index 000000000..78f34f3f7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json @@ -0,0 +1,27 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Root identifier resolution — valid", + "description": "RO-Crate whose Root Data Entity has an identifier URL that resolves to RO-Crate content via Signposting.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "identifier": "https://doi.org/10.1234/resolvable-rocrate" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..be25b1667 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/invalid/ro-crate-metadata.json @@ -0,0 +1,31 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/valid/ro-crate-metadata.json new file mode 100644 index 000000000..0eb0f6f4b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_reference_type_for_licence/valid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Counterexample: preview incorrectly in hasPart", + "description": "This RO-Crate INCORRECTLY includes ro-crate-preview.html in hasPart. In RO-Crate 1.2, ro-crate-preview.html SHOULD NOT be listed in hasPart of the Root Data Entity or any Dataset entity (unless it is also the profile description in a Profile Crate).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..99be5a279 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/invalid/ro-crate-metadata.json @@ -0,0 +1,34 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset distribution — invalid", + "description": "RO-Crate whose Root Data Entity declares a distribution pointing to a DataDownload that is not downloadable.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "distribution": {"@id": "https://example.com/unavailable-archive.zip"} + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/unavailable-archive.zip", + "@type": "DataDownload", + "name": "Unavailable archive", + "description": "A DataDownload whose URL is not downloadable (returns HTML or is unreachable).", + "encodingFormat": "application/zip" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json new file mode 100644 index 000000000..f657715b3 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json @@ -0,0 +1,34 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset distribution — valid", + "description": "RO-Crate whose Root Data Entity declares a distribution pointing to a downloadable DataDownload archive.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "distribution": {"@id": "https://example.com/rocrate.zip"} + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/rocrate.zip", + "@type": "DataDownload", + "name": "RO-Crate archive", + "description": "A ZIP archive of the RO-Crate, directly downloadable.", + "encodingFormat": "application/zip" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..c67c99f35 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json @@ -0,0 +1,34 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web entity contentUrl — invalid", + "description": "RO-Crate with a web-based File Data Entity whose contentUrl is not downloadable (returns HTML).", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [{"@id": "https://example.com/landing/dataset"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/landing/dataset", + "@type": "File", + "name": "Dataset with broken contentUrl", + "description": "A web entity whose contentUrl resolves to an HTML page rather than a direct download.", + "encodingFormat": "application/zip", + "contentUrl": "https://cdn.example.com/broken-link" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5b4f2eea3 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json @@ -0,0 +1,36 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web entity contentUrl — valid", + "description": "RO-Crate with a web-based File Data Entity that declares a downloadable contentUrl.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [{"@id": "https://example.com/landing/dataset"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/landing/dataset", + "@type": "File", + "name": "Dataset with contentUrl", + "description": "A web entity whose primary @id is a landing page, but contentUrl provides a direct download link.", + "encodingFormat": "application/zip", + "contentUrl": "https://cdn.example.com/dataset.zip", + "contentSize": "1024", + "sdDatePublished": "2024-01-01" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..130ea92ab --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/invalid/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web entity downloadable — invalid", + "description": "RO-Crate with a web-based File Data Entity whose @id returns text/html (splash page), making it not directly downloadable.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [{"@id": "https://example.com/splash-page"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/splash-page", + "@type": "File", + "name": "Splash page (not downloadable)", + "description": "A web-based entity that resolves to an HTML splash page instead of a downloadable resource.", + "encodingFormat": "text/html" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json new file mode 100644 index 000000000..d88026914 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json @@ -0,0 +1,35 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web entity downloadable — valid", + "description": "RO-Crate with a web-based File Data Entity whose @id returns a non-HTML Content-Type (directly downloadable).", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [{"@id": "https://example.com/data.csv"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://example.com/data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A web-based CSV file that is directly downloadable.", + "encodingFormat": "text/csv", + "contentSize": "512", + "sdDatePublished": "2024-01-01" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/README.md b/tests/data/crates/rocrate-1.2/README.md new file mode 100644 index 000000000..6f11496c0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/README.md @@ -0,0 +1,15 @@ +# RO-Crate 1.2 Detached examples + +This folder contains example Detached RO-Crate metadata files for 1.2. Each example is a metadata-only JSON-LD file that references remote data entities. + +## Examples +- `detached-basic/basic-ro-crate-metadata.json` + - Minimal detached RO-Crate with a single remote file. +- `detached-multi/multi-ro-crate-metadata.json` + - Detached RO-Crate with multiple remote files and a remote dataset. +- `detached-with-profile/profiled-ro-crate-metadata.json` + - Detached RO-Crate that declares conformance to an additional profile. + +## Notes +- Detached RO-Crates have no local payload; all data entities use absolute URIs. +- The metadata document uses the 1.2 context by reference. diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index 4facf0133..0c7a79ecf 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -24,13 +24,38 @@ valid = ValidROCrate12() +# Minimal set of JSON-LD context keys needed to pass `check_compaction` +# for the test crates used in this module. 
+_FAKE_CONTEXT_KEYS = { + "about", "affiliation", "author", "cite-as", "conformsTo", + "contentLocation", "contentSize", "contentUrl", "dateCreated", + "dateModified", "datePublished", "description", "encodingFormat", + "hasPart", "license", "name", "sdDatePublished", "url", +} + + +class _FakeContextResponse: + """Minimal HTTP response that satisfies `FileDescriptorJsonLdFormat` checks.""" + status_code = 200 + headers = {"Content-Type": "application/ld+json"} + + def json(self): + return {"@context": {k: f"http://schema.org/{k}" for k in _FAKE_CONTEXT_KEYS}} + + +def _fake_context_get(url, *args, **kwargs): + """Return a fake JSON-LD context for w3id.org; reject anything else.""" + if "w3id.org" in url: + return _FakeContextResponse() + raise RuntimeError(f"Unexpected GET request in test: {url}") + def _validate_with_settings(**kwargs): return services.validate( models.ValidationSettings( rocrate_uri=URI(valid.attached_absolute_root), profile_identifier="ro-crate-1.2", - requirement_severity=models.Severity.REQUIRED, + requirement_severity=models.Severity.RECOMMENDED, **kwargs, ) ) @@ -45,16 +70,20 @@ def _availability_messages(result): def _patch_unavailable(monkeypatch): + """Make every HEAD request fail (simulates unreachable web entities) + and return a fake JSON-LD context for GET requests to avoid proxy errors.""" def fake_head(url, *args, **kwargs): raise RuntimeError("Not downloadable") monkeypatch.setattr(HttpRequester(), "head", fake_head) + monkeypatch.setattr(HttpRequester(), "get", _fake_context_get) def test_default_availability_warns(monkeypatch): _patch_unavailable(monkeypatch) result = _validate_with_settings() - assert result.passed() + # REQUIRED checks must pass; the unavailable entity only raises a RECOMMENDED warning + assert result.passed(models.Severity.REQUIRED) messages = _availability_messages(result) assert messages, "Expected availability warnings for web-based data entities" @@ -75,7 +104,8 @@ def 
test_enforce_availability_flag(monkeypatch): assert messages, "Expected availability violations when enforced" -def test_skip_availability_check(): +def test_skip_availability_check(monkeypatch): + monkeypatch.setattr(HttpRequester(), "get", _fake_context_get) result = _validate_with_settings(skip_availability_check=True) assert result.passed() messages = _availability_messages(result) @@ -100,6 +130,7 @@ def fake_head(url, *args, **kwargs): return FakeResponse(content_length="10") monkeypatch.setattr(HttpRequester(), "head", fake_head) + monkeypatch.setattr(HttpRequester(), "get", _fake_context_get) metadata_dict = { "@context": "https://w3id.org/ro/crate/1.2/context", @@ -137,7 +168,7 @@ def fake_head(url, *args, **kwargs): metadata_dict=metadata_dict, metadata_only=True, profile_identifier="ro-crate-1.2", - requirement_severity=models.Severity.REQUIRED, + requirement_severity=models.Severity.RECOMMENDED, ) ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index 03debbf27..e157c1442 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -72,7 +72,8 @@ def test_root_data_entity_identifier_when_online_available(): __detached_crates__.valid_root_data_entity_identifier_when_online_available, models.Severity.RECOMMENDED, True, - profile_identifier="ro-crate-1.2" + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_41.1", "ro-crate-1.2_44.1"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 6a99b689b..83e6e3e69 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -15,6 +15,7 @@ import logging from rocrate_validator import models +from 
rocrate_validator.utils.http import HttpRequester from tests.ro_crates_1_2 import DataEntities from tests.shared import do_entity_test @@ -94,7 +95,7 @@ def test_valid_recommended_properties(): def test_invalid_recommended_properties(): """ - Test that a Data Entity is invalid when it includes recommended properties. + Test that a Data Entity is invalid when it lacks recommended properties. """ do_entity_test( __metadata_root_data_entity_crates__.invalid_recommended_properties, @@ -105,7 +106,6 @@ def test_invalid_recommended_properties(): expected_triggered_issues=[ "Data Entities SHOULD have a `name` property", "Data Entities SHOULD have a `description` property", - "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ] ) @@ -136,3 +136,131 @@ def test_invalid_recommended_encoding_format(): "Missing or invalid `encodingFormat` linked to the `File Data Entity`" ] ) + + +# --------------------------------------------------------------------------- +# Web entity @id — downloadability (MUST at creation_time / enforce_availability) +# --------------------------------------------------------------------------- + +class _ZipResponse: + status_code = 200 + headers = {"Content-Type": "application/zip"} + links = {} + + def raise_for_status(self): + pass + + +class _HtmlResponse: + status_code = 200 + headers = {"Content-Type": "text/html; charset=utf-8"} + links = {} + + def raise_for_status(self): + pass + + +def test_valid_required_web_entity_downloadable(monkeypatch): + """ + Web-based Data Entity whose @id returns a non-HTML Content-Type MUST pass + the availability check when enforce_availability=True. 
+ """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.valid_web_entity_downloadable, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + enforce_availability=True, + ) + + +def test_invalid_required_web_entity_not_downloadable(monkeypatch): + """ + Web-based Data Entity whose @id returns text/html (splash page) MUST fail + the availability check when enforce_availability=True. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _HtmlResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.invalid_web_entity_splash_page, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + enforce_availability=True, + expected_triggered_requirements=["Web-based Data Entity: REQUIRED availability"], + expected_triggered_issues=["HTML page"], + ) + + +# --------------------------------------------------------------------------- +# Web entity @id — availability warning (RECOMMENDED, default mode) +# --------------------------------------------------------------------------- + +def test_valid_recommended_web_entity_downloadable_warning(monkeypatch): + """ + Web-based Data Entity whose @id returns application/zip passes the + RECOMMENDED availability check in default mode. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.valid_web_entity_downloadable, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_web_entity_splash_page_warning(monkeypatch): + """ + Web-based Data Entity whose @id returns text/html triggers a RECOMMENDED + warning about a possible splash page in default mode. 
+ """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _HtmlResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.invalid_web_entity_splash_page, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Web-based Data Entity: REQUIRED availability"], + expected_triggered_issues=["HTML page"], + ) + + +# --------------------------------------------------------------------------- +# Web entity contentUrl — downloadability (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_content_url_downloadable(monkeypatch): + """ + Web-based Data Entity whose contentUrl returns application/zip passes the + RECOMMENDED contentUrl availability check. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_content_url, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_content_url_not_downloadable(monkeypatch): + """ + Web-based Data Entity whose contentUrl returns text/html fails the + RECOMMENDED contentUrl availability check. 
+ """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _HtmlResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_content_url, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Web-based Data Entity: REQUIRED availability"], + expected_triggered_issues=["contentUrl", "not directly downloadable"], + ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index c4d402f36..a63382447 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -15,6 +15,7 @@ import logging from rocrate_validator import models +from rocrate_validator.utils.http import HttpRequester from tests.ro_crates_1_2 import RootDataEntity from tests.shared import do_entity_test @@ -54,32 +55,60 @@ def test_invalid_required_datePublished(): ) -def test_valid_required_downloadable_citeAs(): +def test_valid_required_downloadable_citeAs(monkeypatch): """ - Test that the Root Data Entity is valid when it includes a `cite-as` property that references a downloadable item. + Test that the Root Data Entity is valid when it includes a `cite-as` property + that references a downloadable item (mocked as application/zip via HEAD). 
""" + class _DownloadableResponse: + status_code = 200 + headers = {"Content-Type": "application/zip"} + links = {} + + def raise_for_status(self): + pass + + def _fake_head(url, *args, **kwargs): + return _DownloadableResponse() + + monkeypatch.setattr(HttpRequester(), "head", _fake_head) + do_entity_test( __metadata_root_data_entity_crates__.valid_required_downloadable_citeAs, models.Severity.REQUIRED, True, - profile_identifier="ro-crate-1.2" - + profile_identifier="ro-crate-1.2", + enforce_availability=True, ) -def test_invalid_required_downloadable_citeAs(): +def test_invalid_required_downloadable_citeAs(monkeypatch): """ Test that the Root Data Entity is invalid when it includes a `cite-as` property - that does not reference a downloadable item. + that does not reference a downloadable item (mocked as text/html via HEAD). """ + class _HtmlResponse: + status_code = 200 + headers = {"Content-Type": "text/html; charset=utf-8"} + links = {} + + def raise_for_status(self): + pass + + def _fake_head(url, *args, **kwargs): + return _HtmlResponse() + + monkeypatch.setattr(HttpRequester(), "head", _fake_head) + do_entity_test( __metadata_root_data_entity_crates__.invalid_required_downloadable_citeAs, models.Severity.REQUIRED, False, profile_identifier="ro-crate-1.2", - expected_triggered_requirements=["RO-Crate Metadata Entity: REQUIRED `citeAs` reference"], + enforce_availability=True, + expected_triggered_requirements=["Root Data Entity: `cite-as` downloadability"], expected_triggered_issues=[ - "If the RO-Crate Metadata Entity includes a `cite-as` property, it MUST reference a downloadable item" + "MUST ultimately provide the RO-Crate as a downloadable item" ] ) @@ -93,7 +122,8 @@ def test_valid_recommended_citeAs_for_resolvable_id(): __metadata_root_data_entity_crates__.valid_recommended_citeAs_for_resolvable_id, models.Severity.RECOMMENDED, True, - profile_identifier="ro-crate-1.2" + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_44.1"], ) From 
87e9a16dc291e433277dad3fef342643681c2582 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 09:29:14 +0200 Subject: [PATCH 075/352] test(cli): :bug: fix CLI test --- tests/test_cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 2895bb7ab..9d34725fd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -147,7 +147,6 @@ def mock_validate(*args, **kwargs): def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): - # Create a directory with a dummy profile file dummy_profiles_path = "/tmp/dummy_profiles" result = cli_runner.invoke( cli, @@ -159,6 +158,7 @@ def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): "--verbose", "--no-paging", ], + env={"COLUMNS": "200"}, ) assert result.exit_code == 2 # On narrow terminals the Rich error panel wraps the message across lines @@ -172,7 +172,7 @@ def test_profiles_list(cli_runner: CliRunner): """ Test the list of profiles. 
""" - result = cli_runner.invoke(cli, ["profiles", "list", "--no-paging"]) + result = cli_runner.invoke(cli, ["profiles", "list", "--no-paging"], env={"COLUMNS": "200"}) assert result.exit_code == 0 assert "ro-crate-1.1" in result.output # Check for a known profile @@ -184,6 +184,7 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): result = cli_runner.invoke( cli, ["profiles", "--extra-profiles-path", str(fake_profiles_path), "list", "--no-paging"], + env={"COLUMNS": "200"}, ) assert result.exit_code == 0 assert "Profile A" in result.output # Check for a known extra profile From bf67d10a8a15d73a632cc978ff90c9d9e3e94273 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 09:48:37 +0200 Subject: [PATCH 076/352] feat(ro-crate-1.2): :sparkles: check downloadability via Signposting --- .../1.2/must/2_root_data_entity_cite_as.py | 78 +++++++++ .../1.2/must/4_data_entity_metadata.py | 28 ++- .../should/2_root_data_entity_identifier.py | 78 ++++++++- .../1.2/should/4_web_dataset_distribution.py | 93 ++++++++++ rocrate_validator/utils/signposting.py | 165 ++++++++++++++++++ 5 files changed, 435 insertions(+), 7 deletions(-) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_cite_as.py create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_web_dataset_distribution.py create mode 100644 rocrate_validator/utils/signposting.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_cite_as.py b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_cite_as.py new file mode 100644 index 000000000..57f4cb192 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_cite_as.py @@ -0,0 +1,78 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils.signposting import check_downloadable + +logger = logging.getLogger(__name__) + + +@requirement(name="Root Data Entity: `cite-as` downloadability") +class CiteAsDownloadableChecker(PyFunctionCheck): + """ + If present, the `cite-as` value of the Root Data Entity MUST ultimately + provide the RO-Crate as a downloadable item, accessible via Signposting + (Link rel="item" or rel="describedby"), direct download, or content + negotiation (RO-Crate 1.2, RFC 8574). 
+ """ + + @check(name="Root Data Entity: `cite-as` MUST reference a downloadable item") + def check_cite_as_downloadable(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if not (context.settings.creation_time or context.settings.enforce_availability): + return True + if context.settings.metadata_only: + return True + + try: + root_entity = context.ro_crate.metadata.get_root_data_entity() + cite_as_raw = root_entity.get_property("cite-as") + if not cite_as_raw: + return True + + # cite-as can be a plain string literal or an entity reference {"@id": "..."} + if isinstance(cite_as_raw, str): + cite_as_url = cite_as_raw + elif hasattr(cite_as_raw, "id"): + cite_as_url = cite_as_raw.id + else: + return True + + if not cite_as_url or not cite_as_url.startswith("http"): + return True + + result = check_downloadable(cite_as_url) + if not result.is_downloadable: + context.result.add_issue( + f"The `cite-as` value '{cite_as_url}' MUST ultimately provide " + f"the RO-Crate as a downloadable item" + + (f": {result.reason}" if result.reason else ""), + self, + ) + return False + + logger.debug( + "cite-as '%s' is downloadable via %s (url: %s)", + cite_as_url, result.via, result.download_url, + ) + return True + + except Exception as e: + context.result.add_issue( + f"Error checking `cite-as` downloadability: {str(e)}", self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index 4bbb6babe..d5ab5ef26 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -14,10 +14,11 @@ import re -from rocrate_validator.utils import log as logging -from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python 
import (PyFunctionCheck, check, requirement) +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.signposting import check_downloadable # set up logging logger = logging.getLogger(__name__) @@ -212,7 +213,9 @@ def check_citation(self, context: ValidationContext) -> bool: @requirement(name="Web-based Data Entity: REQUIRED availability") class WebDataEntityRequiredChecker(PyFunctionCheck): """ - Web-based Data Entities MUST be directly downloadable at the time of creation. + Web-based Data Entities MUST be directly downloadable at the time of creation + (RO-Crate 1.2). Downloadability is verified via Signposting (rel=item, + rel=describedby), direct Content-Type inspection, and content negotiation. """ @check(name="Web-based Data Entity: REQUIRED resource availability") @@ -226,10 +229,23 @@ def check_availability(self, context: ValidationContext) -> bool: result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" + # Skip directory URIs: assumed available, not directly downloadable by spec + if entity.id.endswith("/"): + logger.debug("Skipping downloadability check for directory entity '%s'", entity.id) + continue try: - if not entity.is_available(): - context.result.add_issue( - f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) + dl = check_downloadable(entity.id) + if not dl.is_downloadable: + msg = f"Web-based Data Entity '{entity.id}' is not directly downloadable" + if dl.reason and "HTML" in dl.reason: + msg = ( + f"Web-based Data Entity '{entity.id}' references an HTML page " + f"(possible splash page or viewer application); " + f"it MUST be directly downloadable" + ) + elif dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) result = False except Exception as e: context.result.add_issue( diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py 
b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py index 4e8fb2f92..35ed9fa90 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.models import ValidationContext +from rocrate_validator.models import Severity, ValidationContext from rocrate_validator.requirements.python import (PyFunctionCheck, check, requirement) from rocrate_validator.utils import log as logging +from rocrate_validator.utils.signposting import check_downloadable from rocrate_validator.utils.uri import URI logger = logging.getLogger(__name__) @@ -95,3 +96,78 @@ def check_cite_as_reference(self, context: ValidationContext) -> bool: context.result.add_issue( f'Error checking Root Data Entity `cite-as` reference: {str(e)}', self) return False + + +def _extract_identifier_urls(identifier_raw) -> list[str]: + """ + Extract HTTP URL(s) from a raw ``identifier`` property value. 
+ + The identifier can be: + - A plain string URL + - A ``schema:PropertyValue`` entity with a ``url`` or ``value`` sub-property + - A list of any of the above + """ + urls = [] + items = identifier_raw if isinstance(identifier_raw, list) else [identifier_raw] + for item in items: + if isinstance(item, str): + if item.startswith("http"): + urls.append(item) + elif hasattr(item, "get_property"): + # PropertyValue: try url first, then value + for prop in ("url", "value"): + val = item.get_property(prop) + if val: + url = val if isinstance(val, str) else val.id if hasattr(val, "id") else None + if url and url.startswith("http"): + urls.append(url) + break + elif hasattr(item, "id") and item.id and item.id.startswith("http"): + urls.append(item.id) + return urls + + +@requirement(name="Root Data Entity: persistent identifier resolution") +class RootDataEntityPersistentIdentifierChecker(PyFunctionCheck): + """ + If the Root Data Entity has an ``identifier`` property with a resolvable + HTTP URL, resolving that URL SHOULD ultimately provide the RO-Crate Metadata + Document or an archive, accessible via Signposting or content negotiation + (RO-Crate 1.2, RECOMMENDED). 
+ """ + + @check(name="Root Data Entity: identifier SHOULD resolve to RO-Crate content", + severity=Severity.RECOMMENDED) + def check_identifier_resolvable(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if context.settings.metadata_only: + return True + try: + root_entity = context.ro_crate.metadata.get_root_data_entity() + identifier_raw = root_entity.get_property("identifier") + if not identifier_raw: + return True + + urls = _extract_identifier_urls(identifier_raw) + if not urls: + return True + + result = True + for url in urls: + dl = check_downloadable(url) + if not dl.is_downloadable: + msg = ( + f"The Root Data Entity identifier '{url}' SHOULD resolve to the " + f"RO-Crate Metadata Document or an archive via Signposting or " + f"content negotiation" + ) + if dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) + result = False + return result + except Exception as e: + context.result.add_issue( + f"Error checking Root Data Entity identifier resolution: {str(e)}", self) + return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_web_dataset_distribution.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_web_dataset_distribution.py new file mode 100644 index 000000000..6ba85396b --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_web_dataset_distribution.py @@ -0,0 +1,93 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.signposting import check_downloadable + +logger = logging.getLogger(__name__) + + +def _resolve_distribution_url(dist) -> str | None: + """ + Extract the download URL from a distribution value. + + The distribution property can be: + - A plain string URL + - A DataDownload entity reference (ROCrateEntity) with a ``contentUrl`` + or an ``@id`` that is itself a download URL + """ + if isinstance(dist, str): + return dist if dist.startswith("http") else None + if hasattr(dist, "get_property"): + # Prefer contentUrl, fall back to @id + content_url_raw = dist.get_property("contentUrl") + if content_url_raw: + url = (content_url_raw if isinstance(content_url_raw, str) + else content_url_raw.id if hasattr(content_url_raw, "id") else None) + if url and url.startswith("http"): + return url + if hasattr(dist, "id") and dist.id and dist.id.startswith("http"): + return dist.id + return None + + +@requirement(name="Dataset: distribution downloadability") +class DatasetDistributionChecker(PyFunctionCheck): + """ + If a Dataset declares a ``distribution`` property pointing to a DataDownload + entity, the referenced archive SHOULD be directly downloadable (RO-Crate 1.2, + RECOMMENDED). Downloadability is verified via Signposting, Content-Type, and + content negotiation. 
+ """ + + @check(name="Dataset: distribution SHOULD be downloadable", severity=Severity.RECOMMENDED) + def check_distribution_downloadable(self, context: ValidationContext) -> bool: + if context.settings.skip_availability_check: + return True + if context.settings.metadata_only: + return True + result = True + for entity in context.ro_crate.metadata.get_dataset_entities(): + distribution_raw = entity.get_property("distribution") + if not distribution_raw: + continue + distributions = (distribution_raw + if isinstance(distribution_raw, list) + else [distribution_raw]) + for dist in distributions: + url = _resolve_distribution_url(dist) + if not url: + continue + try: + dl = check_downloadable(url) + if not dl.is_downloadable: + msg = ( + f"The distribution '{url}' of Dataset '{entity.id}' " + f"SHOULD be downloadable" + ) + if dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) + result = False + except Exception as e: + context.result.add_issue( + f"Error checking downloadability of distribution '{url}' " + f"for Dataset '{entity.id}': {e}", self) + result = False + if not result and context.fail_fast: + return result + return result diff --git a/rocrate_validator/utils/signposting.py b/rocrate_validator/utils/signposting.py new file mode 100644 index 000000000..98946f570 --- /dev/null +++ b/rocrate_validator/utils/signposting.py @@ -0,0 +1,165 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Utilities for checking resource downloadability via Signposting (RFC 8574), +direct content-type inspection, and HTTP content negotiation. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Optional + +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import HttpRequester + +logger = logging.getLogger(__name__) + + +# MIME types that indicate a landing/navigation page, not directly downloadable content +_HTML_MIME_TYPES = frozenset({"text/html", "application/xhtml+xml"}) + +# Accept header used when attempting content negotiation for RO-Crate metadata/archives. +# The profile parameter is included per RO-Crate 1.2 spec to disambiguate RO-Crate content +# from generic JSON-LD or ZIP resources (see signposting.md for details). +_ROCRATE_ACCEPT = ( + 'application/ld+json; profile="https://w3id.org/ro/crate", ' + "application/ld+json;q=0.9, " + "application/json;q=0.8, " + "application/zip;q=0.7" +) + + +class DownloadVia(str, Enum): + """Mechanism through which a URL was determined to be downloadable.""" + DIRECT = "direct" + SIGNPOSTING_ITEM = "signposting_item" + SIGNPOSTING_DESCRIBEDBY = "signposting_describedby" + CONTENT_NEGOTIATION = "content_negotiation" + + +@dataclass +class DownloadabilityResult: + """ + Result of a downloadability check. + + :param is_downloadable: True if the URL ultimately provides a downloadable item. + :param via: The mechanism through which downloadability was determined. + :param download_url: The concrete URL of the downloadable resource (may differ + from the checked URL when resolved via Signposting). + :param reason: Human-readable explanation when ``is_downloadable`` is False. 
+ """ + is_downloadable: bool + via: Optional[DownloadVia] = None + download_url: Optional[str] = None + reason: Optional[str] = None + + +def check_downloadable(url: str) -> DownloadabilityResult: + """ + Check whether *url* ultimately provides a downloadable item, following the + RO-Crate 1.2 / RFC 8574 requirements for ``cite-as`` targets. + + The function probes the URL in the following order: + + 1. **Signposting** — parses the ``Link`` response headers (RFC 8288). + A ``rel="item"`` link indicates a downloadable archive; a + ``rel="describedby"`` link indicates a retrievable metadata document. + 2. **Direct download** — if the ``Content-Type`` of the HEAD response is + not an HTML MIME type the resource is considered directly downloadable. + 3. **Content negotiation** — a second HEAD request is issued with + ``Accept: application/ld+json, …``. If the server returns a non-HTML + ``Content-Type`` the resource is reachable via content negotiation. + + :param url: The URL to check. + :returns: A :class:`DownloadabilityResult` instance. + :raises: Does **not** raise; all HTTP errors are captured in + ``DownloadabilityResult.reason``. + """ + try: + response = HttpRequester().head(url, allow_redirects=True) + response.raise_for_status() + + # -- 1. Signposting (RFC 8288 Link headers) --------------------------- + # requests parses all Link headers into response.links keyed by rel. 
+ links = response.links + + item_link = links.get("item") + if item_link: + logger.error("cite-as '%s' is downloadable via Signposting rel=item: %s", + url, item_link.get("url")) + return DownloadabilityResult( + is_downloadable=True, + via=DownloadVia.SIGNPOSTING_ITEM, + download_url=item_link.get("url"), + ) + + describedby_link = links.get("describedby") + if describedby_link: + logger.error("cite-as '%s' is downloadable via Signposting rel=describedby: %s", + url, describedby_link.get("url")) + return DownloadabilityResult( + is_downloadable=True, + via=DownloadVia.SIGNPOSTING_DESCRIBEDBY, + download_url=describedby_link.get("url"), + ) + + # -- 2. Direct download ----------------------------------------------- + content_type = response.headers.get("Content-Type", "").split(";")[0].strip() + if content_type and content_type not in _HTML_MIME_TYPES: + logger.error("cite-as '%s' is directly downloadable (Content-Type: %s)", + url, content_type) + return DownloadabilityResult( + is_downloadable=True, + via=DownloadVia.DIRECT, + download_url=url, + ) + + # -- 3. 
Content negotiation ------------------------------------------- + neg_response = HttpRequester().head( + url, + headers={"Accept": _ROCRATE_ACCEPT}, + allow_redirects=True, + ) + if neg_response.status_code == 200: + neg_ct = neg_response.headers.get("Content-Type", "").split(";")[0].strip() + if neg_ct and neg_ct not in _HTML_MIME_TYPES: + logger.error( + "cite-as '%s' is downloadable via content negotiation (Content-Type: %s)", + url, neg_ct, + ) + return DownloadabilityResult( + is_downloadable=True, + via=DownloadVia.CONTENT_NEGOTIATION, + download_url=url, + ) + + # -- Not downloadable ------------------------------------------------- + return DownloadabilityResult( + is_downloadable=False, + reason=( + "No Signposting links (rel='item' or rel='describedby') found, " + "Content-Type is HTML, and content negotiation did not yield a " + "downloadable format" + ), + ) + + except Exception as e: + logger.error("Error checking downloadability of '%s': %s", url, e, exc_info=True) + return DownloadabilityResult( + is_downloadable=False, + reason=str(e), + ) From f21b7fcab4368652788022b09309aae343662695 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 09:55:18 +0200 Subject: [PATCH 077/352] test(ro-crate-1.2): :white_check_mark: add tests for downloadability --- .../test_metadata_rootDataEntity.py | 56 ++ .../ro-crate-1.2/test_metadata_webDatasets.py | 75 +++ tests/unit/test_signposting.py | 531 ++++++++++++++++++ 3 files changed, 662 insertions(+) create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py create mode 100644 tests/unit/test_signposting.py diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index a63382447..844d92599 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -69,6 +69,7 @@ 
def raise_for_status(self): pass def _fake_head(url, *args, **kwargs): + logger.debug("Mock HEAD request to %s with args: %s, kwargs: %s", url, args, kwargs) return _DownloadableResponse() monkeypatch.setattr(HttpRequester(), "head", _fake_head) @@ -96,6 +97,7 @@ def raise_for_status(self): pass def _fake_head(url, *args, **kwargs): + logger.debug("Mock HEAD request to %s with args: %s, kwargs: %s", url, args, kwargs) return _HtmlResponse() monkeypatch.setattr(HttpRequester(), "head", _fake_head) @@ -174,3 +176,57 @@ def test_invalid_additional_conformsTo_reference(): "If the Root Data Entity includes a `conformsTo` property, its values MUST reference Profile entities." ] ) + + +# --------------------------------------------------------------------------- +# Root Data Entity identifier — persistent identifier resolution (RECOMMENDED) +# --------------------------------------------------------------------------- + +class _ZipResponse: + status_code = 200 + headers = {"Content-Type": "application/zip"} + links = {} + + def raise_for_status(self): + pass + + +class _HtmlResponse: + status_code = 200 + headers = {"Content-Type": "text/html; charset=utf-8"} + links = {} + + def raise_for_status(self): + pass + + +def test_valid_recommended_identifier_resolution(monkeypatch): + """ + Root Data Entity whose identifier URL resolves to a downloadable resource + (mocked as application/zip) passes the RECOMMENDED identifier resolution check. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_identifier_resolution, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_identifier_resolution(monkeypatch): + """ + Root Data Entity whose identifier URL returns text/html (landing page) fails + the RECOMMENDED identifier resolution check. 
+ """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _HtmlResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_identifier_resolution, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: persistent identifier resolution"], + expected_triggered_issues=["SHOULD resolve to the RO-Crate Metadata Document or an archive"], + ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py new file mode 100644 index 000000000..b6d360cc0 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py @@ -0,0 +1,75 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator import models +from rocrate_validator.utils.http import HttpRequester +from tests.ro_crates_1_2 import DataEntities +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + + +__metadata_data_entities_crates__ = DataEntities() + + +class _ZipResponse: + status_code = 200 + headers = {"Content-Type": "application/zip"} + links = {} + + def raise_for_status(self): + pass + + +class _HtmlResponse: + status_code = 200 + headers = {"Content-Type": "text/html; charset=utf-8"} + links = {} + + def raise_for_status(self): + pass + + +def test_valid_recommended_distribution_downloadable(monkeypatch): + """ + Dataset whose distribution DataDownload @id returns application/zip passes + the RECOMMENDED distribution downloadability check. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_data_entities_crates__.valid_recommended_distribution, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_distribution_not_downloadable(monkeypatch): + """ + Dataset whose distribution DataDownload @id returns text/html fails the + RECOMMENDED distribution downloadability check. 
+ """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _HtmlResponse()) + + do_entity_test( + __metadata_data_entities_crates__.invalid_recommended_distribution, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Dataset: distribution downloadability"], + expected_triggered_issues=["SHOULD be downloadable"], + ) diff --git a/tests/unit/test_signposting.py b/tests/unit/test_signposting.py new file mode 100644 index 000000000..1bc115bfd --- /dev/null +++ b/tests/unit/test_signposting.py @@ -0,0 +1,531 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for rocrate_validator.utils.signposting.check_downloadable.""" + + +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.signposting import (_HTML_MIME_TYPES, + _ROCRATE_ACCEPT, + DownloadabilityResult, + DownloadVia, + check_downloadable) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_URL = "https://example.com/rocrate" + + +class _HeadResponse: + """Minimal fake HEAD response sufficient for check_downloadable.""" + + def __init__( + self, + status_code: int = 200, + content_type: str | None = None, + links: dict | None = None, + raise_on_status: bool = False, + ): + self.status_code = status_code + self.headers: dict = {} + if content_type is not None: + self.headers["Content-Type"] = content_type + self.links: dict = links or {} + self._raise = raise_on_status + + def raise_for_status(self): + if self._raise: + raise RuntimeError(f"HTTP {self.status_code}") + + +def _html_resp(**kw) -> _HeadResponse: + return _HeadResponse(content_type="text/html", **kw) + + +def _zip_resp(**kw) -> _HeadResponse: + return _HeadResponse(content_type="application/zip", **kw) + + +# --------------------------------------------------------------------------- +# Data-model tests +# --------------------------------------------------------------------------- +class TestDownloadVia: + + def test_enum_string_values(self): + assert DownloadVia.DIRECT == "direct" + assert DownloadVia.SIGNPOSTING_ITEM == "signposting_item" + assert DownloadVia.SIGNPOSTING_DESCRIBEDBY == "signposting_describedby" + assert DownloadVia.CONTENT_NEGOTIATION == "content_negotiation" + + def test_all_values_are_strings(self): + for member in DownloadVia: + assert isinstance(member.value, str) + + +class TestDownloadabilityResult: + + def test_downloadable_result_fields(self): + r = DownloadabilityResult( + is_downloadable=True, + 
via=DownloadVia.DIRECT, + download_url=_URL, + ) + assert r.is_downloadable is True + assert r.via == DownloadVia.DIRECT + assert r.download_url == _URL + assert r.reason is None + + def test_not_downloadable_result_fields(self): + r = DownloadabilityResult(is_downloadable=False, reason="no luck") + assert r.is_downloadable is False + assert r.via is None + assert r.download_url is None + assert r.reason == "no luck" + + def test_defaults_are_none(self): + r = DownloadabilityResult(is_downloadable=True) + assert r.via is None + assert r.download_url is None + assert r.reason is None + + +# --------------------------------------------------------------------------- +# Module-level constants +# --------------------------------------------------------------------------- +class TestModuleConstants: + + def test_html_mime_types_contains_text_html(self): + assert "text/html" in _HTML_MIME_TYPES + + def test_html_mime_types_contains_xhtml(self): + assert "application/xhtml+xml" in _HTML_MIME_TYPES + + def test_html_mime_types_does_not_contain_json(self): + assert "application/json" not in _HTML_MIME_TYPES + assert "application/ld+json" not in _HTML_MIME_TYPES + assert "application/zip" not in _HTML_MIME_TYPES + + def test_rocrate_accept_includes_json_ld(self): + assert "application/ld+json" in _ROCRATE_ACCEPT + + def test_rocrate_accept_includes_zip(self): + assert "application/zip" in _ROCRATE_ACCEPT + + +# --------------------------------------------------------------------------- +# Signposting (RFC 8288 Link headers) +# --------------------------------------------------------------------------- +class TestSignpostingItemLink: + + def test_item_link_is_downloadable(self, monkeypatch): + item_url = "https://example.com/rocrate.zip" + resp = _HeadResponse(links={"item": {"url": item_url}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.is_downloadable is True + + def test_item_link_via(self, 
monkeypatch): + resp = _HeadResponse(links={"item": {"url": "https://example.com/rocrate.zip"}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.via == DownloadVia.SIGNPOSTING_ITEM + + def test_item_link_download_url(self, monkeypatch): + item_url = "https://example.com/rocrate.zip" + resp = _HeadResponse(links={"item": {"url": item_url}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.download_url == item_url + + def test_item_link_empty_dict_is_falsy_not_downloadable(self, monkeypatch): + """rel=item whose value is an empty dict is falsy → treated as absent, falls through.""" + # In Python `if {}` is False, so `if item_link:` skips an empty-dict entry. + resp = _HeadResponse(links={"item": {}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + # The empty dict is falsy; the code does not enter the Signposting branch. 
+ assert result.via != DownloadVia.SIGNPOSTING_ITEM + + +class TestSignpostingDescribedByLink: + + def test_describedby_link_is_downloadable(self, monkeypatch): + meta_url = "https://example.com/ro-crate-metadata.json" + resp = _HeadResponse(links={"describedby": {"url": meta_url}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.is_downloadable is True + + def test_describedby_link_via(self, monkeypatch): + resp = _HeadResponse(links={"describedby": {"url": "https://example.com/meta.json"}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.via == DownloadVia.SIGNPOSTING_DESCRIBEDBY + + def test_describedby_link_download_url(self, monkeypatch): + meta_url = "https://example.com/ro-crate-metadata.json" + resp = _HeadResponse(links={"describedby": {"url": meta_url}}) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.download_url == meta_url + + +class TestSignpostingPriority: + + def test_item_takes_priority_over_describedby(self, monkeypatch): + """When both rel=item and rel=describedby are present, rel=item wins.""" + resp = _HeadResponse(links={ + "item": {"url": "https://example.com/rocrate.zip"}, + "describedby": {"url": "https://example.com/meta.json"}, + }) + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.via == DownloadVia.SIGNPOSTING_ITEM + assert result.download_url == "https://example.com/rocrate.zip" + + def test_signposting_takes_priority_over_direct_content_type(self, monkeypatch): + """A Signposting link wins over a non-HTML Content-Type on the same response.""" + resp = _HeadResponse( + content_type="application/zip", + links={"item": {"url": "https://example.com/rocrate.zip"}}, + ) + monkeypatch.setattr(HttpRequester(), "head", lambda url, 
**kw: resp) + + result = check_downloadable(_URL) + + assert result.via == DownloadVia.SIGNPOSTING_ITEM + + +# --------------------------------------------------------------------------- +# Direct download (Content-Type based) +# --------------------------------------------------------------------------- +class TestDirectDownload: + + def test_zip_content_type(self, monkeypatch): + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _zip_resp()) + + result = check_downloadable(_URL) + + assert result.is_downloadable is True + assert result.via == DownloadVia.DIRECT + + def test_direct_download_url_equals_checked_url(self, monkeypatch): + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _zip_resp()) + + result = check_downloadable(_URL) + + assert result.download_url == _URL + + def test_json_ld_content_type(self, monkeypatch): + resp = _HeadResponse(content_type="application/ld+json") + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.is_downloadable is True + assert result.via == DownloadVia.DIRECT + + def test_content_type_with_charset_suffix(self, monkeypatch): + """charset suffix is stripped before the MIME type comparison.""" + resp = _HeadResponse(content_type="application/json; charset=utf-8") + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: resp) + + result = check_downloadable(_URL) + + assert result.is_downloadable is True + assert result.via == DownloadVia.DIRECT + + def test_text_html_is_not_direct(self, monkeypatch): + """text/html must not be treated as a direct download.""" + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _html_resp()) + + result = check_downloadable(_URL) + + assert result.via != DownloadVia.DIRECT + + def test_xhtml_is_not_direct(self, monkeypatch): + """application/xhtml+xml is in _HTML_MIME_TYPES and must not be direct.""" + resp = _HeadResponse(content_type="application/xhtml+xml") + 
# ---------------------------------------------------------------------------
# Content negotiation
# ---------------------------------------------------------------------------
class TestContentNegotiation:
    """Cases where a second, Accept-carrying HEAD request decides the result."""

    def _make_neg_head(self, plain_ct: str, neg_ct: str, neg_status: int = 200):
        """Build a HEAD stub that answers plain and Accept-based calls differently."""
        def head_stub(url, **kwargs):
            request_headers = kwargs.get("headers", {})
            if not request_headers.get("Accept"):
                return _HeadResponse(content_type=plain_ct)
            return _HeadResponse(status_code=neg_status, content_type=neg_ct)
        return head_stub

    def test_content_negotiation_success(self, monkeypatch):
        head_stub = self._make_neg_head("text/html", "application/ld+json")
        monkeypatch.setattr(HttpRequester(), "head", head_stub)

        outcome = check_downloadable(_URL)

        assert outcome.is_downloadable is True
        assert outcome.via == DownloadVia.CONTENT_NEGOTIATION

    def test_content_negotiation_download_url_equals_checked_url(self, monkeypatch):
        head_stub = self._make_neg_head("text/html", "application/ld+json")
        monkeypatch.setattr(HttpRequester(), "head", head_stub)

        outcome = check_downloadable(_URL)

        assert outcome.download_url == _URL

    def test_content_negotiation_zip_response(self, monkeypatch):
        head_stub = self._make_neg_head("text/html", "application/zip")
        monkeypatch.setattr(HttpRequester(), "head", head_stub)

        outcome = check_downloadable(_URL)

        assert outcome.is_downloadable is True
        assert outcome.via == DownloadVia.CONTENT_NEGOTIATION

    def test_content_negotiation_html_response_not_downloadable(self, monkeypatch):
        """Accept-based HEAD also returns HTML → not downloadable."""
        head_stub = self._make_neg_head("text/html", "text/html")
        monkeypatch.setattr(HttpRequester(), "head", head_stub)

        outcome = check_downloadable(_URL)

        assert outcome.is_downloadable is False

    def test_content_negotiation_non_200_status_not_downloadable(self, monkeypatch):
        """Accept-based HEAD returns 404 → content negotiation does not count."""
        head_stub = self._make_neg_head(
            "text/html", "application/ld+json", neg_status=404,
        )
        monkeypatch.setattr(HttpRequester(), "head", head_stub)

        outcome = check_downloadable(_URL)

        assert outcome.is_downloadable is False

    def test_content_negotiation_accept_header_value(self, monkeypatch):
        """The second HEAD request MUST carry the RO-Crate Accept header."""
        # Every truthy Accept value seen by the stub, in call order.
        seen_accept: list = []

        def head_stub(url, **kwargs):
            accept = kwargs.get("headers", {}).get("Accept")
            if not accept:
                return _HeadResponse(content_type="text/html")
            seen_accept.append(accept)
            return _HeadResponse(status_code=200, content_type="application/ld+json")

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        check_downloadable(_URL)

        assert seen_accept, "Second HEAD must send an Accept header"
        latest_accept = seen_accept[-1]
        assert "application/ld+json" in latest_accept
        assert "application/zip" in latest_accept

    def test_only_one_head_call_when_direct(self, monkeypatch):
        """Direct download path must NOT trigger a second HEAD for content negotiation."""
        requested_urls = []

        def head_stub(url, **kwargs):
            requested_urls.append(url)
            return _zip_resp()

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        check_downloadable(_URL)

        assert len(requested_urls) == 1

    def test_only_one_head_call_when_signposting(self, monkeypatch):
        """Signposting path must NOT trigger a second HEAD."""
        requested_urls = []

        def head_stub(url, **kwargs):
            requested_urls.append(url)
            return _HeadResponse(links={"item": {"url": "https://example.com/rocrate.zip"}})

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        check_downloadable(_URL)

        assert len(requested_urls) == 1
# ---------------------------------------------------------------------------
# Not downloadable result
# ---------------------------------------------------------------------------
class TestNotDownloadable:
    """Shape of the result when every stubbed HEAD call returns ``_html_resp()``."""

    @staticmethod
    def _check_html_only(monkeypatch):
        """Stub ``head`` so each call yields a fresh ``_html_resp()``, then run the check."""
        def head_stub(url, **kw):
            return _html_resp()

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        return check_downloadable(_URL)

    def test_not_downloadable_is_false(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert outcome.is_downloadable is False

    def test_not_downloadable_via_is_none(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert outcome.via is None

    def test_not_downloadable_download_url_is_none(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert outcome.download_url is None

    def test_not_downloadable_reason_mentions_signposting(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert outcome.reason is not None
        assert "Signposting" in outcome.reason

    def test_not_downloadable_reason_mentions_html(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert "HTML" in outcome.reason

    def test_not_downloadable_reason_mentions_content_negotiation(self, monkeypatch):
        outcome = self._check_html_only(monkeypatch)

        assert "content negotiation" in outcome.reason


# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------
class TestErrorHandling:
    """Failures of the HEAD request must surface as a non-downloadable result."""

    @staticmethod
    def _check_with_response(monkeypatch, response):
        """Stub ``head`` to always return *response*, then run the check."""
        def head_stub(url, **kw):
            return response

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        return check_downloadable(_URL)

    @staticmethod
    def _check_with_failure(monkeypatch, error_type, message):
        """Stub ``head`` to raise a fresh ``error_type(message)`` on every call."""
        def head_stub(url, **kwargs):
            raise error_type(message)

        monkeypatch.setattr(HttpRequester(), "head", head_stub)
        return check_downloadable(_URL)

    def test_http_error_status_not_downloadable(self, monkeypatch):
        """raise_for_status() raises → is_downloadable is False."""
        not_found = _HeadResponse(status_code=404, raise_on_status=True)

        outcome = self._check_with_response(monkeypatch, not_found)

        assert outcome.is_downloadable is False

    def test_http_error_reason_contains_message(self, monkeypatch):
        """The exception message is propagated to DownloadabilityResult.reason."""
        not_found = _HeadResponse(status_code=404, raise_on_status=True)

        outcome = self._check_with_response(monkeypatch, not_found)

        assert outcome.reason is not None
        assert "HTTP 404" in outcome.reason

    def test_connection_error_not_downloadable(self, monkeypatch):
        outcome = self._check_with_failure(
            monkeypatch, ConnectionError, "Connection refused",
        )

        assert outcome.is_downloadable is False

    def test_connection_error_reason(self, monkeypatch):
        outcome = self._check_with_failure(
            monkeypatch, ConnectionError, "Connection refused",
        )

        assert "Connection refused" in outcome.reason

    def test_timeout_error_not_downloadable(self, monkeypatch):
        outcome = self._check_with_failure(monkeypatch, TimeoutError, "Timed out")

        assert outcome.is_downloadable is False

    def test_generic_exception_not_downloadable(self, monkeypatch):
        outcome = self._check_with_failure(monkeypatch, ValueError, "unexpected error")

        assert outcome.is_downloadable is False
        assert "unexpected error" in outcome.reason

    def test_error_handling_never_raises(self, monkeypatch):
        """check_downloadable must never propagate exceptions to the caller."""
        # Reaching the assertion at all is the point: the call must not raise.
        outcome = self._check_with_failure(
            monkeypatch, RuntimeError, "catastrophic failure",
        )
        assert isinstance(outcome, DownloadabilityResult)
Direct download ----------------------------------------------- content_type = response.headers.get("Content-Type", "").split(";")[0].strip() if content_type and content_type not in _HTML_MIME_TYPES: - logger.error("cite-as '%s' is directly downloadable (Content-Type: %s)", + logger.debug("cite-as '%s' is directly downloadable (Content-Type: %s)", url, content_type) return DownloadabilityResult( is_downloadable=True, @@ -137,7 +137,7 @@ def check_downloadable(url: str) -> DownloadabilityResult: if neg_response.status_code == 200: neg_ct = neg_response.headers.get("Content-Type", "").split(";")[0].strip() if neg_ct and neg_ct not in _HTML_MIME_TYPES: - logger.error( + logger.debug( "cite-as '%s' is downloadable via content negotiation (Content-Type: %s)", url, neg_ct, ) From 4176cf9af1ffa5a9f9ef9f9e4af1db6733d1f023 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 12:20:08 +0200 Subject: [PATCH 079/352] feat(ro-crate-1.2): :sparkles: add validation for recommended publisher property of Root Data Entity --- .../should/2_root_data_entity_publisher.ttl | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_publisher.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_publisher.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_publisher.ttl new file mode 100644 index 000000000..baa88dbc9 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_publisher.ttl @@ -0,0 +1,36 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# Node shape applied to every node typed ro-crate:RootDataEntity.
# It carries a single property constraint on schema_org:publisher.
ro-crate:RootDataEntityRecommendedPublisher a sh:NodeShape ;
    sh:name "Root Data Entity: recommended publisher" ;
    sh:description """The Root Data Entity SHOULD have a `publisher` property
    referencing an Organization or Person (as per schema.org).""" ;
    sh:targetClass ro-crate:RootDataEntity ;
    sh:property [
        a sh:PropertyShape ;
        sh:name "Root Data Entity: RECOMMENDED `publisher` property" ;
        sh:description """Check if the Root Data Entity has a `publisher` property.
        The RO-Crate specification recommends that the Root Data Entity includes
        a publisher (Organization or Person) to describe who is responsible for
        making the RO-Crate available.""" ;
        sh:path schema_org:publisher ;
        # Presence only: at least one value is required. No sh:class / sh:nodeKind
        # is declared here, so the type of the referenced entity is not checked
        # by this shape even though the message mentions Organization or Person.
        sh:minCount 1 ;
        # Reported as a warning rather than a violation.
        sh:severity sh:Warning ;
        sh:message "The Root Data Entity SHOULD have a `publisher` property (Organization or Person)" ;
    ] .
From cfa61b6fdb3dac49a87ccef3502f5c6cf148bac6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 12:22:26 +0200 Subject: [PATCH 080/352] test(ro-crate-1.2): :white_check_mark: test recommended publisher property --- .../invalid/ro-crate-metadata.json | 26 +++++++++++++++ .../valid/ro-crate-metadata.json | 33 +++++++++++++++++++ .../test_metadata_rootDataEntity.py | 31 +++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..db3c56e59 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/invalid/ro-crate-metadata.json @@ -0,0 +1,26 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Publisher missing — invalid", + "description": "RO-Crate whose Root Data Entity does not declare a publisher.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json new file mode 100644 index 000000000..2ecc0c30a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Publisher present — valid", + "description": "RO-Crate whose Root Data Entity declares a publisher Organization.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
# ---------------------------------------------------------------------------
# Root Data Entity: publisher SHOULD be present (RECOMMENDED)
# ---------------------------------------------------------------------------

def test_valid_recommended_publisher():
    """
    Root Data Entity with a publisher Organization passes the RECOMMENDED
    publisher check.
    """
    crate = __metadata_root_data_entity_crates__.valid_recommended_publisher
    options = {"profile_identifier": "ro-crate-1.2"}

    do_entity_test(crate, models.Severity.RECOMMENDED, True, **options)


def test_invalid_recommended_publisher():
    """
    Root Data Entity without a publisher fails the RECOMMENDED publisher check.
    """
    crate = __metadata_root_data_entity_crates__.invalid_recommended_publisher
    options = {
        "profile_identifier": "ro-crate-1.2",
        "expected_triggered_requirements": ["Root Data Entity: recommended publisher"],
        "expected_triggered_issues": ["SHOULD have a `publisher` property"],
    }

    do_entity_test(crate, models.Severity.RECOMMENDED, False, **options)
"https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [{"@id": "https://example.com/dataset-dir/"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "https://example.com/dataset-dir/", + "@type": "Dataset", + "name": "Web dataset directory (no distribution)", + "description": "A web-based directory that does not declare a distribution property." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json new file mode 100644 index 000000000..ba95f768a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json @@ -0,0 +1,47 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web directory with distribution — valid", + "description": "RO-Crate containing a web-based Directory Data Entity that declares a distribution.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [{"@id": "https://example.com/dataset-dir/"}] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
# ---------------------------------------------------------------------------
# Web-based Directory Data Entity: distribution SHOULD be present (RECOMMENDED)
# ---------------------------------------------------------------------------

def test_valid_recommended_web_directory_has_distribution(monkeypatch):
    """
    Web-based Directory Data Entity that declares a distribution passes the
    RECOMMENDED distribution-presence check.
    HTTP mocked so the distribution downloadability check does not make real requests.
    """
    def head_stub(url, **kw):
        return _ZipResponse()

    monkeypatch.setattr(HttpRequester(), "head", head_stub)

    crate = __metadata_data_entities_crates__.valid_web_directory_distribution
    options = {"profile_identifier": "ro-crate-1.2"}

    do_entity_test(crate, models.Severity.RECOMMENDED, True, **options)


def test_invalid_recommended_web_directory_missing_distribution():
    """
    Web-based Directory Data Entity without a distribution fails the RECOMMENDED
    distribution-presence check (no HTTP call needed — only metadata structure checked).
    """
    crate = __metadata_data_entities_crates__.invalid_web_directory_distribution
    options = {
        "profile_identifier": "ro-crate-1.2",
        "skip_availability_check": True,
        "expected_triggered_requirements": [
            "Web-based Directory Data Entity: recommended distribution",
        ],
        "expected_triggered_issues": ["SHOULD include a `distribution` property"],
    }

    do_entity_test(crate, models.Severity.RECOMMENDED, False, **options)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
RECOMMENDED checks on entity @id values:
  - @id SHOULD NOT use ../ to climb out of the RO-Crate Root
  - International characters SHOULD be native UTF-8, not percent-encoded
  - Named contextual entities (Person, Organization) SHOULD use #-prefixed @id
"""

import re

from rocrate_validator.models import Severity, ValidationContext
from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement
from rocrate_validator.utils import log as logging

logger = logging.getLogger(__name__)

# Matches any %XX sequence where XX decodes to a non-ASCII byte (0x80–0xFF).
# These are UTF-8 continuation / leading bytes for multi-byte code-points.
_PCT_NON_ASCII_RE = re.compile(r"%[89A-Fa-f][0-9A-Fa-f]")

# Types for which a non-absolute @id SHOULD start with '#'
_NAMED_ENTITY_TYPES = frozenset({"Person", "Organization"})

# RFC 3986 section 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":".
# An @id that starts with a scheme is an absolute IRI whatever the scheme is
# (https:, mailto:, urn:, arcp:, ...), not a path relative to the crate root.
_IRI_SCHEME_RE = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:")


def _iter_identified_entities(context):
    """Yield ``(entity, entity_id)`` for each usable item of the metadata ``@graph``.

    Items that are not dicts, or whose ``@id`` is missing, empty or not a
    string, are skipped: they have no identifier whose *format* could be
    assessed, and skipping them keeps the checks from raising on malformed
    metadata.
    """
    graph = context.ro_crate.metadata.as_dict().get("@graph") or []
    for entity in graph:
        if not isinstance(entity, dict):
            continue
        entity_id = entity.get("@id")
        if isinstance(entity_id, str) and entity_id:
            yield entity, entity_id


def _declared_types(entity):
    """Return the string values of ``@type`` as a set (empty when none are usable)."""
    raw_type = entity.get("@type")
    if isinstance(raw_type, str):
        return {raw_type}
    if isinstance(raw_type, (list, tuple)):
        # Ignore non-string members (e.g. nested objects), which are unhashable.
        return {value for value in raw_type if isinstance(value, str)}
    return set()


def _has_parent_segment(entity_id):
    """Return True when any '/'-separated segment of *entity_id* is exactly '..'.

    Segment matching also catches a trailing or bare '..' and does not
    misfire on names that merely end in two dots (e.g. 'a../b').
    """
    return ".." in entity_id.split("/")


def _is_bare_relative_id(entity_id):
    """Return True when *entity_id* is neither '#'-local, a blank node, nor absolute."""
    if entity_id.startswith(("#", "_:")):
        return False
    return _IRI_SCHEME_RE.match(entity_id) is None


@requirement(name="Entity identifier: format recommendations")
class EntityIdentifierFormatChecker(PyFunctionCheck):
    """
    Checks that entity @id values follow RO-Crate 1.2 RECOMMENDED conventions:
    no parent-directory traversal, native UTF-8 for international characters,
    and '#'-prefixed identifiers for named local entities.
    """

    @check(name="Entity identifiers SHOULD NOT use ../", severity=Severity.RECOMMENDED)
    def check_no_parent_traversal(self, context: ValidationContext) -> bool:
        """
        @id paths SHOULD NOT use `../` to climb out of the RO-Crate Root
        (RO-Crate 1.2, JSON-LD appendix).

        Returns True when no entity @id contains a '..' path segment. One issue
        is recorded per offending entity; iteration stops at the first one when
        ``context.fail_fast`` is set.
        """
        result = True
        for _entity, entity_id in _iter_identified_entities(context):
            if not _has_parent_segment(entity_id):
                continue
            context.result.add_issue(
                f"Entity @id '{entity_id}' uses '../' to traverse above the "
                f"RO-Crate Root; @id paths SHOULD NOT contain '../'",
                self,
            )
            result = False
            if context.fail_fast:
                break
        return result

    @check(
        name="Entity identifiers SHOULD use native UTF-8, not percent-encoding",
        severity=Severity.RECOMMENDED,
    )
    def check_utf8_identifiers(self, context: ValidationContext) -> bool:
        """
        International characters in @id values SHOULD be written in native
        UTF-8 rather than percent-encoded (RO-Crate 1.2, JSON-LD appendix).

        Returns True when no entity @id contains a %XX escape whose value is
        0x80 or above. Escapes of ASCII bytes (e.g. %20) are not reported.
        """
        result = True
        for _entity, entity_id in _iter_identified_entities(context):
            if _PCT_NON_ASCII_RE.search(entity_id) is None:
                continue
            context.result.add_issue(
                f"Entity @id '{entity_id}' contains percent-encoded non-ASCII "
                f"characters; international characters SHOULD be written in "
                f"native UTF-8 rather than percent-encoded",
                self,
            )
            result = False
            if context.fail_fast:
                break
        return result

    @check(
        name="Named contextual entity @id SHOULD use '#' prefix",
        severity=Severity.RECOMMENDED,
    )
    def check_named_entity_id_format(self, context: ValidationContext) -> bool:
        """
        Named entities such as Person or Organization that are referenced locally
        SHOULD use an @id starting with '#' rather than a bare relative path
        (RO-Crate 1.2, JSON-LD appendix).

        Only entities whose @type includes one of ``_NAMED_ENTITY_TYPES`` are
        inspected. Identifiers that start with '#', with '_:' (blank node) or
        with any IRI scheme are accepted; everything else is reported.
        """
        result = True
        for entity, entity_id in _iter_identified_entities(context):
            if _NAMED_ENTITY_TYPES.isdisjoint(_declared_types(entity)):
                continue
            # Absolute IRIs and blank nodes are fine; only flag bare relative paths
            if not _is_bare_relative_id(entity_id):
                continue
            raw_type = entity.get("@type", "")
            context.result.add_issue(
                f"Entity @id '{entity_id}' of type '{raw_type}' is a local identifier "
                f"that does not start with '#'; named local entities SHOULD use a "
                f"'#'-prefixed @id (e.g. '#alice')",
                self,
            )
            result = False
            if context.fail_fast:
                break
        return result
- -ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ; +ro-crate:WebBasedDataEntityRecommendedProperties a sh:NodeShape ; sh:name "Web-based Data Entity: RECOMMENDED properties" ; - sh:description """A Web-based Data Entity MUST be identified by an absolute URL and - SHOULD have a `contentSize` and `sdDatePublished` property""" ; + sh:description """A Web-based Data Entity SHOULD have `contentSize` and `sdDatePublished` properties.""" ; sh:targetClass ro-crate:WebDataEntity ; - # Check if the Web-based Data Entity has a contentSize property - # sh:property [ - # a sh:PropertyShape ; - # sh:minCount 1 ; - # sh:name "Web-based Data Entity: test property" ; - # sh:path schema_org:pippo ; - # sh:severity sh:Warning ; - # sh:message """Web-based Data Entities SHOULD have a `pippo` property""" ; - # ] ; - sh:property [ a sh:PropertyShape ; sh:minCount 1 ; - sh:name "Web-based Data Entity: `contentSize` property" ; + sh:name "Web-based Data Entity: RECOMMENDED `contentSize` property" ; sh:description """Check if the Web-based Data Entity has a `contentSize` property""" ; sh:path schema_org:contentSize ; sh:datatype xsd:string ; sh:severity sh:Warning ; sh:message """Web-based Data Entities SHOULD have a `contentSize` property""" ; - # sh:sparql [ - # sh:message "If the value is a string it must be a string representing an integer." ; - # sh:select """ - # SELECT ?this ?value - # WHERE { - # ?this schema:contentSize ?value . 
- # FILTER (!regex(str(?value), "^[0-9]+$")) - # } - # """ ; - # ] ; ] ; - # Check if the Web-based Data Entity has a sdDatePublished property sh:property [ a sh:PropertyShape ; sh:minCount 1 ; sh:severity sh:Warning ; - sh:name "Web-based Data Entity: `sdDatePublished` property" ; - sh:description """Check if the Web-based Data Entity has a `sdDatePublished` property""" ; + sh:name "Web-based Data Entity: RECOMMENDED `sdDatePublished` property" ; + sh:description """Check if the Web-based Data Entity has a `sdDatePublished` property to indicate when the absolute URL was accessed""" ; sh:path schema_org:sdDatePublished ; + sh:datatype xsd:string ; sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ; sh:message """Web-based Data Entities SHOULD have a `sdDatePublished` property to indicate when the absolute URL was accessed""" ; - ] . + ] . 
\ No newline at end of file From 6d3080af1a31f7763a0ce1755b799c15d06b3ab5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:07:33 +0200 Subject: [PATCH 084/352] refactor(ro-crate-1.2): :recycle: minor changes to the data entity required shapes --- .../1.2/must/4_data_entity_metadata.py | 14 ++-- .../1.2/must/4_data_entity_metadata.ttl | 69 +++++++++---------- .../1.2/should/4_data_entity_metadata.py | 1 + 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index d5ab5ef26..50116c5c2 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -81,27 +81,31 @@ def check_availability(self, context: ValidationContext) -> bool: return result -@requirement(name="Detached RO-Crate Data Entities") +@requirement(name="Detached RO-Crate: data entities MUST be web-based") class DetachedDataEntityChecker(PyFunctionCheck): """ - Detached RO-Crate Packages MUST only declare Web-based Data Entities + In a detached RO-Crate, all Data Entities MUST be web-based + resources (i.e., have an absolute URL as @id). 
""" - @check(name="Detached RO-Crate Data Entity: MUST be web-based") + @check(name="Detached RO-Crate: data entities MUST be web-based") def check_detached_entities(self, context: ValidationContext) -> bool: if not context.ro_crate.is_detached(): return True result = True + root_entity_id = None try: root_entity_id = context.ro_crate.metadata.get_root_data_entity().id except Exception: - root_entity_id = None + pass for entity in context.ro_crate.metadata.get_data_entities(): if root_entity_id and entity.id == root_entity_id: continue if not entity.is_remote(): context.result.add_issue( - f"Detached RO-Crate includes a non web-based Data Entity '{entity.id}'", self) + f"Data Entity '{entity.id}' is not web-based, " + f"but in a detached RO-Crate all Data Entities " + f"MUST have an absolute URL as @id", self) result = False if context.fail_fast: return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl index f46118b85..797820693 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . 
@@ -25,14 +24,12 @@ ro-crate:DataEntityRequiredProperties a sh:NodeShape ; sh:description """A Data Entity MUST be a `URI Path` relative to the ROCrate root, or an absolute URI""" ; sh:targetClass ro-crate:DataEntity ; - sh:property [ sh:name "Data Entity: @id value restriction" ; sh:description """Check if the Data Entity has an absolute or relative URI as `@id`""" ; - sh:path [sh:inversePath rdf:type ] ; - sh:nodeKind sh:IRI ; - sh:severity sh:Violation ; - sh:message """Data Entities MUST have an absolute or relative URI as @id.""" ; + sh:path [ + sh:inversePath rdf:type + ] ; sh:nodeKind sh:IRI ; sh:severity sh:Violation ; sh:message """Data Entities MUST have an absolute or relative URI as @id.""" ; ] . ro-crate:FileDataEntity a sh:NodeShape ; @@ -54,17 +51,21 @@ ro-crate:FileDataEntity a sh:NodeShape ; } """ ] ; - - sh:property [ - sh:name "File Data Entity: REQUIRED type" ; - sh:description """Check if the File Data Entity has `File` as `@type`. - `File` is an RO-Crate alias for the schema.org `MediaObject`. - """ ; - sh:path rdf:type ; - sh:hasValue ro-crate:File ; - sh:severity sh:Violation ; - sh:message """File Data Entities MUST have "File" as a value for @type.""" ; - ] ; + + # Irrelevant for validation, but useful for debugging + # sh:property [ + # sh:name "File Data Entity: REQUIRED type" ; + # sh:description """Check if the File Data Entity has `File` as `@type`. + # `File` is an RO-Crate alias for the schema.org `MediaObject`. + # """ ; + # sh:path rdf:type ; + # sh:or ( + # [ sh:hasValue ro-crate:File ] + # [ sh:hasValue schema_org:MediaObject ] + # ) ; + # sh:severity sh:Violation ; + # sh:message """File Data Entities MUST have "File" as a value for @type.""" ; + # ] ; # Expand data graph with triples to mark the matching entities as File instances sh:rule [ @@ -136,20 +137,20 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; sh:severity sh:Violation ; ] . 
-ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; - sh:name "Data Entity: REQUIRED properties" ; - sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; - sh:targetClass ro-crate:DataEntity ; - sh:property - [ - a sh:PropertyShape ; - sh:path [ sh:inversePath schema_org:hasPart ] ; - sh:node schema_org:Dataset ; - sh:minCount 1 ; - sh:name "Data Entity MUST be directly referenced" ; - sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property" """ ; - # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; - ] . +# ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; +# sh:name "Data Entity: REQUIRED properties" ; +# sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; +# sh:targetClass ro-crate:DataEntity ; +# sh:property +# [ +# a sh:PropertyShape ; +# sh:path [ sh:oneOrMorePath [ sh:inversePath schema_org:hasPart ] ] ; +# sh:qualifiedValueShape [ sh:class ro-crate:RootDataEntity ] ; +# sh:qualifiedMinCount 1 ; +# sh:name "Data Entity MUST be directly referenced" ; +# sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property""" ; +# # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; +# ] . 
ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; sh:name "Generic Data Entity: REQUIRED properties" ; @@ -172,7 +173,7 @@ ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; } """ ] ; - + # Expand data graph with triples to mark the matching entities as GenericDataEntity instances sh:rule [ a sh:TripleRule ; @@ -180,7 +181,7 @@ ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; sh:predicate rdf:type ; sh:object ro-crate:GenericDataEntity ; ] ; - + # Expand data graph with triples to mark the matching entities as DataEntity instances sh:rule [ a sh:TripleRule ; @@ -189,7 +190,6 @@ ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; sh:object ro-crate:DataEntity ; ] . - # Uncomment for debugging # ro-crate:TestGenericDataEntity a sh:NodeShape ; # sh:disabled true ; @@ -206,7 +206,6 @@ ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; # sh:message "Testing for the invalidProperty of the generic data entity"; # ] . - # Uncomment for debugging # ro:testDirectory a sh:NodeShape ; # sh:name "Definition of Test Directory" ; diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py index 9a68c7e76..83719551b 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py @@ -18,6 +18,7 @@ from rocrate_validator.models import Severity, ValidationContext from rocrate_validator.requirements.python import (PyFunctionCheck, check, requirement) +from rocrate_validator.utils.signposting import check_downloadable # set up logging logger = logging.getLogger(__name__) From fa78e043130bef50330ae0eb7bea2d92cf89cb80 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:12:55 +0200 Subject: [PATCH 085/352] feat(ro-crate-1.2): :sparkles: improve recommended availability check for WebData Entities --- 
.../1.2/should/4_data_entity_metadata.py | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py index 83719551b..dd79eac2d 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py @@ -62,7 +62,9 @@ def check_citation(self, context: ValidationContext) -> bool: @requirement(name="Web-based Data Entity: REQUIRED availability") class WebDataEntityRequiredChecker(PyFunctionCheck): """ - Web-based Data Entities MUST be directly downloadable at the time of creation. + Web-based Data Entities SHOULD be directly downloadable (RO-Crate 1.2). + Downloadability is checked via Signposting, Content-Type, and content negotiation. + Entities returning an HTML page (splash page / viewer) are also flagged. """ @check(name="Web-based Data Entity: RECOMMENDED resource availability", severity=Severity.RECOMMENDED) @@ -76,10 +78,22 @@ def check_availability_warning(self, context: ValidationContext) -> bool: result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" + if entity.id.endswith("/"): + continue try: - if not entity.is_available(): - context.result.add_issue( - f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) + dl = check_downloadable(entity.id) + if not dl.is_downloadable: + if dl.reason and "HTML" in dl.reason: + msg = ( + f"Web-based Data Entity '{entity.id}' references an HTML page " + f"(possible splash page or viewer application) and is not " + f"directly downloadable" + ) + else: + msg = f"Web-based Data Entity '{entity.id}' is not directly downloadable" + if dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) result = False except Exception as e: 
context.result.add_issue( @@ -133,13 +147,21 @@ def check_content_url(self, context: ValidationContext) -> bool: continue urls = content_url if isinstance(content_url, list) else [content_url] for url in urls: + url_value = url if isinstance(url, str) else url.id if hasattr(url, "id") else None + if not url_value or not url_value.startswith("http"): + continue try: - url_value = url if isinstance(url, str) else url.id - context.ro_crate.get_external_file_size(url_value) + dl = check_downloadable(url_value) + if not dl.is_downloadable: + msg = f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' is not directly downloadable" + if dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) + result = False except Exception as e: context.result.add_issue( - f"contentUrl {url_value} for Web-based Data Entity {entity.id} is not directly downloadable: {e}", - self) + f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " + f"availability check failed: {e}", self) result = False if not result and context.fail_fast: return result From 59179266ad8496f4f1ceaf6ac86ccad1acb3fcf2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:15:46 +0200 Subject: [PATCH 086/352] refactor(ro-crate-1.2): :rotating_light: fix linter warnings --- .../ro-crate/1.2/must/4_data_entity_metadata.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index 50116c5c2..70ef2f2e3 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -14,7 +14,7 @@ import re -from rocrate_validator.models import ValidationContext +from rocrate_validator.models import Severity, ValidationContext from rocrate_validator.requirements.python import (PyFunctionCheck, check, 
requirement) from rocrate_validator.utils import log as logging @@ -127,8 +127,10 @@ def check_identifiers(self, context: ValidationContext) -> bool: try: root_data_entity = context.ro_crate.metadata.get_root_data_entity() root_entity_id = root_data_entity.id - root_entity_is_local = root_data_entity.id_as_uri.is_local_resource() if root_data_entity.id_as_uri else False - root_entity_absolute_path = root_data_entity.id_as_path if root_data_entity.has_absolute_path() else None + root_entity_is_local = root_data_entity.id_as_uri.is_local_resource() \ + if root_data_entity.id_as_uri else False + root_entity_absolute_path = root_data_entity.id_as_path \ + if root_data_entity.has_absolute_path() else None except Exception: pass for entity in context.ro_crate.metadata.get_data_entities(): @@ -136,7 +138,8 @@ def check_identifiers(self, context: ValidationContext) -> bool: continue if not root_entity_is_local and not entity.is_remote(): context.result.add_issue( - f"Data Entity '{entity.id}' has a local identifier but the Root Data Entity does not have a local identifier", self) + f"Data Entity '{entity.id}' has a local identifier but the Root Data Entity " + "does not have a local identifier", self) result = False if context.fail_fast: return False @@ -331,7 +334,8 @@ def check_content_url(self, context: ValidationContext) -> bool: url_value = url if isinstance(url, str) else url.id if not context.ro_crate.get_external_file_size(url_value): context.result.add_issue( - f"contentUrl {url_value} for Web-based Data Entity {entity.id} is not directly downloadable", + f"contentUrl {url_value} for Web-based Data Entity {entity.id} " + "is not directly downloadable", self) result = False except Exception as e: From 4f8738969a5898d1fc136448eb36569e64ecf684 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:17:04 +0200 Subject: [PATCH 087/352] test(ro-crate-1.2): :adhesive_bandage: fix missing context key --- 
.../profiles/ro-crate-1.2/test_availability_flags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index 0c7a79ecf..d717d3838 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -30,7 +30,7 @@ "about", "affiliation", "author", "cite-as", "conformsTo", "contentLocation", "contentSize", "contentUrl", "dateCreated", "dateModified", "datePublished", "description", "encodingFormat", - "hasPart", "license", "name", "sdDatePublished", "url", + "hasPart", "license", "name", "publisher", "sdDatePublished", "url", } From f28d4b56412f6f63ff2e0e394e5e32500bdb03ce Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:17:51 +0200 Subject: [PATCH 088/352] test(ro-crate-1.2): :wrench: skip non relevant checks --- .../profiles/ro-crate-1.2/test_detached_rocrates.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index e157c1442..11c17f608 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -87,6 +87,9 @@ def test_invalid_root_data_entity_identifier_when_online_available(): models.Severity.RECOMMENDED, False, profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_41.1", "ro-crate-1.2_41.2", "ro-crate-1.2_41.3", + "ro-crate-1.2_42.1", "ro-crate-1.2_43.1", "ro-crate-1.2_43.2", + "ro-crate-1.2_44.1"], expected_triggered_requirements=["Root Data Entity: RECOMMENDED identifier"], expected_triggered_issues=[ "In a remote RO-Crate, the Root Data Entity @id SHOULD be an absolute URL"] From 0431bdf1305da2101fda0d08d28ed9e5f13c008c Mon Sep 17 00:00:00 2001 From: Marco 
Enrico Piras Date: Thu, 16 Apr 2026 15:19:13 +0200 Subject: [PATCH 089/352] feat(ro-crate-1.2): :sparkles: update recommended checks for data entities --- .../1.2/should/4_data_entity_metadata.ttl | 130 ++++++++++-------- 1 file changed, 69 insertions(+), 61 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index f898ed005..3ee276d97 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -1,16 +1,16 @@ -# # Copyright (c) 2024-2026 CRS4 -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . @@ -19,29 +19,29 @@ @prefix sh: . @prefix xsd: . 
-# ro-crate:FileRecommendedProperties a sh:NodeShape ; -# sh:name "File Data Entity: RECOMMENDED properties" ; -# sh:description """A `File` Data Entity SHOULD have detailed descriptions of encodings through the `encodingFormat` property.""" ; -# sh:targetClass ro-crate:File ; -# sh:property [ -# sh:minCount 1 ; -# sh:maxCount 2 ; -# sh:path schema_org:encodingFormat ; -# sh:severity sh:Warning ; -# sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; -# sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property. -# The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or, -# to add more detail, SHOULD be linked using a `PRONOM` identifier to a `Contextual Entity` of type `WebPage`.""" ; -# sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`" ; -# sh:or ( -# [ -# sh:datatype xsd:string ; -# sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; -# ] -# [ -# sh:nodeKind sh:IRI ; -# ] -# )] . +ro-crate:FileRecommendedProperties a sh:NodeShape ; + sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; + sh:description """A `File` Data Entity SHOULD have detailed descriptions of encodings through the `encodingFormat` property.""" ; + sh:targetClass ro-crate:File ; + sh:property [ + sh:minCount 1 ; + sh:maxCount 2 ; + sh:path schema_org:encodingFormat ; + sh:severity sh:Warning ; + sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; + sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property. 
+ The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or, + to add more detail, SHOULD be linked using a `PRONOM` identifier to a `Contextual Entity` of type `WebPage`.""" ; + sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`" ; + sh:or ( + [ + sh:datatype xsd:string ; + sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; + ] + [ + sh:nodeKind sh:IRI ; + ] + )] . # ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ; # sh:name "File Data Entity: RECOMMENDED contentSize" ; @@ -59,7 +59,7 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:name "Data Entity: RECOMMENDED properties" ; - sh:description """Data Entities SHOULD have `name` and `description` properties.""" ; + sh:description """Data Entities SHOULD have `name`, `description`, `license`, `contentLocation`, `spatialCoverage`, and `citation` properties.""" ; sh:targetClass ro-crate:DataEntity ; sh:property [ a sh:PropertyShape ; @@ -90,19 +90,18 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:sparql [ a sh:SPARQLConstraint ; sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ SELECT ?this WHERE { + ?this (schema:contentLocation|schema:spatialCoverage) ?location . FILTER NOT EXISTS { - ?this (schema:contentLocation|schema:spatialCoverage) ?location . ?location a schema:Place . } } """ ; sh:name "Data Entity: RECOMMENDED `contentLocation`/`spatialCoverage` reference" ; sh:description """If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place.""" ; - sh:message "Data Entities SHOULD have a `contentLocation` or `spatialCoverage` property referencing a Place" ; + sh:message "If present, `contentLocation` or `spatialCoverage` SHOULD reference a Place" ; sh:severity sh:Warning ; ] ; sh:sparql [ @@ -153,27 +152,36 @@ ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; # sh:pattern "/$" ; # sh:message "Dataset IRI SHOULD end with '/'" . 
-# ro-crate:WebDatasetDistributionRecommended a sh:NodeShape ; -# sh:name "Dataset: RECOMMENDED `distribution` for web datasets" ; -# sh:description """If a Dataset has a web URI, it SHOULD include `distribution`.""" ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:prefixes ro-crate:sparqlPrefixes ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this a schema:Dataset . -# FILTER regex(str(?this), "^https?://", "i") -# } -# """ -# ] ; -# sh:property [ -# a sh:PropertyShape ; -# sh:path schema_org:distribution ; -# sh:minCount 1 ; -# sh:severity sh:Warning ; -# sh:message "Web-based Dataset entities SHOULD include `distribution`" ; -# ] . +ro-crate:WebDirectoryDistributionRecommended a sh:NodeShape ; + sh:name "Web-based Directory Data Entity: recommended distribution" ; + sh:description """If a non-root Dataset Data Entity has a web URI (absolute https?://), + it SHOULD include a `distribution` property pointing to a downloadable archive + (RO-Crate 1.2, Data Entities — Directories on the web).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + FILTER regex(str(?this), "^https?://", "i") + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Web-based Directory Data Entity: RECOMMENDED `distribution` property" ; + sh:description """Check if the web-based Directory Data Entity has a `distribution` + property pointing to a downloadable archive.""" ; + sh:path schema_org:distribution ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Web-based Directory Data Entities SHOULD include a `distribution` property pointing to a downloadable archive" ; + ] . 
# ro-crate:FileConformsToProfile a sh:NodeShape ; # sh:name "File: RECOMMENDED `conformsTo` profile" ; From 238abebb2075990cc5c6643daa1d5dd0af57d157 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:20:50 +0200 Subject: [PATCH 090/352] feat(ro-crate-1.2): :sparkles: add minimal OWL definition of ContextualEntity --- rocrate_validator/profiles/ro-crate/1.2/ontology.ttl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl index 938c289e2..9e1e7f7bb 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl @@ -19,9 +19,8 @@ @prefix xsd: . @prefix rdfs: . @prefix schema: . -@prefix rocrate: . +@prefix ro-crate: . @prefix bioschemas: . -@prefix ro-crate: . # @base <./.> . rdf:type owl:Ontology ; @@ -65,3 +64,9 @@ ro-crate:DataEntity rdf:type owl:Class ; ro-crate:Directory rdf:type owl:Class ; rdfs:subClassOf schema:Dataset ; rdfs:label "Directory"@en . + + +### https://w3id.org/ro/crate/1.2/ContextualEntity +ro-crate:ContextualEntity rdf:type owl:Class ; + rdfs:subClassOf schema:Thing ; + rdfs:label "ContextualEntity"@en . 
From ff407b62c2c6f7704adc1cde784dc072f9bb00f6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:22:10 +0200 Subject: [PATCH 091/352] test(ro-crate-1.2): fix non-relevant check --- .../integration/profiles/ro-crate-1.2/test_detached_rocrates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index 11c17f608..c2b4fa98d 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -73,7 +73,7 @@ def test_root_data_entity_identifier_when_online_available(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_41.1", "ro-crate-1.2_44.1"], + skip_checks=["ro-crate-1.2_35.1"], ) From d1ec84c8ee9496f2015294a324e382c3ac7059fb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:23:28 +0200 Subject: [PATCH 092/352] test(ro-crate-1.2): :card_file_box: fix and extend test data for RO-Crate 1.2 --- .../valid/ro-crate-metadata.json | 1 + .../invalid/ro-crate-metadata.json | 38 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 40 +++++++++++++++++++ .../invalid/ro-crate-metadata.json | 40 +++++++++++++++++++ .../valid/ro-crate-metadata.json | 33 +++++++++++++++ .../valid/ro-crate-metadata.json | 1 + .../invalid/ro-crate-metadata.json | 39 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 33 +++++++++++++++ .../valid/ro-crate-metadata.json | 7 ++++ .../valid/basic-ro-crate-metadata.json | 9 ++++- .../invalid/ro-crate-metadata.json | 33 +++++++++++++++ .../valid/ro-crate-metadata.json | 35 ++++++++++++++++ .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 9 ++++- .../single_value/valid/ro-crate-metadata.json | 9 ++++- 
.../valid/citeas-ro-crate-metadata.json | 12 ++++-- .../valid/ro-crate-metadata.json | 7 ++++ .../valid/ro-crate-metadata.json | 1 + .../valid/ro-crate-metadata.json | 1 + .../valid/ro-crate-metadata.json | 7 ++++ .../valid/ro-crate-metadata.json | 7 ++++ .../valid/ro-crate-metadata.json | 7 ++++ 24 files changed, 386 insertions(+), 10 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json index ce16aac2b..8e170ec60 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json @@ -23,6 +23,7 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, + "publisher": {"@id": "https://ror.org/05f9q8d28"}, "hasPart": [ { 
"@id": "data.csv" diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..aae1f42ad --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/invalid/ro-crate-metadata.json @@ -0,0 +1,38 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Named entity without #-prefix — invalid", + "description": "RO-Crate where a local Person entity uses a bare relative @id instead of '#'-prefixed.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "author": {"@id": "alice"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "alice", + "@type": "Person", + "name": "Alice Researcher", + "affiliation": {"@id": "example-org"} + }, + { + "@id": "example-org", + "@type": "Organization", + "name": "Example University" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json new file mode 100644 index 000000000..707d8836f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Named entity #-prefix — valid", + "description": "RO-Crate where all local Person and Organization entities use '#'-prefixed @id.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "author": {"@id": "#alice"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#alice", + "@type": "Person", + "name": "Alice Researcher", + "affiliation": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..291685161 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/invalid/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Parent traversal — invalid", + "description": "RO-Crate with a contextual entity whose @id uses ../ to climb out of the RO-Crate Root.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [], + "citation": {"@id": "../external-publication.html"} + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "../external-publication.html", + "@type": "ScholarlyArticle", + "name": "A publication outside the crate root", + "description": "A contextual entity referenced via ../ — demonstrates parent directory traversal." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json new file mode 100644 index 000000000..3b6bfb5fd --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "No parent traversal — valid", + "description": "RO-Crate with no @id values that use ../ to climb out of the root.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json index ce16aac2b..8e170ec60 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json @@ -23,6 +23,7 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, + "publisher": {"@id": "https://ror.org/05f9q8d28"}, "hasPart": [ { "@id": "data.csv" diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..a3352c00e --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/invalid/ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Percent-encoded UTF-8 — invalid", + "description": "RO-Crate where a contextual entity @id uses percent-encoded non-ASCII bytes instead of native UTF-8.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#donn%C3%A9es-schema", + "@type": "DefinedTerm", + "name": "Données (percent-encoded)", + "description": "A contextual entity whose @id uses %C3%A9 (percent-encoded é) instead of native UTF-8." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json new file mode 100644 index 000000000..fdc7665fa --- /dev/null +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "UTF-8 identifiers — valid", + "description": "RO-Crate where no @id uses percent-encoded non-ASCII bytes.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json index 26b39d769..1f1a1ee36 100644 --- a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -20,12 +20,19 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "publisher": {"@id": "#publisher-org"}, "hasPart": [ { "@id": "data.csv" } ] }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index a1c732df8..ebadcb01b 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -24,7 +24,14 @@ "@id": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/" }, "url": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", - "hasPart": [] + "hasPart": [], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": 
"Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..7ae5c278e --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/invalid/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Online-available RO-Crate with relative root @id (invalid)", + "description": "A detached RO-Crate whose Root Data Entity has a relative @id instead of an absolute URL.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "publisher": {"@id": "https://ror.org/012345678"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json new file mode 100644 index 000000000..d02f4d5d1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json @@ -0,0 +1,35 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "https://example.org/ro-crate/detached/citable-root-data-entity/"} + }, + { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "@type": "Dataset", + "name": "Detached RO-Crate with absolute root @id", + "description": "A detached RO-Crate whose Root Data Entity has an absolute URL as @id.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "hasPart": [], + "cite-as": {"@id": "https://example.org/ro-crate/detached/citable-root-data-entity/"}, + "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "publisher": {"@id": "https://ror.org/012345678"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json index 0eb0f6f4b..654949be4 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json @@ -20,7 +20,14 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [] + "hasPart": [], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json index cc5c46aaf..743aa37ff 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -24,7 +24,14 @@ { "@id": "my-data-file.txt" } - ] + ], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json index 0eb0f6f4b..654949be4 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json @@ -20,7 +20,14 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [] + "hasPart": [], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json index 0eb0f6f4b..654949be4 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json @@ -20,7 +20,14 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [] + "hasPart": [], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json index 0eb0f6f4b..654949be4 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json @@ -20,7 +20,14 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - 
"hasPart": [] + "hasPart": [], + "publisher": {"@id": "#publisher-org"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json index a29c711b8..0c1d64a52 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json @@ -21,13 +21,17 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], + "publisher": {"@id": "#publisher-org"}, "cite-as": { "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" }, - "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", - "distribution": { - "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" - } + "url": "https://example.org/ro-crate/detached/citable-root-data-entity/" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json index 78f34f3f7..8330ba64e 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json @@ -15,8 
+15,15 @@ "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, "hasPart": [], + "publisher": {"@id": "#publisher-org"}, "identifier": "https://doi.org/10.1234/resolvable-rocrate" }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json index 481e655bc..f76da9814 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json @@ -23,6 +23,7 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, + "publisher": {"@id": "https://ror.org/05f9q8d28"}, "hasPart": [ { "@id": "data.csv" diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json index 481e655bc..f76da9814 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -23,6 +23,7 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, + "publisher": {"@id": "https://ror.org/05f9q8d28"}, "hasPart": [ { "@id": "data.csv" diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json 
index f657715b3..64450c8e3 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json @@ -14,9 +14,16 @@ "description": "RO-Crate whose Root Data Entity declares a distribution pointing to a downloadable DataDownload archive.", "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, "hasPart": [], "distribution": {"@id": "https://example.com/rocrate.zip"} }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json index 5b4f2eea3..d54a90183 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json @@ -14,8 +14,15 @@ "description": "RO-Crate with a web-based File Data Entity that declares a downloadable contentUrl.", "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, "hasPart": [{"@id": "https://example.com/landing/dataset"}] }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json index d88026914..75105219e 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json @@ -14,8 +14,15 @@ "description": "RO-Crate with a web-based File Data Entity whose @id returns a non-HTML Content-Type (directly downloadable).", "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, "hasPart": [{"@id": "https://example.com/data.csv"}] }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", From 566c00e87ba531c6c61a98018f8f7aa3e2f821f2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:26:02 +0200 Subject: [PATCH 093/352] test(ro-crate-1.2): :white_check_mark: extend tests for entity identifiers --- .../ro-crate-1.2/test_metadata_document.py | 90 +++++++++++++++++++ .../ro-crate-1.2/test_ro_crate_1_2.py | 2 +- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py index 7a77a8ceb..8678b4479 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py @@ -174,3 +174,93 @@ def test_not_described_contextual_entity(): "Contextual entities that are referenced by other entities SHOULD be " "described in the same @graph, with at least an RDF type specified."] ) + + +# --------------------------------------------------------------------------- +# @id format: no ../ parent traversal (RECOMMENDED) 
+# --------------------------------------------------------------------------- + +def test_valid_no_parent_traversal(): + """ + Crate with clean relative @id paths passes the no-../ check. + """ + do_entity_test( + __metadata_document_crates__.valid_no_parent_traversal, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_no_parent_traversal(): + """ + Crate with an entity @id containing ../ fails the RECOMMENDED check. + """ + do_entity_test( + __metadata_document_crates__.invalid_no_parent_traversal, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Entity identifier: format recommendations"], + expected_triggered_issues=["SHOULD NOT contain '../'"], + ) + + +# --------------------------------------------------------------------------- +# @id format: native UTF-8, not percent-encoded (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_utf8_identifiers(): + """ + Crate with no percent-encoded non-ASCII bytes in any @id passes the encoding check. + """ + do_entity_test( + __metadata_document_crates__.valid_utf8_identifiers, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_utf8_identifiers(): + """ + Crate with percent-encoded non-ASCII bytes in @id fails the RECOMMENDED check.
+ """ + do_entity_test( + __metadata_document_crates__.invalid_utf8_identifiers, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Entity identifier: format recommendations"], + expected_triggered_issues=["percent-encoded non-ASCII characters"], + ) + + +# --------------------------------------------------------------------------- +# @id format: named contextual entity SHOULD use # prefix (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_named_entity_id_format(): + """ + Crate where local Person/Organization entities use '#'-prefixed @id passes. + """ + do_entity_test( + __metadata_document_crates__.valid_named_entity_id_format, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_named_entity_id_format(): + """ + Crate where a local Person entity uses a bare relative @id (no '#') fails. + """ + do_entity_test( + __metadata_document_crates__.invalid_named_entity_id_format, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Entity identifier: format recommendations"], + expected_triggered_issues=["named local entities SHOULD use a '#'-prefixed @id"], + ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py index eec580b1a..9675b822f 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py +++ b/tests/integration/profiles/ro-crate-1.2/test_ro_crate_1_2.py @@ -87,7 +87,7 @@ def test_invalid_detached_relative_entity(): paths.detached_relative_entity, models.Severity.REQUIRED, False, - ["Detached RO-Crate Data Entities"], + ["Detached RO-Crate: data entities MUST be web-based"], profile_identifier="ro-crate-1.2", ) From 9a4d8a9ccb0d7b065caa10932f4fbb67aa627873 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 15:26:43 
+0200 Subject: [PATCH 094/352] test(ro-crate-1.2): :card_file_box: fix data set --- .../ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json index 2a8f68bee..7879ff53a 100644 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json @@ -19,6 +19,9 @@ "license": { "@id": "https://creativecommons.org/publicdomain/zero/1.0/" }, + "publisher": { + "@id": "https://orcid.org/0000-0001-2345-6789" + }, "datePublished": "2024-01-01", "hasPart": [ { From bbe137e15f04fc879d6542291670165f7371f155 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 16:00:28 +0200 Subject: [PATCH 095/352] feat(ro-crate-1.2): :sparkles: check funder property of Root Data Entity --- .../1.2/should/2_root_data_entity_funding.ttl | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl new file mode 100644 index 000000000..321eb235f --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl @@ -0,0 +1,82 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix schema_org: <http://schema.org/> . +@prefix sh: <http://www.w3.org/ns/shacl#> . + +# Root Data Entity SHOULD reference funders directly via `funder` +ro-crate:RootDataEntityRecommendedFunder a sh:NodeShape ; + sh:name "Root Data Entity: recommended funder" ; + sh:description """The Root Data Entity SHOULD reference funders directly via the `funder` + property, using Organization (or Person) entities (RO-Crate 1.2, Funding and Grants).""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: RECOMMENDED `funder` property" ; + sh:description """Check if the Root Data Entity references funders directly via `funder`.""" ; + sh:path schema_org:funder ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD reference funders directly via the `funder` property" ; + ] . + +# funder values SHOULD be Organization entities; +# Organization funder entities SHOULD themselves reference an external funder.
+ro-crate:FunderEntityRecommendedType a sh:NodeShape ; + sh:name "Funder entity: recommended Organization type" ; + sh:description """Entities referenced via `funder` from the Root Data Entity SHOULD be of + type Organization (RO-Crate 1.2, Funding and Grants — K1/K2).""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Funder entity: RECOMMENDED `Organization` type" ; + sh:description """Check if funder entities referenced from the Root Data Entity + are of type Organization.""" ; + sh:path schema_org:funder ; + sh:class schema_org:Organization ; + sh:severity sh:Warning ; + sh:message "Funder entities referenced from the Root Data Entity SHOULD be of type `Organization`" ; + ] . + +# An Organization that is referenced as funder from Root SHOULD itself +# carry a `funder` property pointing to an external funding body. +ro-crate:ProjectOrganizationRecommendedFunder a sh:NodeShape ; + sh:name "Project Organization: recommended funder reference" ; + sh:description """An Organization entity referenced via `funder` from the Root Data Entity + SHOULD itself reference the external funding body via its own `funder` property + (RO-Crate 1.2, Funding and Grants — K2).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?root a ro-crate:RootDataEntity . + ?root schema:funder ?this . + ?this a schema:Organization . + FILTER(!regex(str(?this), "^https?://", "i")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Project Organization: RECOMMENDED `funder` property" ; + sh:description """Check if an Organization referenced as funder from the Root Data Entity + itself references an external funding body via `funder`.""" ; + sh:path schema_org:funder ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "An Organization referenced as `funder` from the Root Data Entity SHOULD itself reference an external `funder`" ; + ] . 
From 0cacb433a63ec6c9e008509f13d50e08d74a66a6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 16:01:39 +0200 Subject: [PATCH 096/352] test(ro-crate-1.2): :card_file_box: update data set to include funder property --- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 38 +++++++++++++++---- .../valid/ro-crate-metadata.json | 30 ++++++++++++--- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 30 ++++++++++++--- .../valid/ro-crate-metadata.json | 16 +++++++- .../valid/basic-ro-crate-metadata.json | 14 ++++++- .../valid/ro-crate-metadata.json | 14 ++++++- .../valid/ro-crate-metadata.json | 14 ++++++- .../valid/ro-crate-metadata.json | 14 ++++++- .../valid/ro-crate-metadata.json | 14 ++++++- .../single_value/valid/ro-crate-metadata.json | 14 ++++++- .../valid/citeas-ro-crate-metadata.json | 16 +++++++- .../valid/ro-crate-metadata.json | 30 ++++++++++++--- .../valid/ro-crate-metadata.json | 30 ++++++++++++--- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 9 ++++- .../valid/ro-crate-metadata.json | 32 +++++++++++++--- .../valid/ro-crate-metadata.json | 38 +++++++++++++++---- .../valid/ro-crate-metadata.json | 34 ++++++++++++++--- .../valid/ro-crate-metadata.json | 34 ++++++++++++++--- 21 files changed, 373 insertions(+), 75 deletions(-) diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json index 8e170ec60..966b53e02 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json @@ -23,12 +23,17 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, - "publisher": {"@id": "https://ror.org/05f9q8d28"}, + 
"publisher": { + "@id": "https://ror.org/05f9q8d28" + }, "hasPart": [ { "@id": "data.csv" } - ] + ], + "funder": { + "@id": "https://ror.org/05f9q8d28" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json index 707d8836f..43d8a2e30 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json @@ -4,19 +4,32 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Named entity #-prefix — valid", + "name": "Named entity #-prefix \u2014 valid", "description": "RO-Crate where all local Person and Organization entities use '#'-prefixed @id.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "author": {"@id": "#alice"}, - "hasPart": [] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "author": { + "@id": "#alice" + }, + "hasPart": [], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -28,13 +41,22 @@ "@id": "#alice", "@type": "Person", "name": "Alice Researcher", - "affiliation": {"@id": "#publisher-org"} + "affiliation": { + "@id": "#publisher-org" + } }, { "@id": "#publisher-org", "@type": "Organization", "name": "Example University", "url": "https://example.edu" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": 
"Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json index 3b6bfb5fd..6f1215ebd 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json @@ -4,18 +4,29 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "No parent traversal — valid", + "name": "No parent traversal \u2014 valid", "description": "RO-Crate with no @id values that use ../ to climb out of the root.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -28,6 +39,13 @@ "@type": "Organization", "name": "Example Research Institute", "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json index 8e170ec60..966b53e02 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json @@ -23,12 +23,17 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, - "publisher": {"@id": "https://ror.org/05f9q8d28"}, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, "hasPart": [ { "@id": "data.csv" } - ] + ], + "funder": { + "@id": "https://ror.org/05f9q8d28" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json index fdc7665fa..358605ece 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json @@ -4,18 +4,29 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "UTF-8 identifiers — valid", + "name": "UTF-8 identifiers \u2014 valid", "description": "RO-Crate where no @id uses percent-encoded non-ASCII bytes.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": 
"https://creativecommons.org/licenses/by/4.0/", @@ -28,6 +39,13 @@ "@type": "Organization", "name": "Example Research Institute", "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json index 1f1a1ee36..ea0830172 100644 --- a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -20,12 +20,17 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "publisher": {"@id": "#publisher-org"}, + "publisher": { + "@id": "#publisher-org" + }, "hasPart": [ { "@id": "data.csv" } - ] + ], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -108,6 +113,13 @@ "@type": "Organization", "name": "Example University", "url": "https://www.exampleuniversity.edu" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index ebadcb01b..a69a2d283 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -25,7 +25,12 @@ }, "url": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", "hasPart": [], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -38,6 +43,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json index 654949be4..153609436 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json @@ -21,7 +21,12 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -34,6 +39,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json index 743aa37ff..da03a2b53 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -25,7 +25,12 @@ "@id": "my-data-file.txt" } ], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -45,6 +50,13 @@ "name": "My Data File", "description": "A data file that is part of the RO-Crate.", "encodingFormat": "text/plain" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json index 654949be4..153609436 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json @@ -21,7 +21,12 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": 
"#publisher-org", @@ -34,6 +39,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json index 654949be4..153609436 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json @@ -21,7 +21,12 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -34,6 +39,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json index 654949be4..153609436 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json @@ -21,7 +21,12 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], - "publisher": {"@id": "#publisher-org"} + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -34,6 +39,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json index 0c1d64a52..aca82eea6 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json @@ -21,11 +21,16 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "hasPart": [], - "publisher": {"@id": "#publisher-org"}, + "publisher": { + "@id": "#publisher-org" + }, "cite-as": { "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" }, - "url": "https://example.org/ro-crate/detached/citable-root-data-entity/" + "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -38,6 +43,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license that lets others distribute, remix, adapt, and build upon your work, even commercially." 
+ }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json index 8330ba64e..268924ab9 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json @@ -4,19 +4,30 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Root identifier resolution — valid", + "name": "Root identifier resolution \u2014 valid", "description": "RO-Crate whose Root Data Entity has an identifier URL that resolves to RO-Crate content via Signposting.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "hasPart": [], - "publisher": {"@id": "#publisher-org"}, - "identifier": "https://doi.org/10.1234/resolvable-rocrate" + "publisher": { + "@id": "#publisher-org" + }, + "identifier": "https://doi.org/10.1234/resolvable-rocrate", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -29,6 +40,13 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json index 2ecc0c30a..4cd320f64 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json @@ -4,18 +4,29 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Publisher present — valid", + "name": "Publisher present \u2014 valid", "description": "RO-Crate whose Root Data Entity declares a publisher Organization.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -28,6 +39,13 @@ "@type": "Organization", "name": "Example Research Institute", "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json index f76da9814..052a543f5 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json @@ -23,12 +23,17 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, - "publisher": {"@id": "https://ror.org/05f9q8d28"}, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, "hasPart": [ { "@id": "data.csv" } - ] + ], + "funder": { + "@id": "https://ror.org/05f9q8d28" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json index f76da9814..052a543f5 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -23,12 +23,17 @@ "author": { "@id": "https://orcid.org/0000-0002-1825-0097" }, - "publisher": {"@id": "https://ror.org/05f9q8d28"}, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, "hasPart": [ { "@id": "data.csv" } - ] + ], + "funder": { + "@id": "https://ror.org/05f9q8d28" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json index 64450c8e3..9d9ba1046 100644 --- 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json @@ -4,19 +4,32 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Dataset distribution — valid", + "name": "Dataset distribution \u2014 valid", "description": "RO-Crate whose Root Data Entity declares a distribution pointing to a downloadable DataDownload archive.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, "hasPart": [], - "distribution": {"@id": "https://example.com/rocrate.zip"} + "distribution": { + "@id": "https://example.com/rocrate.zip" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -36,6 +49,13 @@ "name": "RO-Crate archive", "description": "A ZIP archive of the RO-Crate, directly downloadable.", "encodingFormat": "application/zip" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json index ba95f768a..f0c87695d 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json 
+++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json @@ -4,18 +4,33 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Web directory with distribution — valid", + "name": "Web directory with distribution \u2014 valid", "description": "RO-Crate containing a web-based Directory Data Entity that declares a distribution.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [{"@id": "https://example.com/dataset-dir/"}] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [ + { + "@id": "https://example.com/dataset-dir/" + } + ], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -28,7 +43,9 @@ "@type": "Dataset", "name": "Web dataset directory", "description": "A web-based directory with a distribution pointing to a downloadable archive.", - "distribution": {"@id": "https://example.com/dataset-dir.zip"} + "distribution": { + "@id": "https://example.com/dataset-dir.zip" + } }, { "@id": "https://example.com/dataset-dir.zip", @@ -42,6 +59,13 @@ "@type": "Organization", "name": "Example Research Institute", "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json index d54a90183..4b3b258f2 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json @@ -4,18 +4,33 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Web entity contentUrl — valid", + "name": "Web entity contentUrl \u2014 valid", "description": "RO-Crate with a web-based File Data Entity that declares a downloadable contentUrl.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [{"@id": "https://example.com/landing/dataset"}] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [ + { + "@id": "https://example.com/landing/dataset" + } + ], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -38,6 +53,13 @@ "contentUrl": "https://cdn.example.com/dataset.zip", "contentSize": "1024", "sdDatePublished": "2024-01-01" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json index 75105219e..1c71fa7ae 100644 --- 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json @@ -4,18 +4,33 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Web entity downloadable — valid", + "name": "Web entity downloadable \u2014 valid", "description": "RO-Crate with a web-based File Data Entity whose @id returns a non-HTML Content-Type (directly downloadable).", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "hasPart": [{"@id": "https://example.com/data.csv"}] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "hasPart": [ + { + "@id": "https://example.com/data.csv" + } + ], + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "#publisher-org", @@ -37,6 +52,13 @@ "encodingFormat": "text/csv", "contentSize": "512", "sdDatePublished": "2024-01-01" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" } ] } From c6c8beb3d04d8509ff9155d1879f1bfa4fc1c2ca Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 16:02:39 +0200 Subject: [PATCH 097/352] test(ro-crate-1.2): :white_check_mark: test `funder` property of the Root Dataset --- .../invalid_no_funder/ro-crate-metadata.json | 33 ++++++++++ .../ro-crate-metadata.json | 41 ++++++++++++ .../ro-crate-metadata.json | 40 ++++++++++++ .../valid/ro-crate-metadata.json | 52 ++++++++++++++++ 
.../ro-crate-1.2/test_availability_flags.py | 2 +- .../ro-crate-1.2/test_detached_rocrates.py | 2 +- .../test_metadata_rootDataEntity.py | 62 +++++++++++++++++++ 7 files changed, 230 insertions(+), 2 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_project_funder/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_non_org_funder/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json new file mode 100644 index 000000000..135049fa0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json @@ -0,0 +1,33 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Funded RO-Crate — missing funder", + "description": "RO-Crate whose Root Data Entity has no `funder` property, triggering the RECOMMENDED funder check.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "hasPart": [] + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative 
Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_project_funder/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_project_funder/ro-crate-metadata.json new file mode 100644 index 000000000..5da483059 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_project_funder/ro-crate-metadata.json @@ -0,0 +1,41 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Funded RO-Crate — project org missing funder", + "description": "RO-Crate whose project Organization is referenced as funder from Root but does not itself reference an external funder.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "#project-org"}, + "hasPart": [] + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#project-org", + "@type": "Organization", + "name": "Example Research Project", + "description": "A project Organization that does not reference any external funder.", + "url": "https://example.org/projects/example-project" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_non_org_funder/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_non_org_funder/ro-crate-metadata.json new file mode 100644 index 000000000..d0628da07 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_non_org_funder/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Funded RO-Crate — funder is not Organization", + "description": "RO-Crate whose Root Data Entity references a funder that is a Person, not an Organization.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "#funder-person"}, + "hasPart": [] + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-person", + "@type": "Person", + "name": "Jane Doe", + "description": "An individual acting as funder — should be Organization instead." + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json new file mode 100644 index 000000000..8104501b9 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json @@ -0,0 +1,52 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Funded RO-Crate — valid", + "description": "RO-Crate whose Root Data Entity references both a project Organization and an external funder directly via `funder`.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": [ + {"@id": "#project-org"}, + {"@id": "https://ror.org/00k4n6c32"} + ], + "hasPart": [] + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#project-org", + "@type": "Organization", + "name": "Example Research Project", + "description": "The research project associated with this RO-Crate.", + "url": "https://example.org/projects/example-project", + "funder": {"@id": "https://ror.org/00k4n6c32"} + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index d717d3838..b525fb495 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -27,7 +27,7 @@ # Minimal set of JSON-LD context keys needed to pass `check_compaction` # for the test crates used in this module. _FAKE_CONTEXT_KEYS = { - "about", "affiliation", "author", "cite-as", "conformsTo", + "about", "affiliation", "author", "cite-as", "conformsTo", "funder", "contentLocation", "contentSize", "contentUrl", "dateCreated", "dateModified", "datePublished", "description", "encodingFormat", "hasPart", "license", "name", "publisher", "sdDatePublished", "url", diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index c2b4fa98d..a64fe070b 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -73,7 +73,7 @@ def test_root_data_entity_identifier_when_online_available(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_35.1"], + skip_checks=["ro-crate-1.2_35.1", "ro-crate-1.2_38.1"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 48ca4904b..82f845c24 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -261,3 +261,65 @@ def test_invalid_recommended_publisher(): expected_triggered_requirements=["Root Data Entity: recommended publisher"], expected_triggered_issues=["SHOULD have a `publisher` property"], ) + + +# 
--------------------------------------------------------------------------- +# Root Data Entity: funder SHOULD be present (RECOMMENDED — K1/K2/K3) +# --------------------------------------------------------------------------- + +def test_valid_recommended_funding(): + """ + Root Data Entity with a funder Organization (which itself references an + external funder) passes all RECOMMENDED funding checks. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_funding, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_funding_no_funder(): + """ + Root Data Entity with no `funder` property fails the RECOMMENDED + direct-funder check (K3). + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_funding_no_funder, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: recommended funder"], + expected_triggered_issues=["SHOULD reference funders directly via the `funder` property"], + ) + + +def test_invalid_recommended_funding_non_org_funder(): + """ + Root Data Entity whose `funder` references a Person (not an Organization) + fails the RECOMMENDED funder-type check (K1). + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_funding_non_org_funder, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Funder entity: recommended Organization type"], + expected_triggered_issues=["SHOULD be of type `Organization`"], + ) + + +def test_invalid_recommended_funding_no_project_funder(): + """ + Root Data Entity whose local project Organization (`funder`) does not itself + reference an external funder fails the RECOMMENDED project-org funder check (K2). 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_funding_no_project_funder, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Project Organization: recommended funder reference"], + expected_triggered_issues=["SHOULD itself reference an external `funder`"], + ) From 59a4c14836e472c00ee6141e44ee7aecda305c7d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 16:14:34 +0200 Subject: [PATCH 098/352] fix(ro-crate-1.2): :bug: fix implementation of `check_content_url` --- .../1.2/must/4_data_entity_metadata.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index 70ef2f2e3..e4089bc44 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -307,7 +307,7 @@ def check_content_size(self, context: ValidationContext) -> bool: except Exception: content_int = None external_size = context.ro_crate.get_external_file_size(entity.id) - if content_int is not None and content_int != external_size: + if external_size is not None and content_int is not None and content_int != external_size: context.result.add_issue( f'The property contentSize={content_size} of the Web-based Data Entity ' f'{entity.id} does not match the actual size of ' @@ -330,18 +330,22 @@ def check_content_url(self, context: ValidationContext) -> bool: continue urls = content_url if isinstance(content_url, list) else [content_url] for url in urls: + url_value = url if isinstance(url, str) else url.id if hasattr(url, "id") else None + if not url_value or not url_value.startswith("http"): + continue try: - url_value = url if isinstance(url, str) else url.id - if not context.ro_crate.get_external_file_size(url_value): 
- context.result.add_issue( - f"contentUrl {url_value} for Web-based Data Entity {entity.id} " - "is not directly downloadable", - self) + dl = check_downloadable(url_value) + if not dl.is_downloadable: + msg = (f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " + "is not directly downloadable") + if dl.reason: + msg += f": {dl.reason}" + context.result.add_issue(msg, self) result = False except Exception as e: context.result.add_issue( - f"contentUrl {url} for Web-based Data Entity {entity.id} is not directly downloadable: {e}", - self) + f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " + f"availability check failed: {e}", self) result = False if not result and context.fail_fast: return result From 7ad9ae2b4cf97b5587fca9f3716021ccb1f77379 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 17:01:58 +0200 Subject: [PATCH 099/352] feat(ro-crate-1.2): :sparkles: check that #<> File Data Entities have a localPath property --- .../1.2/should/4_missing_file_local_path.py | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_missing_file_local_path.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_missing_file_local_path.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_missing_file_local_path.py new file mode 100644 index 000000000..1311ffca1 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_missing_file_local_path.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: This check is implemented as a Python PyFunctionCheck rather than a +# SHACL shape because the core condition — "the file is not present in the +# RO-Crate payload" — requires filesystem access (i.e. checking whether a +# referenced local file actually exists on disk or inside a ZIP archive). +# SHACL cannot express conditions that depend on external state such as +# file availability. A SHACL shape could only verify that `localPath` is +# present as a property, which would produce false positives on every local +# file that *is* present and therefore has no need for `localPath`. By +# using a Python check we can combine graph inspection with filesystem +# availability checks and emit a warning only when a local file is missing +# *and* no `localPath` property is provided. +# +# Additionally, the spec states that a File entity MAY use a local identifier +# starting with "#" for files that are deliberately not present in the +# payload. In that case `localPath` SHOULD be used to indicate where the +# file can be found. This requires distinguishing "#" identifiers from +# regular relative paths, which SHACL cannot do. 
+ +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils import log as logging + +logger = logging.getLogger(__name__) + + +@requirement(name="Data Entity: missing file SHOULD use localPath") +class MissingFileLocalPathChecker(PyFunctionCheck): + """ + In an attached RO-Crate, when a File Data Entity references a local file + that is not present in the RO-Crate payload, the entity SHOULD declare a + ``localPath`` property to indicate where the file can be found locally. + + Additionally, a File Data Entity whose ``@id`` begins with ``#`` denotes a + deliberately absent file; in that case ``localPath`` SHOULD also be + declared to indicate where the file can be found when it exists. + (RO-Crate 1.2 specification, section on File Data Entities.) + """ + + @check(name="Missing local File SHOULD use localPath", + severity=Severity.RECOMMENDED) + def check_missing_file_local_path(self, context: ValidationContext) -> bool: + if context.ro_crate.is_detached(): + return True + if context.settings.metadata_only: + return True + root_entity_id = None + try: + root_entity_id = context.ro_crate.metadata.get_root_data_entity().id + except Exception: + pass + result = True + for entity in context.ro_crate.metadata.get_data_entities( + exclude_web_data_entities=True): + if root_entity_id and entity.id == root_entity_id: + continue + if not entity.is_file(): + continue + if entity.has_local_identifier(): + local_path = entity.get_property("localPath") + if not local_path: + context.result.add_issue( + f"File Data Entity '{entity.id}' uses a local " + f"identifier (#) for a deliberately absent file, " + f"but does not declare a `localPath` property; " + f"consider adding `localPath` to indicate where " + f"the file can be found locally", + self) + result = False + if context.fail_fast: + return False + else: + local_path_value = ( + local_path if 
isinstance(local_path, str) + else local_path.id if hasattr(local_path, "id") + else str(local_path) + ) + logger.warning( + "File Data Entity '%s' declares localPath='%s' for a " + "deliberately absent file; the availability of this " + "path cannot be verified by the validator", + entity.id, local_path_value) + continue + if not entity.has_relative_path(): + continue + if entity.is_available(): + continue + local_path = entity.get_property("localPath") + if not local_path: + context.result.add_issue( + f"File Data Entity '{entity.id}' is not present in the " + f"RO-Crate payload and does not declare a `localPath` " + f"property; consider adding `localPath` to indicate where " + f"the file can be found locally", + self) + result = False + if context.fail_fast: + return False + return result From 6cee2c32c61c8e5200a969d1559c75f4e77bfe4a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 17:08:54 +0200 Subject: [PATCH 100/352] test(ro-crate-1.2): :white_check_mark: test localPath of File DataEntity --- .../invalid/ro-crate-metadata.json | 47 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 49 +++++++++++++++++++ .../test_metadata_dataEntities.py | 34 +++++++++++++ 3 files changed, 130 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..aade34b1d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/invalid/ro-crate-metadata.json @@ -0,0 +1,47 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + 
{ + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate missing file without localPath", + "description": "A local file is referenced but not present in the payload and has no localPath.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [{"@id": "data/missing-file.csv"}, {"@id": "#output-file.csv"}] + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "data/missing-file.csv", + "@type": "File", + "name": "missing-file.csv", + "description": "A CSV file that is not included in the payload and has no localPath.", + "encodingFormat": "text/csv" + }, + { + "@id": "#output-file.csv", + "@type": "File", + "name": "output-file.csv", + "description": "A deliberately absent file without localPath.", + "encodingFormat": "text/csv" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json new file mode 100644 index 000000000..28b65cae7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json @@ -0,0 +1,49 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate with localPath for missing file", + "description": "A local file is referenced but not present in the payload; localPath is provided.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [{"@id": "data/missing-file.csv"}, {"@id": "#output-file.csv"}] + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "data/missing-file.csv", + "@type": "File", + "name": "missing-file.csv", + "description": "A CSV file that is not included in the payload but has localPath.", + "encodingFormat": "text/csv", + "localPath": "/mnt/archive/data/missing-file.csv" + }, + { + "@id": "#output-file.csv", + "@type": "File", + "name": "output-file.csv", + "description": "A deliberately absent file with localPath.", + "encodingFormat": "text/csv", + "localPath": "/tmp/output-file.csv" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ } + ] +} \ No newline at end of file diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 83e6e3e69..ce32373e0 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -264,3 +264,37 @@ def test_invalid_recommended_content_url_not_downloadable(monkeypatch): expected_triggered_requirements=["Web-based Data Entity: REQUIRED availability"], expected_triggered_issues=["contentUrl", "not directly downloadable"], ) + + +def test_valid_missing_file_local_path(): + """ + A missing local file that declares localPath, and a deliberately absent + file (#id) that declares localPath, SHOULD both pass the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_missing_file_local_path, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1"], + ) + + +def test_invalid_missing_file_no_local_path(): + """ + A missing local file without localPath and a deliberately absent file (#id) + without localPath SHOULD each trigger a RECOMMENDED warning. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_missing_file_local_path, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_16.1"], + expected_triggered_requirements=[ + "Data Entity: missing file SHOULD use localPath" + ], + expected_triggered_issues=[ + "localPath" + ], + ) From 2f673d82e877f8368c236af80703faf137fffc6c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 17:46:40 +0200 Subject: [PATCH 101/352] feat(ro-crate-1.2): :sparkles: check additional license for Data Entities --- .../1.2/should/4_data_entity_license.py | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_license.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_license.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_license.py new file mode 100644 index 000000000..b5b65b081 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_license.py @@ -0,0 +1,142 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# ================================================================== +# NOTE +# ================================================================== +# +# The RO-Crate 1.2 specification (section on Licensing) states: +# +# "Data Entities with different license SHOULD have own license property" +# (requirement L4 / P2.2) +# +# This requirement is inherently non-actionable in its strict formulation +# because there is no way for a validator to determine whether a Data +# Entity's content *should* be under a different license than the Root +# Data Entity's. The validator can only observe three states: +# +# 1. A Data Entity declares a `license` identical to the Root's. +# This is redundant (the entity already inherits the Root license) +# but not incorrect. +# +# 2. A Data Entity declares a `license` different from the Root's. +# This is the case the spec explicitly encourages. +# +# 3. A Data Entity does not declare `license`. It inherits the Root +# license, which is the default and perfectly acceptable. +# +# While this cross-entity comparison *could* be expressed as a SHACL +# shape using a SPARQL constraint, doing so would produce a validation +# failure that affects the overall validation result. The intent here +# is purely advisory: the redundant declaration is not an error, it is +# a style improvement suggestion that should not cause the crate to +# fail validation. By implementing this as a Python PyFunctionCheck +# that only logs a warning (without adding a validation issue), the +# check provides actionable feedback to authors without penalising +# valid crates. + +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils import log as logging + +logger = logging.getLogger(__name__) + + +def _resolve_license_id(license_value) -> str: + """ + Resolve a license property value to its @id string. 
+ + The `license` property may be: + - a string (e.g. "https://creativecommons.org/licenses/by/4.0/") + - a dict with @id (e.g. {"@id": "https://creativecommons.org/licenses/by/4.0/"}) + - an ROCrateEntity object (which has an .id attribute) + - a list containing any of the above + - None + + Returns the @id string, or the string value, or empty string if + unresolvable. + """ + if license_value is None: + return "" + if isinstance(license_value, list): + items = [_resolve_license_id(item) for item in license_value] + return ",".join(item for item in items if item) + if isinstance(license_value, str): + return license_value + if hasattr(license_value, "id"): + return license_value.id + if isinstance(license_value, dict): + return license_value.get("@id", "") + return str(license_value) + + +@requirement(name="Data Entity: SHOULD NOT redundantly declare the Root license") +class DataEntityLicenseDivergenceChecker(PyFunctionCheck): + """ + Data Entities that declare a ``license`` property identical to the + Root Data Entity's license are redundantly overriding the inherited + license. The declaration is not incorrect, but it is unnecessary: + by default, all Data Entities inherit the Root Data Entity's license. + + This check logs a warning for each such redundant declaration, + suggesting that the property can be removed. It does **not** add + a validation issue, so the crate still passes validation. + + Data Entities with a *different* license from the Root are explicitly + encouraged by the spec and produce no warning. Data Entities with + no ``license`` property also produce no warning — they correctly + inherit the Root license. 
+ """ + + @check(name="Data Entity SHOULD NOT redundantly declare the Root license", + severity=Severity.RECOMMENDED) + def check_license_divergence(self, context: ValidationContext) -> bool: + root_entity = None + try: + root_entity = context.ro_crate.metadata.get_root_data_entity() + except Exception: + return True + if root_entity is None: + return True + + root_license_raw = root_entity.get_property("license") + if root_license_raw is None: + return True + + root_license_id = _resolve_license_id(root_license_raw) + if not root_license_id: + return True + + for entity in context.ro_crate.metadata.get_data_entities(): + if entity.id == root_entity.id: + continue + entity_license_raw = entity.get_property("license") + if entity_license_raw is None: + continue + + entity_license_id = _resolve_license_id(entity_license_raw) + if not entity_license_id: + continue + + if entity_license_id == root_license_id: + logger.warning( + "Data Entity '%s' declares a `license` property that is " + "identical to the Root Data Entity's license ('%s'); " + "this is redundant because Data Entities inherit the Root " + "license by default. 
Remove the `license` property or " + "change it to a different license if the content requires " + "one.", + entity.id, root_license_id) + return True From 3fb59801c13f2a135006bb402a1723ea2dd5402a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 17:48:09 +0200 Subject: [PATCH 102/352] test(ro-crate-1.2): :white_check_mark: test additional license of Data Entities --- .../invalid/data/research-data.csv | 0 .../invalid/ro-crate-metadata.json | 41 ++++++++++++++ .../valid/data/open-data.csv | 0 .../valid/ro-crate-metadata.json | 47 ++++++++++++++++ .../test_metadata_dataEntities.py | 55 +++++++++++++++++++ 5 files changed, 143 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/data/research-data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/data/open-data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/data/research-data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/data/research-data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..d38c30d63 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json @@ -0,0 +1,41 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": 
{"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate with redundant license on Data Entity", + "description": "The Data Entity repeats the same license as the Root.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [{"@id": "data/research-data.csv"}] + }, + { + "@id": "data/research-data.csv", + "@type": "File", + "name": "research-data.csv", + "description": "Research data under the same license as the Root.", + "encodingFormat": "text/csv", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/data/open-data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/data/open-data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json new file mode 100644 index 000000000..7c619fc1a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json @@ -0,0 +1,47 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate with divergent licenses", + "description": "The Root Data Entity has CC-BY-4.0; a Data Entity has CC0.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [{"@id": "data/open-data.csv"}] + }, + { + "@id": "data/open-data.csv", + "@type": "File", + "name": "open-data.csv", + "description": "Open data file under CC0.", + "encodingFormat": "text/csv", + "license": {"@id": "http://spdx.org/licenses/CC0-1.0"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "CreativeWork", + "name": "CC0 1.0 Universal", + "description": "Public domain dedication." + } + ] +} \ No newline at end of file diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index ce32373e0..0e4c0ba36 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -298,3 +298,58 @@ def test_invalid_missing_file_no_local_path(): "localPath" ], ) + + +def test_valid_data_entity_license_divergence(): + """ + A Data Entity with a different license from the Root SHOULD pass + the RECOMMENDED check (the entity is correctly overriding the + default license). + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_data_entity_license_divergence, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1"], + ) + + +def test_invalid_data_entity_redundant_license(): + """ + A Data Entity that declares the same license as the Root SHOULD + still pass validation, but a warning SHOULD be logged about the + redundant license declaration. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_data_entity_license_divergence, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1"], + ) + + +def test_redundant_license_logs_warning(): + """ + When a Data Entity declares the same license as the Root, a warning + message SHOULD be logged (not a validation issue). The validation + result MUST pass, and the warning MUST appear in the log stream. 
+ """ + from rocrate_validator.utils.log import __log_stream__ + + # Clear any previous log output + __log_stream__.truncate(0) + __log_stream__.seek(0) + + result = do_entity_test( + __metadata_root_data_entity_crates__.invalid_data_entity_license_divergence, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1"], + ) + + log_contents = __log_stream__.getvalue() + assert "redundant" in log_contents.lower(), \ + f"Expected a warning log about redundant license, got:\n{log_contents}" From 7784991960bf6a5cedfecfaf5733482b5ea3f88e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 18:42:54 +0200 Subject: [PATCH 103/352] test(ro-crate-1.2): :bug: update existing tests --- ...ata.json => prefix-ro-crate-metadata.json} | 25 ++++++++++++++----- .../invalid/ro-crate-metadata.json | 7 ++++-- .../valid/ro-crate-metadata.json | 7 ++++-- .../prefix-ro-crate-metadata.json | 3 +++ .../test_metadata_dataEntities.py | 5 ++-- 5 files changed, 35 insertions(+), 12 deletions(-) rename tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/{ro-crate-metadata.json => prefix-ro-crate-metadata.json} (64%) diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json similarity index 64% rename from tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json rename to tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json index d02f4d5d1..83bf0308f 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, - "about": {"@id": "https://example.org/ro-crate/detached/citable-root-data-entity/"} + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + } }, { "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/", @@ -13,11 +17,20 @@ "name": "Detached RO-Crate with absolute root @id", "description": "A detached RO-Crate whose Root Data Entity has an absolute URL as @id.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "hasPart": [], - "cite-as": {"@id": "https://example.org/ro-crate/detached/citable-root-data-entity/"}, + "cite-as": { + "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" + }, "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", - "publisher": {"@id": "https://ror.org/012345678"} + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + } }, { "@id": "https://ror.org/012345678", @@ -32,4 +45,4 @@ "description": "A Creative Commons license." 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json index d38c30d63..7f00f09b7 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json @@ -15,7 +15,10 @@ "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, "datePublished": "2024-01-01", "publisher": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/research-data.csv"}] + "hasPart": [{"@id": "data/research-data.csv"}], + "funder": {"@id": "https://ror.org/012345678"}, + "cite-as": {"@id": "./"}, + "url": "./" }, { "@id": "data/research-data.csv", @@ -38,4 +41,4 @@ "description": "A Creative Commons license." } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json index 7c619fc1a..5911d7ee4 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json @@ -15,7 +15,10 @@ "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, "datePublished": "2024-01-01", "publisher": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/open-data.csv"}] + "hasPart": [{"@id": "data/open-data.csv"}], + "funder": {"@id": "https://ror.org/012345678"}, + "cite-as": {"@id": "./"}, + "url": "./" }, { "@id": "data/open-data.csv", @@ -44,4 +47,4 @@ "description": "Public domain dedication." 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json index 7879ff53a..c25bb9e24 100644 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json @@ -22,6 +22,9 @@ "publisher": { "@id": "https://orcid.org/0000-0001-2345-6789" }, + "funder": { + "@id": "https://ror.org/123456789" + }, "datePublished": "2024-01-01", "hasPart": [ { diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 0e4c0ba36..8304b7664 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -276,7 +276,8 @@ def test_valid_missing_file_local_path(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1"], + skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1", + "ro-crate-1.2_17.1", "ro-crate-1.2_39.0", "ro-crate-1.2_39.1"], ) @@ -342,7 +343,7 @@ def test_redundant_license_logs_warning(): __log_stream__.truncate(0) __log_stream__.seek(0) - result = do_entity_test( + do_entity_test( __metadata_root_data_entity_crates__.invalid_data_entity_license_divergence, models.Severity.RECOMMENDED, True, From 8d9cd5b9475be39a1bb03d5fb029cb9c51486e0d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 18:50:16 +0200 Subject: [PATCH 104/352] feat(ro-crate-1.2): add checks of recommended requirements for Referenced RO-Crates --- .../1.2/must/2_root_data_entity_metadata.ttl | 30 ++++ .../1.2/should/4_referenced_rocrate.ttl | 164 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 
rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl index 1e2fcf22e..7f5139510 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl @@ -147,3 +147,33 @@ ro-crate:RootDataEntityOptionalConformsToProperty a sh:NodeShape ; sh:class prof:Profile ; sh:message """If the Root Data Entity includes a `conformsTo` property, its values MUST reference Profile entities.""" ; ] . + +ro-crate:RootDataEntityVersionlessProfileProhibition a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity: MUST NOT declare version-less conformsTo profile" ; + sh:description """The Root Data Entity MUST NOT declare the version-less generic RO-Crate profile URI + (https://w3id.org/ro/crate) in its conformsTo property. + Only versioned profile URIs (e.g., https://w3id.org/ro/crate/1.2) are permitted on the Root Data Entity. + The version-less URI is reserved for referenced RO-Crate data entities.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this dct:conformsTo . + } + """ ; + sh:name "Root Data Entity: MUST NOT declare version-less conformsTo" ; + sh:message "The Root Data Entity MUST NOT declare the version-less RO-Crate profile URI (https://w3id.org/ro/crate) in its conformsTo property. Use a versioned URI such as https://w3id.org/ro/crate/1.2 instead." ; + ] . 
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl new file mode 100644 index 000000000..13e3558bd --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl @@ -0,0 +1,164 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix dct: . +@prefix prof: . +@prefix sh: . +@prefix xsd: . + +# --------------------------------------------------------------- +# Shared SPARQL target: Referenced RO-Crate Data Entity +# +# Selects Dataset entities that declare conformsTo pointing to an +# RO-Crate specification URI (starting with https://w3id.org/ro/crate) +# and are not the Root Data Entity. +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateDataEntityTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset ; + dct:conformsTo ?profile . + FILTER(STRSTARTS(STR(?profile), "https://w3id.org/ro/crate")) + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + } + """ . 
+ +# --------------------------------------------------------------- +# Shared SPARQL target: Referenced RO-Crate Metadata Descriptor +# +# Selects entities referenced by subjectOf of a Referenced RO-Crate +# Data Entity. +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateMetadataDescriptorTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?crate a schema:Dataset ; + dct:conformsTo ?profile ; + schema:subjectOf ?this . + FILTER(STRSTARTS(STR(?profile), "https://w3id.org/ro/crate")) + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?crate != ?root) + } + """ . + +# These shapes validate data entities that represent referenced RO-Crates. +# A "referenced RO-Crate" is a Dataset entity whose conformsTo property +# includes an RO-Crate specification URI (starting with +# https://w3id.org/ro/crate). The RO-Crate 1.2 specification requires +# that such entities follow specific conventions regarding conformsTo, +# subjectOf, and the structure of their associated metadata descriptor. + +# --------------------------------------------------------------- +# Referenced RO-Crate data entity SHOULD include the +# version-less RO-Crate base profile URI in conformsTo +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateConformsToVersionlessProfile a sh:NodeShape ; + sh:name "Referenced RO-Crate: SHOULD include version-less profile in conformsTo" ; + sh:description """A data entity that represents a referenced RO-Crate + SHOULD include the version-less RO-Crate base profile URI + (https://w3id.org/ro/crate) in its conformsTo property, to indicate + that it is an RO-Crate. 
Specific profile versions are also permitted + in addition to the version-less URI.""" ; + sh:target ro-crate:ReferencedROCrateDataEntityTarget ; + sh:property [ + a sh:PropertyShape ; + sh:name "Referenced RO-Crate: SHOULD include version-less conformsTo" ; + sh:description """A referenced RO-Crate data entity SHOULD include the + version-less RO-Crate base profile URI + (https://w3id.org/ro/crate) in its conformsTo property.""" ; + sh:path dct:conformsTo ; + sh:hasValue ; + sh:severity sh:Warning ; + sh:message "Referenced RO-Crate data entity SHOULD include the version-less RO-Crate base profile URI (https://w3id.org/ro/crate) in its conformsTo property" ; + ] . + +# --------------------------------------------------------------- +# Referenced RO-Crate data entity SHOULD have subjectOf +# pointing to a metadata descriptor contextual entity +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateSubjectOfRecommended a sh:NodeShape ; + sh:name "Referenced RO-Crate: SHOULD have subjectOf" ; + sh:description """A data entity that represents a referenced RO-Crate + SHOULD have a subjectOf property linking to a contextual entity + representing the metadata descriptor of the referenced RO-Crate.""" ; + sh:target ro-crate:ReferencedROCrateDataEntityTarget ; + sh:property [ + a sh:PropertyShape ; + sh:name "Referenced RO-Crate: SHOULD have subjectOf" ; + sh:description """A referenced RO-Crate data entity SHOULD have a + subjectOf property linking to a contextual entity representing + the metadata descriptor of the referenced RO-Crate.""" ; + sh:path schema_org:subjectOf ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Referenced RO-Crate data entity SHOULD have a subjectOf property linking to the referenced crate metadata descriptor" ; + ] . 
+ +# --------------------------------------------------------------- +# Metadata descriptor of a referenced RO-Crate +# +# All three constraints (encodingFormat, no conformsTo, no about) +# share the same SPARQL target. +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateMetadataDescriptorProperties a sh:NodeShape ; + sh:name "Referenced RO-Crate metadata descriptor: recommended properties" ; + sh:description """The contextual entity representing the metadata descriptor + of a referenced RO-Crate SHOULD have encodingFormat set to + 'application/ld+json' and SHOULD NOT have conformsTo or about + properties. Profile declarations belong on the data entity, and + the about property is reserved for the crate's own metadata + descriptor.""" ; + sh:target ro-crate:ReferencedROCrateMetadataDescriptorTarget ; + sh:property [ + a sh:PropertyShape ; + sh:name "Referenced RO-Crate metadata descriptor: SHOULD have encodingFormat application/ld+json" ; + sh:description """The metadata descriptor of a referenced RO-Crate + SHOULD declare encodingFormat as 'application/ld+json'.""" ; + sh:path schema_org:encodingFormat ; + sh:hasValue "application/ld+json" ; + sh:severity sh:Warning ; + sh:message "Referenced RO-Crate metadata descriptor SHOULD have encodingFormat 'application/ld+json'" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Referenced RO-Crate metadata descriptor: SHOULD NOT have conformsTo" ; + sh:description """The metadata descriptor of a referenced RO-Crate + SHOULD NOT have a conformsTo property. 
Profile declarations belong + on the data entity.""" ; + sh:path dct:conformsTo ; + sh:maxCount 0 ; + sh:severity sh:Warning ; + sh:message "Referenced RO-Crate metadata descriptor SHOULD NOT have a conformsTo property" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Referenced RO-Crate metadata descriptor: SHOULD NOT have about" ; + sh:description """The metadata descriptor of a referenced RO-Crate + SHOULD NOT have an about property. The about property belongs + on the crate's own metadata descriptor.""" ; + sh:path schema_org:about ; + sh:maxCount 0 ; + sh:severity sh:Warning ; + sh:message "Referenced RO-Crate metadata descriptor SHOULD NOT have an about property" ; + ] . From ad9a54d9e6cd970c3bf6c01bf988397e8dbd6175 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 18:52:24 +0200 Subject: [PATCH 105/352] test(ro-crate-1.2): :white_check_mark: add tests of Referenced RO-Crates recommended properties --- .../invalid_md_about/ro-crate-metadata.json | 62 ++++++++ .../ro-crate-metadata.json | 62 ++++++++ .../ro-crate-metadata.json | 61 +++++++ .../ro-crate-metadata.json | 55 +++++++ .../ro-crate-metadata.json | 53 +++++++ .../ro-crate-metadata.json | 66 ++++++++ .../valid/ro-crate-metadata.json | 71 +++++++++ .../ro-crate-1.2/test_referenced_rocrate.py | 150 ++++++++++++++++++ 8 files changed, 580 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_about/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_conformsto/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_encoding_format/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_subjectof/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_versionless_conformsto/ro-crate-metadata.json create mode 100644 
tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_root_conformsto_versionless/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_about/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_about/ro-crate-metadata.json new file mode 100644 index 000000000..44d30a7ee --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_about/ro-crate-metadata.json @@ -0,0 +1,62 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with referenced RO-Crate metadata descriptor with about", + "description": "The metadata descriptor of the referenced RO-Crate has about which should not be present.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate whose metadata descriptor has about.", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ], + "subjectOf": {"@id": "#other-ro-crate-metadata"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "encodingFormat": "application/ld+json", + "about": {"@id": "https://example.org/other-ro-crate/"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": 
"https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_conformsto/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_conformsto/ro-crate-metadata.json new file mode 100644 index 000000000..f34aaf859 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_conformsto/ro-crate-metadata.json @@ -0,0 +1,62 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with referenced RO-Crate metadata descriptor with conformsTo", + "description": "The metadata descriptor of the referenced RO-Crate has conformsTo which should not be present.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate whose metadata descriptor has conformsTo.", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ], + "subjectOf": {"@id": "#other-ro-crate-metadata"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "encodingFormat": "application/ld+json", + 
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_encoding_format/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_encoding_format/ro-crate-metadata.json new file mode 100644 index 000000000..0deb72b12 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_md_encoding_format/ro-crate-metadata.json @@ -0,0 +1,61 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with referenced RO-Crate metadata descriptor with wrong encodingFormat", + "description": "The metadata descriptor of the referenced RO-Crate has the wrong encodingFormat.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate with wrong encodingFormat on its metadata descriptor.", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": 
"https://w3id.org/ro/crate/1.2"} + ], + "subjectOf": {"@id": "#other-ro-crate-metadata"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "encodingFormat": "text/turtle" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_subjectof/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_subjectof/ro-crate-metadata.json new file mode 100644 index 000000000..2dba0de62 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_subjectof/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate referencing another RO-Crate without subjectOf", + "description": "An RO-Crate that references another RO-Crate missing the subjectOf property.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate without subjectOf.", + "conformsTo": [ + 
{"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ] + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_versionless_conformsto/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_versionless_conformsto/ro-crate-metadata.json new file mode 100644 index 000000000..f77215601 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_no_versionless_conformsto/ro-crate-metadata.json @@ -0,0 +1,53 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate referencing another RO-Crate without version-less conformsTo", + "description": "An RO-Crate that references another RO-Crate missing the version-less profile URI.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate missing the version-less profile URI.", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, 
+ "subjectOf": {"@id": "#other-ro-crate-metadata"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_root_conformsto_versionless/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_root_conformsto_versionless/ro-crate-metadata.json new file mode 100644 index 000000000..5195fe3be --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_root_conformsto_versionless/ro-crate-metadata.json @@ -0,0 +1,66 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with version-less conformsTo on Root", + "description": "An RO-Crate whose Root Data Entity incorrectly declares the version-less generic profile URI.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ], + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate.", + 
"conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ], + "subjectOf": {"@id": "#other-ro-crate-metadata"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "name": "Other RO-Crate Metadata Descriptor", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5f1c9b743 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json @@ -0,0 +1,71 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, + "about": {"@id": "./"}, + "name": "RO-Crate Metadata Descriptor" + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate referencing another RO-Crate", + "description": "An RO-Crate that references another RO-Crate as a data entity.", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "datePublished": "2024-01-01", + "publisher": {"@id": "https://ror.org/012345678"}, + "funder": {"@id": "https://ror.org/012345678"}, + "hasPart": [ + {"@id": "https://example.org/other-ro-crate/"} + ] + }, + { + "@id": 
"https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate.", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate"}, + {"@id": "https://w3id.org/ro/crate/1.2"} + ], + "subjectOf": {"@id": "#other-ro-crate-metadata"}, + "distribution": {"@id": "https://example.org/other-ro-crate/archive.tar.gz"} + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "name": "Other RO-Crate Metadata Descriptor", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://example.org/other-ro-crate/archive.tar.gz", + "@type": "DataDownload", + "name": "Other RO-Crate Archive", + "encodingFormat": "application/gzip" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": "Profile", + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": "Profile", + "name": "RO-Crate 1.2" + } + ] +} \ No newline at end of file diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py new file mode 100644 index 000000000..a8fcc8204 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -0,0 +1,150 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import ReferencedROCrates +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + +__referenced_rocrate_crates__ = ReferencedROCrates() + + +def test_valid_referenced_rocrate(): + """ + A crate referencing another RO-Crate with all recommended properties + SHOULD pass RECOMMENDED validation. + """ + do_entity_test( + __referenced_rocrate_crates__.valid, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_49.1", "ro-crate-1.2_32.0", "ro-crate-1.2_55.1", "ro-crate-1.2_33.0"], + ) + + +def test_invalid_referenced_rocrate_no_versionless_conformsto(): + """ + A referenced RO-Crate data entity missing the version-less conformsTo + SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __referenced_rocrate_crates__.invalid_no_versionless_conformsto, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate: SHOULD include version-less profile in conformsTo" + ], + expected_triggered_issues=[ + "version-less" + ], + ) + + +def test_invalid_root_conformsto_versionless(): + """ + The Root Data Entity MUST NOT declare the version-less conformsTo profile URI. 
+ """ + do_entity_test( + __referenced_rocrate_crates__.invalid_root_conformsto_versionless, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "RO-Crate Root Data Entity: MUST NOT declare version-less conformsTo profile" + ], + expected_triggered_issues=[ + "version-less" + ], + ) + + +def test_invalid_referenced_rocrate_no_subjectof(): + """ + A referenced RO-Crate data entity missing subjectOf SHOULD trigger + a RECOMMENDED warning. + """ + do_entity_test( + __referenced_rocrate_crates__.invalid_no_subjectof, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate: SHOULD have subjectOf" + ], + expected_triggered_issues=[ + "subjectOf" + ], + ) + + +def test_invalid_referenced_rocrate_md_encoding_format(): + """ + A referenced RO-Crate metadata descriptor with wrong encodingFormat + SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __referenced_rocrate_crates__.invalid_md_encoding_format, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate metadata descriptor: recommended properties" + ], + expected_triggered_issues=[ + "encodingFormat" + ], + ) + + +def test_invalid_referenced_rocrate_md_conformsto(): + """ + A referenced RO-Crate metadata descriptor with conformsTo SHOULD trigger + a RECOMMENDED warning (conformsTo belongs on the data entity). 
+ """ + do_entity_test( + __referenced_rocrate_crates__.invalid_md_conformsto, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate metadata descriptor: recommended properties" + ], + expected_triggered_issues=[ + "conformsTo" + ], + ) + + +def test_invalid_referenced_rocrate_md_about(): + """ + A referenced RO-Crate metadata descriptor with about SHOULD trigger + a RECOMMENDED warning (about belongs on the crate's own metadata descriptor). + """ + do_entity_test( + __referenced_rocrate_crates__.invalid_md_about, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate metadata descriptor: recommended properties" + ], + expected_triggered_issues=[ + "about" + ], + ) From f17374152692980f5a81e2573e91a629c73a917e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 23:26:32 +0200 Subject: [PATCH 106/352] feat(ro-crate-1.2): :sparkles: check recommended day precision for `datePublished` --- .../should/2_root_data_entity_metadata.ttl | 112 ++---------------- 1 file changed, 9 insertions(+), 103 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl index 673baf380..7052f6512 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_metadata.ttl @@ -11,114 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -@prefix ro: <./> . @prefix ro-crate: . -@prefix rdf: . @prefix schema_org: . -@prefix prof: . @prefix sh: . -@prefix validator: . 
-ro-crate:RootDataEntityDirectRecommendedProperties a sh:NodeShape ; - sh:name "RO-Crate Root Data Entity RECOMMENDED properties" ; - sh:description """The Root Data Entity SHOULD have - the properties `name`, `description` and `license` defined as described - in the RO-Crate specification """; +ro-crate:RootDataEntityDatePublishedDayPrecision a sh:NodeShape ; + sh:name "Root Data Entity: datePublished SHOULD specify at least day precision" ; + sh:description """The Root Data Entity SHOULD specify datePublished to at least + the precision of a day (YYYY-MM-DD) per RO-Crate 1.2.""" ; sh:targetClass ro-crate:RootDataEntity ; sh:property [ a sh:PropertyShape ; - sh:name "Root Data Entity: `name` property" ; - sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org) - to clearly identify the dataset and distinguish it from other datasets.""" ; - sh:minCount 1 ; - sh:nodeKind sh:Literal ; - sh:path schema_org:name; - sh:message "The Root Data Entity SHOULD have a `name` property (as specified by schema.org)" ; - ] ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity: `description` property" ; - sh:description """Check if the Root Data Entity includes a `description` (as specified by schema.org) - to provide a human-readable description of the dataset.""" ; - sh:minCount 1 ; - sh:nodeKind sh:Literal ; - sh:path schema_org:description; - sh:message "The Root Data Entity SHOULD have a `description` property (as specified by schema.org)" ; - ] ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity: `license` SHOULD link to a Contextual Entity" ; - sh:description """Check if the Root Data Entity includes a `license` property - that links to a Contextual Entity with type `schema_org:CreativeWork` to describe the license.""" ; - sh:nodeKind sh:BlankNodeOrIRI ; - sh:or ( - [ sh:class schema_org:CreativeWork ] - [ sh:class schema_org:MediaObject ] - [ sh:class schema_org:Dataset ] - ) ; - sh:path 
schema_org:license; - sh:minCount 1 ; - sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type""" ; - ] ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity: `author` property" ; - sh:description """Check if the Root Data Entity includes a `author` property (as specified by schema.org) - to provide information about its author.""" ; - sh:or ( - [ sh:class schema_org:Person ;] - [ sh:class schema_org:Organization ;] - ) ; - sh:path schema_org:author; - sh:minCount 1 ; - sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the `author` of the RO-Crate""" ; - ] ; - sh:property [ - sh:minCount 1 ; - sh:maxCount 1 ; - sh:path schema_org:publisher ; - sh:severity sh:Warning ; - sh:name "Root Data Entity: `publisher` property" ; - sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization`.""" ; - sh:message "The `publisher` property of a `Root Data Entity` SHOULD be an `Organization`"; - sh:nodeKind sh:IRI ; - sh:class schema_org:Organization ; - ] ; - sh:property [ - sh:severity sh:Warning ; - sh:name "Root Data Entity: `funder` property" ; - sh:description """Check if the Root Data Entity has `funder` properties referencing Organizations.""" ; - sh:path schema_org:funder ; - sh:class schema_org:Organization ; - sh:message "The Root Data Entity SHOULD reference funders using `funder`" ; - ] ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity: RECOMMENDED `datePublished` property" ; - sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org) - to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date. 
- It SHOULD be specified to at least the day level, but MAY include a time component.""" ; - sh:minCount 1 ; - sh:nodeKind sh:Literal ; + sh:name "Root Data Entity: RECOMMENDED datePublished day precision" ; sh:path schema_org:datePublished ; - sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ; - sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date and the precision of at least the day level" ; - ] . - -ro-crate:RootDataEntityIdentifierRecommendedValue - a sh:NodeShape ; - sh:name "RO-Crate Root Data Entity RECOMMENDED value" ; - sh:description "The Root Data Entity SHOULD be identified by `./` or an absolute URI" ; - sh:targetNode ro-crate:RootDataEntity ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity URI value" ; - sh:description "Check if the Root Data Entity identifier is `./` or an absolute URI" ; - sh:path [ sh:inversePath rdf:type ] ; - sh:message """The Root Data Entity SHOULD be identified by `./` or an absolute URI""" ; - sh:or ( - [ sh:pattern "^\\./$" ] - [ sh:pattern "^[A-Za-z][A-Za-z0-9+\\.-]*:" ] - ) ; - ] . + sh:pattern "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01])" ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD specify datePublished to at least the precision of a day (YYYY-MM-DD)" ; + ] . 
\ No newline at end of file From de0c1ff1d865bd99c83ae9d0064bd68b396e681e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 23:33:09 +0200 Subject: [PATCH 107/352] feat(ro-crate-1.2): :sparkles: check recommended `identifier` of Root DataEntity --- .../should/2_root_data_entity_identifier.ttl | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl new file mode 100644 index 000000000..97a652b4a --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl @@ -0,0 +1,58 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix schema_org: . +@prefix sh: . +@prefix xsd: . 
+ +# Root Data Entity: identifier SHOULD be present if its @id is an absolute URI +ro-crate:RootDataEntityIdentifierIfPid a sh:NodeShape ; + sh:name "Root Data Entity: identifier SHOULD be present if PID exists" ; + sh:description """If the Root Data Entity has a persistent identifier + (e.g., its @id is an absolute URI), it SHOULD include an identifier property.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ro-crate:RootDataEntity . + FILTER(regex(str(?this), "^https?://", "i")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: RECOMMENDED identifier if persistent ID exists" ; + sh:path schema_org:identifier ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD have an `identifier` property if it has a persistent identifier" ; + ] . + +# Root Data Entity: identifier SHOULD use PropertyValue approach (Science On Schema.org) +ro-crate:RootDataEntityIdentifierPropertyValueApproach a sh:NodeShape ; + sh:name "Root Data Entity: identifier SHOULD use PropertyValue approach" ; + sh:description """If the Root Data Entity has an identifier, it SHOULD use + the PropertyValue approach as per Science On Schema.org guides.""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: RECOMMENDED PropertyValue approach for identifier" ; + sh:path schema_org:identifier ; + sh:class schema_org:PropertyValue ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD use PropertyValue entities for identifiers (Science On Schema.org approach)" ; + ] . 
\ No newline at end of file From 96631980d2866f6c9c8a558ebc5f5fc061862f35 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 23:36:14 +0200 Subject: [PATCH 108/352] feat(ro-crate-1.2): :sparkles: check `conformsTo` of RO-Crates with additional profiles --- .../should/2_root_data_entity_conformsto.ttl | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_conformsto.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_conformsto.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_conformsto.ttl new file mode 100644 index 000000000..d2e038cc8 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_conformsto.ttl @@ -0,0 +1,47 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix schema_org: . +@prefix dct: . +@prefix prof: . +@prefix sh: . 
+ +# Root Data Entity: conformsTo SHOULD be present if profiles exist in the crate +ro-crate:RootDataEntityConformsToIfProfiles a sh:NodeShape ; + sh:name "Root Data Entity: conformsTo SHOULD be present if profiles exist" ; + sh:description """If the RO-Crate conforms to additional profiles, + the Root Data Entity SHOULD have a conformsTo property referencing + Profile entities (RO-Crate 1.2).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ro-crate:RootDataEntity . + FILTER EXISTS { + ?profile a prof:Profile . + FILTER(?profile != ) + } + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: RECOMMENDED conformsTo if profiles exist" ; + sh:path dct:conformsTo ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD have a `conformsTo` property if the RO-Crate conforms to profiles" ; + ] . \ No newline at end of file From 3846c5eb150f64b8690609aaa659d81f3f70651e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 16 Apr 2026 23:38:51 +0200 Subject: [PATCH 109/352] test(ro-crate-1.2): :white_check_mark: add integration tests for datePublished and identifier properties of the Root Data Entity --- .../valid/ro-crate-metadata.json | 13 +- .../invalid/ro-crate-metadata.json | 74 ++++++++ .../valid/ro-crate-metadata.json | 77 ++++++++ .../invalid/ro-crate-metadata.json | 68 +++++++ .../valid/ro-crate-metadata.json | 68 +++++++ .../invalid/ro-crate-metadata.json | 59 ++++++ .../valid/ro-crate-metadata.json | 68 +++++++ .../invalid/ro-crate-metadata.json | 60 ++++++ .../valid/ro-crate-metadata.json | 68 +++++++ .../invalid/file1.txt | 0 .../invalid/ro-crate-metadata.json | 64 +++++++ .../valid/file1.txt | 0 .../valid/file2.txt | 0 .../valid/ro-crate-metadata.json | 65 +++++++ .../invalid/ro-crate-metadata.json | 10 +- .../valid/ro-crate-metadata.json | 10 +- .../valid/ro-crate-metadata.json | 11 +- 
.../valid/ro-crate-metadata.json | 10 + .../valid/ro-crate-metadata.json | 10 + .../invalid/ro-crate-metadata.json | 18 +- .../valid/ro-crate-metadata.json | 10 + .../valid/ro-crate-metadata.json | 10 + .../test_metadata_dataEntities.py | 3 +- .../test_metadata_rootDataEntity.py | 173 ++++++++++++++++++ 24 files changed, 941 insertions(+), 8 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/file1.txt create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file1.txt create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file2.txt create mode 100644 
tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json index ea0830172..4bf4ca099 100644 --- a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -30,8 +30,18 @@ ], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -67,9 +77,6 @@ "author": { "@id": "https://orcid.org/0000-0001-2345-6789" }, - "license": { - "@id": "https://creativecommons.org/licenses/by/4.0/" - }, "sdDatePublished": "2026-04-15" }, { diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..395d58b56 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/invalid/ro-crate-metadata.json @@ -0,0 +1,74 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: conformsTo missing despite profile in graph", + "description": "This RO-Crate has a Profile entity in the 
graph but Root lacks conformsTo", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "https://w3id.org/ro/crate/1.2/profile1", + "@type": "Profile", + "name": "Example Profile", + "url": "https://w3id.org/ro/crate/1.2/profile1" + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json new file mode 100644 index 000000000..3e86af31f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json @@ -0,0 +1,77 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + 
"@type": "Dataset", + "name": "Test RO-Crate: conformsTo present with profile reference", + "description": "This RO-Crate conforms to a profile and has conformsTo on Root", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2/profile1" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "https://w3id.org/ro/crate/1.2/profile1", + "@type": "Profile", + "name": "Example Profile", + "url": "https://w3id.org/ro/crate/1.2/profile1" + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..5e1c88ebd --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/invalid/ro-crate-metadata.json @@ -0,0 +1,68 @@ +{ + "@context": 
"https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: datePublished with year only", + "description": "This RO-Crate has a datePublished property specified to year only (not day precision)", + "datePublished": "2024", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json new file mode 100644 index 000000000..fccee6e09 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json @@ -0,0 +1,68 @@ +{ + "@context": 
"https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: datePublished with day precision", + "description": "This RO-Crate has a datePublished property specified to day precision (YYYY-MM-DD)", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..7499f8a8a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/invalid/ro-crate-metadata.json @@ -0,0 +1,59 @@ +{ + "@context": 
"https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "https://example.org/crate1" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "https://example.org/crate1", + "@type": "Dataset", + "name": "Test RO-Crate: identifier missing with absolute URI", + "description": "This RO-Crate has an absolute URI as @id but does NOT include an identifier property", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json new file mode 100644 index 000000000..9d3a2fa33 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json @@ -0,0 +1,68 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": 
"./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: identifier present with absolute URI", + "description": "This RO-Crate has an absolute URI as @id and includes an identifier property", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..489b8c209 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/invalid/ro-crate-metadata.json @@ -0,0 +1,60 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + 
}, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: identifier does NOT use PropertyValue", + "description": "This RO-Crate has identifier as a plain string instead of PropertyValue", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": "10.5281/zenodo.1234567", + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5c494fa0a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json @@ -0,0 +1,68 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": ["CreativeWork", "schema:Book"], + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: identifier uses 
PropertyValue approach", + "description": "This RO-Crate uses PropertyValue for identifier as per Science On Schema.org", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "identifier": { + "@id": "#identifier-pv" + }, + "hasPart": [] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "Persistent identifier", + "value": "10.5281/zenodo.1234567" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "External Funder", + "url": "https://ror.org" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/file1.txt b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/file1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..e846154c4 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid/ro-crate-metadata.json @@ -0,0 +1,64 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + 
"@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: hasPart does NOT reference all Data Entities", + "description": "This RO-Crate has hasPart that is missing reference to file2.txt", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "hasPart": [ + { "@id": "file1.txt" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "file1.txt", + "@type": "File", + "name": "File 1" + }, + { + "@id": "file2.txt", + "@type": "File", + "name": "File 2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file1.txt b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file2.txt b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/file2.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/ro-crate-metadata.json new file mode 100644 index 000000000..792c8bcd1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/valid/ro-crate-metadata.json @@ -0,0 +1,65 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: hasPart references all Data Entities", + "description": "This RO-Crate has hasPart that references all Data Entities in the graph", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, + "funder": { + "@id": "#funder-org" + }, + "hasPart": [ + { "@id": "file1.txt" }, + { "@id": "file2.txt" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#funder-org", + "@type": "Organization", + "name": "European Commission", + "url": "https://ec.europa.eu", + "funder": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "file1.txt", + "@type": "File", + "name": "File 1" + }, + { + "@id": "file2.txt", + "@type": "File", + "name": "File 2" + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json index 7f00f09b7..33550acf0 100644 --- 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json @@ -18,7 +18,15 @@ "hasPart": [{"@id": "data/research-data.csv"}], "funder": {"@id": "https://ror.org/012345678"}, "cite-as": {"@id": "./"}, - "url": "./" + "url": "./", + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "data/research-data.csv", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json index 5911d7ee4..d37482346 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json @@ -18,7 +18,15 @@ "hasPart": [{"@id": "data/open-data.csv"}], "funder": {"@id": "https://ror.org/012345678"}, "cite-as": {"@id": "./"}, - "url": "./" + "url": "./", + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "data/open-data.csv", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json index 28b65cae7..cb54a9b95 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json @@ -15,7 +15,16 @@ "license": {"@id": 
"https://creativecommons.org/licenses/by/4.0/"}, "datePublished": "2024-01-01", "publisher": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/missing-file.csv"}, {"@id": "#output-file.csv"}] + "funder": {"@id": "https://ror.org/012345678"}, + "hasPart": [{"@id": "data/missing-file.csv"}, {"@id": "#output-file.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "https://ror.org/012345678", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json index 052a543f5..9a326af6d 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json @@ -33,8 +33,18 @@ ], "funder": { "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json index 052a543f5..9a326af6d 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -33,8 +33,18 @@ ], "funder": { "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#uuid" 
} }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json index c67c99f35..f06a5cff4 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/invalid/ro-crate-metadata.json @@ -14,7 +14,23 @@ "description": "RO-Crate with a web-based File Data Entity whose contentUrl is not downloadable (returns HTML).", "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "hasPart": [{"@id": "https://example.com/landing/dataset"}] + "hasPart": [{"@id": "https://example.com/landing/dataset"}], + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "#publisher-org"}, + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json index 4b3b258f2..ca0db284c 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json @@ -30,8 +30,18 @@ ], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json index 1c71fa7ae..0710f800e 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json @@ -30,8 +30,18 @@ ], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 8304b7664..fb34e6c93 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -277,7 +277,8 @@ def test_valid_missing_file_local_path(): True, profile_identifier="ro-crate-1.2", skip_checks=["ro-crate-1.2_16.1", "ro-crate-1.2_38.1", - "ro-crate-1.2_17.1", "ro-crate-1.2_39.0", "ro-crate-1.2_39.1"], + "ro-crate-1.2_17.1", "ro-crate-1.2_39.0", "ro-crate-1.2_39.1", + "ro-crate-1.2_18.1"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py 
b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 82f845c24..7feea32c8 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -323,3 +323,176 @@ def test_invalid_recommended_funding_no_project_funder(): expected_triggered_requirements=["Project Organization: recommended funder reference"], expected_triggered_issues=["SHOULD itself reference an external `funder`"], ) + + +# --------------------------------------------------------------------------- +# Root Data Entity: datePublished SHOULD specify at least day precision +# --------------------------------------------------------------------------- + +def test_valid_recommended_datePublished_day_precision(): + """ + Root Data Entity with datePublished in YYYY-MM-DD format passes the + RECOMMENDED day precision check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_datePublished_day_precision, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_35.1"], + ) + + +def test_invalid_recommended_datePublished_day_precision(): + """ + Root Data Entity with datePublished as just year (YYYY) fails the + RECOMMENDED day precision check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_datePublished_day_precision, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: datePublished SHOULD specify at least day precision"], + expected_triggered_issues=["SHOULD specify datePublished to at least the precision of a day"], + skip_checks=["ro-crate-1.2_35.1"], + ) + + +# --------------------------------------------------------------------------- +# Root Data Entity: hasPart MUST reference all Data Entities +# --------------------------------------------------------------------------- + +def test_valid_required_hasPart_all_data_entities(): + """ + Root Data Entity that references all Data Entities via hasPart (directly + or indirectly) passes the REQUIRED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_required_hasPart_all_data_entities, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_46.1"], + ) + + +def test_invalid_required_hasPart_all_data_entities(): + """ + Root Data Entity that does NOT reference all Data Entities via hasPart + fails the REQUIRED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_required_hasPart_all_data_entities, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: hasPart MUST reference all Data Entities"], + expected_triggered_issues=["MUST reference all Data Entities via hasPart"], + skip_checks=["ro-crate-1.2_46.1"], + ) + + +# --------------------------------------------------------------------------- +# Root Data Entity: identifier SHOULD be present if PID exists (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_identifier_if_pid(): + """ + Root Data Entity with absolute URI @id and identifier property passes + the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_identifier_if_pid, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_35.1", "ro-crate-1.2_38.1", "ro-crate-1.2_41.1", + "ro-crate-1.2_41.2", "ro-crate-1.2_41.3", "ro-crate-1.2_42.1", + "ro-crate-1.2_43.1", "ro-crate-1.2_43.2", "ro-crate-1.2_44.1", + "Root Data Entity: use cite-as for resolvable identifiers", + "Root Data Entity: persistent identifier resolution", + "Root Data Entity: identifier SHOULD be present if PID exists"], + skip_availability_check=True, + ) + + +def test_invalid_recommended_identifier_if_pid(): + """ + Root Data Entity with absolute URI @id but no identifier property fails + the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_identifier_if_pid, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: identifier SHOULD be present if PID exists"], + expected_triggered_issues=["SHOULD have an `identifier` property if it has a persistent identifier"], + ) + + +# --------------------------------------------------------------------------- +# Root Data Entity: identifier SHOULD use PropertyValue approach (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_identifier_propertyvalue(): + """ + Root Data Entity that uses PropertyValue for identifier passes the + RECOMMENDED check per Science On Schema.org. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_identifier_propertyvalue, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_35.1"], + ) + + +def test_invalid_recommended_identifier_propertyvalue(): + """ + Root Data Entity that uses plain string for identifier fails the + RECOMMENDED PropertyValue approach check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_identifier_propertyvalue, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: identifier SHOULD use PropertyValue approach"], + expected_triggered_issues=["SHOULD use PropertyValue entities for identifiers"], + ) + + +# --------------------------------------------------------------------------- +# Root Data Entity: conformsTo SHOULD be present if profiles exist (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_conformsto_if_profiles(): + """ + Root Data Entity that has a Profile entity in the graph and includes + conformsTo on the Root passes the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_conformsto_if_profiles, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_35.1"], + ) + + +def test_invalid_recommended_conformsto_if_profiles(): + """ + Root Data Entity that has a Profile entity in the graph but does NOT + include conformsTo on the Root fails the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_conformsto_if_profiles, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: conformsTo SHOULD be present if profiles exist"], + expected_triggered_issues=["SHOULD have a `conformsTo` property if the RO-Crate conforms to profiles"], + ) From 919a14216c9c6bc133a7371898a63881db10e3c8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:14:45 +0200 Subject: [PATCH 110/352] fix(ro-crate-1.2): :sparkles: fix check for Root DataEntity identifier --- .../should/2_root_data_entity_identifier.ttl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl index 97a652b4a..4b579c0a9 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.ttl @@ -42,17 +42,24 @@ ro-crate:RootDataEntityIdentifierIfPid a sh:NodeShape ; ] . # Root Data Entity: identifier SHOULD use PropertyValue approach (Science On Schema.org) +# This shape only fires when an identifier IS present but is not a PropertyValue entity. 
ro-crate:RootDataEntityIdentifierPropertyValueApproach a sh:NodeShape ; sh:name "Root Data Entity: identifier SHOULD use PropertyValue approach" ; sh:description """If the Root Data Entity has an identifier, it SHOULD use the PropertyValue approach as per Science On Schema.org guides.""" ; sh:targetClass ro-crate:RootDataEntity ; - sh:property [ - a sh:PropertyShape ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; sh:name "Root Data Entity: RECOMMENDED PropertyValue approach for identifier" ; - sh:path schema_org:identifier ; - sh:class schema_org:PropertyValue ; - sh:minCount 1 ; + sh:description """Check that if an identifier is present it is a PropertyValue entity.""" ; + sh:select """ + SELECT ?this + WHERE { + ?this schema:identifier ?id . + FILTER NOT EXISTS { ?id a schema:PropertyValue } + } + """ ; sh:severity sh:Warning ; sh:message "The Root Data Entity SHOULD use PropertyValue entities for identifiers (Science On Schema.org approach)" ; ] . \ No newline at end of file From 415bd27d560cb623f410125d0a22aab3c2936172 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:15:55 +0200 Subject: [PATCH 111/352] fix(ro-crate-1.2): :adhesive_bandage: fix check for `conformsTo` of File DataEntity --- .../1.2/should/4_data_entity_metadata.ttl | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index 3ee276d97..83f7b1fa0 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -183,13 +183,26 @@ ro-crate:WebDirectoryDistributionRecommended a sh:NodeShape ; sh:message "Web-based Directory Data Entities SHOULD include a `distribution` property pointing to a downloadable archive" ; ] . 
-# ro-crate:FileConformsToProfile a sh:NodeShape ; -# sh:name "File: RECOMMENDED `conformsTo` profile" ; -# sh:description """If present, `conformsTo` SHOULD reference a Profile entity.""" ; -# sh:targetClass ro-crate:File ; -# sh:property [ -# sh:path schema_org:conformsTo ; -# sh:class prof:Profile ; -# sh:severity sh:Warning ; -# sh:message "File `conformsTo` SHOULD reference a Profile entity" ; -# ] . +ro-crate:FileConformsToProfile a sh:NodeShape ; + sh:name "File: RECOMMENDED `conformsTo` profile" ; + sh:description """If present, `conformsTo` SHOULD reference a Profile or CreativeWork entity.""" ; + sh:targetClass ro-crate:File ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this dct:conformsTo ?target . + FILTER NOT EXISTS { + ?target a schema:CreativeWork . + } + FILTER NOT EXISTS { + ?target a <http://www.w3.org/ns/dx/prof/Profile> . + } + } + """ ; + sh:name "File: RECOMMENDED `conformsTo` profile" ; + sh:message "File `conformsTo` SHOULD reference a Profile or CreativeWork entity" ; + sh:severity sh:Warning ; + ] . From 5e7f3120f9eea080050eee6d6f490cc4896d8396 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:16:41 +0200 Subject: [PATCH 112/352] fix(ro-crate-1.2): :adhesive_bandage: fix check for `contentSize` of File DataEntity --- .../1.2/should/4_data_entity_metadata.ttl | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl index 83f7b1fa0..bcb836835 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.ttl @@ -15,6 +15,7 @@ @prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix schema_org: <http://schema.org/> . +@prefix dct: <http://purl.org/dc/terms/> . @prefix prof: <http://www.w3.org/ns/dx/prof/> . @prefix sh: <http://www.w3.org/ns/shacl#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@@ -43,19 +44,19 @@ ro-crate:FileRecommendedProperties a sh:NodeShape ; ] )] . -# ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ; -# sh:name "File Data Entity: RECOMMENDED contentSize" ; -# sh:description """A `File` Data Entity SHOULD have `contentSize` set to the size in bytes.""" ; -# sh:targetClass ro-crate:File ; -# sh:property [ -# a sh:PropertyShape ; -# sh:minCount 1 ; -# sh:name "File Data Entity: RECOMMENDED `contentSize` property" ; -# sh:path schema_org:contentSize ; -# sh:datatype xsd:string ; -# sh:severity sh:Warning ; -# sh:message "File Data Entities SHOULD have a `contentSize` property" ; -# ] . +ro-crate:FileContentSizeRecommendedProperties a sh:NodeShape ; + sh:name "File Data Entity: RECOMMENDED contentSize" ; + sh:description """A `File` Data Entity SHOULD have `contentSize` set to the size in bytes.""" ; + sh:targetClass ro-crate:File ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: RECOMMENDED `contentSize` property" ; + sh:path schema_org:contentSize ; + sh:datatype xsd:string ; + sh:severity sh:Warning ; + sh:message "File Data Entities SHOULD have a `contentSize` property" ; + ] . 
ro-crate:DataEntityRecommendedProperties a sh:NodeShape ; sh:name "Data Entity: RECOMMENDED properties" ; From a2220cbe94fb88401691bb97483d186714072c06 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:18:07 +0200 Subject: [PATCH 113/352] feat(ro-crate-1.2): :sparkles: extend check for `hasPart` property of Root DataEntity --- .../1.2/must/2_root_data_entity_haspart.ttl | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl new file mode 100644 index 000000000..4b71e1a47 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl @@ -0,0 +1,50 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix schema_org: . +@prefix sh: . + +ro-crate:RootDataEntityHasPartAllDataEntities a sh:NodeShape ; + sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; + sh:description """The Root Data Entity MUST directly or indirectly reference + all Data Entities in the RO-Crate via hasPart (RO-Crate 1.2).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ro-crate:RootDataEntity . 
+ } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this ?unreferenced + WHERE { + $this a ro-crate:RootDataEntity . + ?unreferenced a ?entityType . + FILTER(?entityType IN (schema:MediaObject, schema:Dataset)) + FILTER(?unreferenced != $this) + FILTER(!strstarts(str(?unreferenced), "#")) + FILTER NOT EXISTS { + $this schema:hasPart+ ?unreferenced + } + } + """ ; + sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; + sh:message "The Root Data Entity MUST reference all Data Entities via hasPart (directly or indirectly)" ; + ] . \ No newline at end of file From cdb488fdccb5f6af072fe38c92071a0c09660ee2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:20:01 +0200 Subject: [PATCH 114/352] feat(ro-crate-1.2): :sparkles: implement property checks of Workflow and Script entities --- .../ro-crate/1.2/must/8_workflow_metadata.ttl | 175 ++++++++++++++++ .../profiles/ro-crate/1.2/ontology.ttl | 12 ++ .../1.2/should/8_workflow_metadata.ttl | 189 ++++++++++++++++++ 3 files changed, 376 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl new file mode 100644 index 000000000..de6690e33 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl @@ -0,0 +1,175 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix bioschemas: . +@prefix sh: . +@prefix xsd: . +@prefix validator: . + +# --------------------------------------------------------------- +# Identify Script entities +# +# A Script is an entity with schema:SoftwareSourceCode in @type +# that is NOT a Workflow (no bioschemas:ComputationalWorkflow). +# --------------------------------------------------------------- +ro-crate:ScriptDefinition a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify Script entities" ; + sh:description """Mark entities as Scripts if they have schema:SoftwareSourceCode + in @type but NOT bioschemas:ComputationalWorkflow.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:SoftwareSourceCode . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER NOT EXISTS { ?this a bioschemas:ComputationalWorkflow } + } + """ + ] ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:Script ; + ] . + +# --------------------------------------------------------------- +# Identify Workflow entities +# +# A Workflow is an entity with bioschemas:ComputationalWorkflow +# in @type. 
+# --------------------------------------------------------------- +ro-crate:WorkflowDefinition a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify Workflow entities" ; + sh:description """Mark entities as Workflows if they have + bioschemas:ComputationalWorkflow in @type.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a bioschemas:ComputationalWorkflow . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:Workflow ; + ] . + +# --------------------------------------------------------------- +# Script: MUST include `File` (schema:MediaObject) in @type +# --------------------------------------------------------------- +ro-crate:ScriptRequiredFileType a sh:NodeShape ; + sh:name "Script: REQUIRED `File` type" ; + sh:description """A Script entity MUST include `File` (schema:MediaObject) + in its @type alongside `SoftwareSourceCode` + (RO-Crate 1.2, Workflows and Scripts — Script).""" ; + sh:targetClass ro-crate:Script ; + sh:property [ + a sh:PropertyShape ; + sh:name "Script: REQUIRED `File` type" ; + sh:description """Check that the Script entity includes `File` + (schema:MediaObject) in its @type.""" ; + sh:path rdf:type ; + sh:hasValue schema:MediaObject ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A Script MUST include `File` in its `@type` (schema:MediaObject / schema:SoftwareSourceCode)" ; + ] . 
+ +# --------------------------------------------------------------- +# Workflow: MUST include `File` (schema:MediaObject) in @type +# --------------------------------------------------------------- +ro-crate:WorkflowRequiredFileType a sh:NodeShape ; + sh:name "Workflow: REQUIRED `File` type" ; + sh:description """A Workflow entity MUST include `File` (schema:MediaObject) + in its @type alongside `SoftwareSourceCode` and `ComputationalWorkflow` + (RO-Crate 1.2, Workflows and Scripts — Workflow).""" ; + sh:targetClass ro-crate:Workflow ; + sh:property [ + a sh:PropertyShape ; + sh:name "Workflow: REQUIRED `File` type" ; + sh:description """Check that the Workflow entity includes `File` + (schema:MediaObject) in its @type.""" ; + sh:path rdf:type ; + sh:hasValue schema:MediaObject ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A Workflow MUST include `File` in its `@type` (schema:MediaObject / schema:SoftwareSourceCode / bioschemas:ComputationalWorkflow)" ; + ] . + +# --------------------------------------------------------------- +# Workflow: MUST include `SoftwareSourceCode` in @type +# --------------------------------------------------------------- +ro-crate:WorkflowRequiredSoftwareSourceCodeType a sh:NodeShape ; + sh:name "Workflow: REQUIRED `SoftwareSourceCode` type" ; + sh:description """A Workflow entity MUST include `SoftwareSourceCode` + in its @type alongside `File` and `ComputationalWorkflow` + (RO-Crate 1.2, Workflows and Scripts — Workflow).""" ; + sh:targetClass ro-crate:Workflow ; + sh:property [ + a sh:PropertyShape ; + sh:name "Workflow: REQUIRED `SoftwareSourceCode` type" ; + sh:description """Check that the Workflow entity includes + `SoftwareSourceCode` in its @type.""" ; + sh:path rdf:type ; + sh:hasValue schema:SoftwareSourceCode ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A Workflow MUST include `SoftwareSourceCode` in its `@type` (schema:MediaObject / schema:SoftwareSourceCode / 
bioschemas:ComputationalWorkflow)" ; + ] . + +# --------------------------------------------------------------- +# Script or Workflow: MUST have a `name` property +# --------------------------------------------------------------- +ro-crate:ScriptOrWorkflowRequiredName a sh:NodeShape ; + sh:name "Script or Workflow: REQUIRED `name`" ; + sh:description """Scripts and Workflows MUST have a human-readable `name` + property (RO-Crate 1.2, Workflows and Scripts).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:SoftwareSourceCode . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Script or Workflow: REQUIRED `name` property" ; + sh:description """Check that the Script or Workflow entity has a + human-readable `name` property.""" ; + sh:path schema:name ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:severity sh:Violation ; + sh:message "Scripts and Workflows MUST have a human-readable `name` property" ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl index 9e1e7f7bb..84331ccc7 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/ontology.ttl @@ -54,6 +54,18 @@ schema:SoftwareSourceCode rdf:type owl:Class ; bioschemas:ComputationalWorkflow rdf:type owl:Class . +### https://github.com/crs4/rocrate-validator/profiles/ro-crate/1.2/Script +ro-crate:Script rdf:type owl:Class ; + rdfs:subClassOf schema:SoftwareSourceCode, schema:MediaObject ; + rdfs:label "Script"@en . + + +### https://github.com/crs4/rocrate-validator/profiles/ro-crate/1.2/Workflow +ro-crate:Workflow rdf:type owl:Class ; + rdfs:subClassOf schema:SoftwareSourceCode, schema:MediaObject, bioschemas:ComputationalWorkflow ; + rdfs:label "Workflow"@en . 
+ + ### https://w3id.org/ro/crate/1.2/DataEntity ro-crate:DataEntity rdf:type owl:Class ; rdfs:subClassOf schema:CreativeWork ; diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl new file mode 100644 index 000000000..46e67aacd --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl @@ -0,0 +1,189 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix bioschemas: . +@prefix dct: . +@prefix sh: . +@prefix xsd: . +@prefix validator: . + +# --------------------------------------------------------------- +# Script or Workflow: programmingLanguage SHOULD reference a +# ComputerLanguage entity +# --------------------------------------------------------------- +ro-crate:ScriptOrWorkflowProgrammingLanguage a sh:NodeShape ; + sh:name "Script or Workflow: RECOMMENDED `programmingLanguage`" ; + sh:description """Scripts and Workflows SHOULD have a `programmingLanguage` + property referencing a contextual entity of type `ComputerLanguage` + (RO-Crate 1.2, Workflows and Scripts).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:SoftwareSourceCode . + ?metadatafile schema:about ?root . 
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Script or Workflow: RECOMMENDED `programmingLanguage` property" ; + sh:description """Check that the Script or Workflow has a `programmingLanguage` + property.""" ; + sh:path schema:programmingLanguage ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Scripts and Workflows SHOULD have a `programmingLanguage` property referencing a `ComputerLanguage` entity" ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "Script or Workflow: `programmingLanguage` SHOULD reference ComputerLanguage" ; + sh:description """Check that the `programmingLanguage` property references + an entity of type `ComputerLanguage`.""" ; + sh:select """ + SELECT ?this + WHERE { + ?this schema:programmingLanguage ?lang . + FILTER NOT EXISTS { ?lang a schema:ComputerLanguage } + } + """ ; + sh:severity sh:Warning ; + sh:message "The `programmingLanguage` of a Script or Workflow SHOULD reference a `ComputerLanguage` entity" ; + ] . 
+ +# --------------------------------------------------------------- +# Workflow: conformsTo SHOULD include a versioned Bioschemas +# ComputationalWorkflow profile URI +# --------------------------------------------------------------- +ro-crate:WorkflowRecommendedConformsTo a sh:NodeShape ; + sh:name "Workflow: RECOMMENDED Bioschemas `conformsTo`" ; + sh:description """A Workflow SHOULD declare `conformsTo` pointing to a + versioned Bioschemas ComputationalWorkflow profile URI + (https://bioschemas.org/profiles/ComputationalWorkflow/) + (RO-Crate 1.2, Workflows and Scripts).""" ; + sh:targetClass ro-crate:Workflow ; + sh:property [ + a sh:PropertyShape ; + sh:name "Workflow: RECOMMENDED `conformsTo` Bioschemas profile" ; + sh:description """Check that the Workflow has a `conformsTo` pointing + to a Bioschemas ComputationalWorkflow profile URI.""" ; + sh:path dct:conformsTo ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Workflows SHOULD declare `conformsTo` referencing a versioned Bioschemas ComputationalWorkflow profile URI (https://bioschemas.org/profiles/ComputationalWorkflow/)" ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "Workflow `conformsTo` SHOULD reference Bioschemas profile" ; + sh:description """If `conformsTo` is present, at least one value SHOULD + start with the Bioschemas ComputationalWorkflow profile URI.""" ; + sh:select """ + SELECT ?this + WHERE { + ?this dct:conformsTo ?profile . + FILTER NOT EXISTS { + ?this dct:conformsTo ?bioprofile . + FILTER(STRSTARTS(STR(?bioprofile), "https://bioschemas.org/profiles/ComputationalWorkflow/")) + } + } + """ ; + sh:severity sh:Warning ; + sh:message "Workflow `conformsTo` SHOULD include a versioned Bioschemas ComputationalWorkflow profile URI (https://bioschemas.org/profiles/ComputationalWorkflow/)" ; + ] . 
+ +# --------------------------------------------------------------- +# ImageObject linked from Script/Workflow: encodingFormat SHOULD +# be present +# --------------------------------------------------------------- +ro-crate:ScriptOrWorkflowImageEncodingFormat a sh:NodeShape ; + sh:name "Script/Workflow ImageObject: RECOMMENDED `encodingFormat`" ; + sh:description """ImageObject entities referenced via `image` from a + Script or Workflow SHOULD have an `encodingFormat` property with an + IANA media type string and/or a Pronom identifier reference + (RO-Crate 1.2, Workflows and Scripts — related ImageObject).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?sw a schema:SoftwareSourceCode . + ?sw schema:image ?this . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Script/Workflow ImageObject: RECOMMENDED `encodingFormat` property" ; + sh:description """Check that the ImageObject has an `encodingFormat` + property.""" ; + sh:path schema:encodingFormat ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `encodingFormat` property (IANA media type and/or Pronom identifier)" ; + ] . 
+ +# --------------------------------------------------------------- +# ImageObject linked from Script/Workflow: `about` SHOULD +# reference the script/workflow +# --------------------------------------------------------------- +ro-crate:ScriptOrWorkflowImageAbout a sh:NodeShape ; + sh:name "Script/Workflow ImageObject: RECOMMENDED `about` reference" ; + sh:description """ImageObject entities referenced via `image` from a + Script or Workflow SHOULD have an `about` property that references + the script or workflow entity + (RO-Crate 1.2, Workflows and Scripts — related ImageObject).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?sw a schema:SoftwareSourceCode . + ?sw schema:image ?this . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "Script/Workflow ImageObject: `about` SHOULD reference the script/workflow" ; + sh:description """Check that the ImageObject's `about` property references + the script or workflow that includes it via `image`.""" ; + sh:select """ + SELECT ?this + WHERE { + ?sw a schema:SoftwareSourceCode . + ?sw schema:image ?this . + FILTER NOT EXISTS { + ?this schema:about ?sw . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `about` property referencing the script or workflow" ; + ] . 
From 57b21a411ad17090eb46ad997d697d4f8f8ea0c7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:23:06 +0200 Subject: [PATCH 115/352] test(ro-crate-1.2): :white_check_mark: test requirements of workflow and script entities --- .../image_about/invalid/diagram.png | 0 .../invalid/ro-crate-metadata.json | 51 +++ .../image_about/invalid/workflow.ga | 0 .../image_about/valid/diagram.png | 0 .../image_about/valid/ro-crate-metadata.json | 89 +++++ .../image_about/valid/workflow.ga | 0 .../image_encoding_format/invalid/diagram.png | 0 .../invalid/ro-crate-metadata.json | 51 +++ .../image_encoding_format/invalid/workflow.ga | 0 .../image_encoding_format/valid/diagram.png | 0 .../valid/ro-crate-metadata.json | 89 +++++ .../image_encoding_format/valid/workflow.ga | 0 .../invalid/ro-crate-metadata.json | 32 ++ .../programming_language/invalid/script.sh | 0 .../valid/ro-crate-metadata.json | 66 ++++ .../programming_language/valid/script.sh | 0 .../invalid/ro-crate-metadata.json | 31 ++ .../script_name/invalid/script.sh | 0 .../script_name/valid/ro-crate-metadata.json | 39 +++ .../script_name/valid/script.sh | 0 .../invalid/ro-crate-metadata.json | 30 ++ .../script_type/valid/ro-crate-metadata.json | 39 +++ .../script_type/valid/script.sh | 0 .../invalid/ro-crate-metadata.json | 39 +++ .../workflow_conformsTo/invalid/script.sh | 0 .../workflow_conformsTo/invalid/workflow.ga | 0 .../valid/ro-crate-metadata.json | 70 ++++ .../workflow_conformsTo/valid/script.sh | 0 .../workflow_conformsTo/valid/workflow.ga | 0 .../invalid/ro-crate-metadata.json | 31 ++ .../workflow_name/invalid/script.sh | 0 .../workflow_name/invalid/workflow.ga | 0 .../valid/ro-crate-metadata.json | 40 +++ .../workflow_name/valid/script.sh | 0 .../workflow_name/valid/workflow.ga | 0 .../ro-crate-metadata.json | 30 ++ .../ro-crate-metadata.json | 30 ++ .../valid/ro-crate-metadata.json | 40 +++ .../workflow_type/valid/script.sh | 0 .../workflow_type/valid/workflow.ga | 0 
.../ro-crate-1.2/test_workflows_scripts.py | 327 ++++++++++++++++++ 41 files changed, 1124 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/diagram.png create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/diagram.png create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/diagram.png create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/diagram.png create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/ro-crate-metadata.json create 
mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/workflow.ga create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/workflow.ga create mode 100644 
tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_file/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_ssc/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/script.sh create mode 100644 tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/workflow.ga create mode 100644 tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/diagram.png b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/diagram.png new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..92ec33c49 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/ro-crate-metadata.json @@ -0,0 +1,51 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Image about — invalid (missing about)", + "description": "RO-Crate with a Workflow whose image ImageObject is missing the recommended about property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [ + { "@id": "workflow.ga" }, + { "@id": "diagram.png" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": 
"workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A Galaxy workflow with a diagram image.", + "encodingFormat": "application/json", + "programmingLanguage": { "@id": "#galaxy" }, + "conformsTo": { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" }, + "image": { "@id": "diagram.png" } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { "@id": "https://galaxyproject.org/" } + }, + { + "@id": "diagram.png", + "@type": ["File", "ImageObject"], + "name": "Workflow diagram", + "description": "A diagram missing the recommended about property.", + "encodingFormat": "image/png" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/invalid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/diagram.png b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/diagram.png new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json new file mode 100644 index 000000000..1c77e54b3 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json @@ -0,0 +1,89 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Image about — valid", + "description": "RO-Crate with a Workflow whose image ImageObject has the recommended about property referencing the workflow.", 
+ "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "workflow.ga" + }, + { + "@id": "diagram.png" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "name": "My Workflow", + "description": "A Galaxy workflow with a diagram image.", + "encodingFormat": "application/json", + "programmingLanguage": { + "@id": "#galaxy" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + }, + "image": { + "@id": "diagram.png" + } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { + "@id": "https://galaxyproject.org/" + } + }, + { + "@id": "diagram.png", + "@type": [ + "File", + "ImageObject" + ], + "name": "Workflow diagram", + "description": "A diagram with about referencing the workflow.", + "encodingFormat": "image/png", + "about": { + "@id": "workflow.ga" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/diagram.png b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/diagram.png new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..0a98560be --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/ro-crate-metadata.json @@ -0,0 +1,51 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Image encodingFormat — invalid (missing encodingFormat)", + "description": "RO-Crate with a Workflow whose image ImageObject is missing the recommended encodingFormat property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [ + { "@id": "workflow.ga" }, + { "@id": "diagram.png" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A Galaxy workflow with a diagram image.", + "encodingFormat": "application/json", + "programmingLanguage": { "@id": "#galaxy" }, + "conformsTo": { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" }, + "image": { "@id": "diagram.png" } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { "@id": "https://galaxyproject.org/" } + }, + { + "@id": "diagram.png", + "@type": ["File", "ImageObject"], + "name": "Workflow diagram", + "description": "A diagram missing the required encodingFormat.", + "about": { "@id": "workflow.ga" } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/invalid/workflow.ga 
new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/diagram.png b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/diagram.png new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json new file mode 100644 index 000000000..3ce6794e1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json @@ -0,0 +1,89 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Image encodingFormat — valid", + "description": "RO-Crate with a Workflow whose image ImageObject has the recommended encodingFormat property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "workflow.ga" + }, + { + "@id": "diagram.png" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "name": "My Workflow", + "description": "A Galaxy workflow with a diagram image.", + "encodingFormat": "application/json", + "programmingLanguage": { + "@id": "#galaxy" + }, + "conformsTo": { + "@id": 
"https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + }, + "image": { + "@id": "diagram.png" + } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { + "@id": "https://galaxyproject.org/" + } + }, + { + "@id": "diagram.png", + "@type": [ + "File", + "ImageObject" + ], + "name": "Workflow diagram", + "description": "A diagram illustrating the workflow steps.", + "encodingFormat": "image/png", + "about": { + "@id": "workflow.ga" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..9b21b0539 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/ro-crate-metadata.json @@ -0,0 +1,32 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Programming language — invalid (missing programmingLanguage)", + "description": "RO-Crate with a Script that is missing the recommended programmingLanguage property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "script.sh" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "script.sh", + "@type": ["File", "SoftwareSourceCode"], + "name": "My Script", + 
"description": "A script without a programmingLanguage property.", + "encodingFormat": "application/x-sh" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/invalid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json new file mode 100644 index 000000000..2c6d887f0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json @@ -0,0 +1,66 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Programming language — valid", + "description": "RO-Crate with a Script that declares programmingLanguage referencing a ComputerLanguage entity.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "script.sh" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "script.sh", + "@type": [ + "File", + "SoftwareSourceCode" + ], + "name": "My Script", + "description": "A script with proper programmingLanguage declaration.", + "encodingFormat": "application/x-sh", + "programmingLanguage": { + "@id": "#bash" + } + }, + { + "@id": "#bash", + "@type": "ComputerLanguage", + "name": 
"Bash", + "url": { + "@id": "https://www.gnu.org/software/bash/" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..0729de7f1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/ro-crate-metadata.json @@ -0,0 +1,31 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Script name — invalid (missing name)", + "description": "RO-Crate with a Script entity that is missing the required name property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "script.sh" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "script.sh", + "@type": ["File", "SoftwareSourceCode"], + "description": "A script missing its required name property.", + "encodingFormat": "application/x-sh" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/invalid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5435ee5c8 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Script name — valid", + "description": "RO-Crate containing a Script entity with a required name property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "script.sh" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "script.sh", + "@type": ["File", "SoftwareSourceCode"], + "name": "My Script", + "description": "A sample shell script with a proper name.", + "encodingFormat": "application/x-sh", + "programmingLanguage": { "@id": "#bash" } + }, + { + "@id": "#bash", + "@type": "ComputerLanguage", + "name": "Bash", + "url": { "@id": "https://www.gnu.org/software/bash/" } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..ffe4e0a4e --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/invalid/ro-crate-metadata.json @@ -0,0 +1,30 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": 
"ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Script type — invalid (missing File type)", + "description": "RO-Crate with a Script entity missing the required File type.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "https://example.org/script.sh", + "@type": "SoftwareSourceCode", + "name": "My Script", + "description": "A script with SoftwareSourceCode type only — missing the required File type." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json new file mode 100644 index 000000000..4ebe4a154 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Script type — valid", + "description": "RO-Crate containing a Script entity with correct @type [File, SoftwareSourceCode].", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "script.sh" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "script.sh", + "@type": ["File", "SoftwareSourceCode"], + "name": "My Script", + "description": "A sample shell script.", + "encodingFormat": "application/x-sh", + 
"programmingLanguage": { "@id": "#bash" } + }, + { + "@id": "#bash", + "@type": "ComputerLanguage", + "name": "Bash", + "url": { "@id": "https://www.gnu.org/software/bash/" } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..f0c9bfbe9 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow conformsTo — invalid (missing Bioschemas conformsTo)", + "description": "RO-Crate with a Workflow missing the recommended conformsTo Bioschemas profile.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "workflow.ga" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A workflow without the recommended Bioschemas conformsTo declaration.", + "encodingFormat": "application/json", + "programmingLanguage": { "@id": "#galaxy" } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { "@id": "https://galaxyproject.org/" } + } + ] +} diff --git 
a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/invalid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json new file mode 100644 index 000000000..d0bfb255f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json @@ -0,0 +1,70 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow conformsTo — valid", + "description": "RO-Crate with a Workflow that declares conformsTo referencing a versioned Bioschemas profile URI.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "workflow.ga" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "name": "My Workflow", + "description": "A workflow conforming to the Bioschemas ComputationalWorkflow 
profile.", + "encodingFormat": "application/json", + "programmingLanguage": { + "@id": "#galaxy" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { + "@id": "https://galaxyproject.org/" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..52ada9885 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/ro-crate-metadata.json @@ -0,0 +1,31 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow name — invalid (missing name)", + "description": "RO-Crate with a Workflow entity missing the required name property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "workflow.ga" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", 
"ComputationalWorkflow"], + "description": "A workflow missing its required name property.", + "encodingFormat": "application/json" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/invalid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json new file mode 100644 index 000000000..a7574bef7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow name — valid", + "description": "RO-Crate with a Workflow entity that has the required name property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "workflow.ga" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A Galaxy workflow with all required types and name.", + "encodingFormat": "application/json", + "programmingLanguage": { "@id": "#galaxy" }, + "conformsTo": { "@id": 
"https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { "@id": "https://galaxyproject.org/" } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/workflow.ga new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_file/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_file/ro-crate-metadata.json new file mode 100644 index 000000000..e1c23f8eb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_file/ro-crate-metadata.json @@ -0,0 +1,30 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow type — invalid (missing File type)", + "description": "RO-Crate with a Workflow entity missing the required File type.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "https://example.org/workflow.ga", + "@type": ["SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A workflow missing the required File type." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_ssc/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_ssc/ro-crate-metadata.json new file mode 100644 index 000000000..0fb8daeb1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/invalid_missing_ssc/ro-crate-metadata.json @@ -0,0 +1,30 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow type — invalid (missing SoftwareSourceCode type)", + "description": "RO-Crate with a Workflow entity missing the required SoftwareSourceCode type.", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "https://example.org/workflow.ga", + "@type": ["File", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A workflow missing the required SoftwareSourceCode type." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json new file mode 100644 index 000000000..087d86a33 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Workflow type — valid", + "description": "RO-Crate with a Workflow entity correctly typed as [File, SoftwareSourceCode, ComputationalWorkflow].", + "datePublished": "2024-01-01", + "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "hasPart": [{ "@id": "workflow.ga" }] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow", + "description": "A sample Galaxy workflow.", + "encodingFormat": "application/json", + "programmingLanguage": { "@id": "#galaxy" }, + "conformsTo": { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" } + }, + { + "@id": "#galaxy", + "@type": "ComputerLanguage", + "name": "Galaxy", + "url": { "@id": "https://galaxyproject.org/" } + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/script.sh b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/workflow.ga b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/workflow.ga new file mode 
100644 index 000000000..e69de29bb diff --git a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py new file mode 100644 index 000000000..af190a4f3 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -0,0 +1,327 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import WorkflowsScripts +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + +__workflows_scripts_crates__ = WorkflowsScripts() + +# General RECOMMENDED checks that fire on minimal test crates regardless of the +# workflow/script-specific property being tested. These are skipped in the +# "valid" RECOMMENDED test cases so the assertion is only about the +# workflow-specific shape. 
+_GENERIC_RECOMMENDED_SKIP = [ + "ro-crate-1.2_40.0", # RO-Crate Metadata Entity: RECOMMENDED properties (check 0) + "ro-crate-1.2_40.1", # RO-Crate Metadata Entity: RECOMMENDED properties (check 1) + "ro-crate-1.2_47.1", # Root Data Entity: recommended funder + "ro-crate-1.2_54.1", # Root Data Entity: recommended publisher + "ro-crate-1.2_61.1", # File Data Entity: RECOMMENDED contentSize + "ro-crate-1.2_62.0", # File: RECOMMENDED conformsTo profile + "ro-crate-1.2_70.1", # Contextual Entity Properties + "ro-crate-1.2_71.1", # Contextual Entity RECOMMENDED description + "ro-crate-1.2_76.2", # License entity: RECOMMENDED properties +] + + +# --------------------------------------------------------------------------- +# Script type checks (MUST) +# --------------------------------------------------------------------------- + +def test_valid_script_type(): + """ + A Script entity with `File`, `SoftwareSourceCode` in @type and a `name` + MUST pass REQUIRED validation. + """ + do_entity_test( + __workflows_scripts_crates__.valid_script_type, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_script_type(): + """ + A Script entity missing `File` (schema:MediaObject) in its @type + MUST trigger a REQUIRED violation. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_script_type, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script: REQUIRED `File` type"], + expected_triggered_issues=[ + "A Script MUST include `File` in its `@type`" + ], + ) + + +# --------------------------------------------------------------------------- +# Script name checks (MUST) +# --------------------------------------------------------------------------- + +def test_valid_script_name(): + """ + A Script entity with a `name` property MUST pass REQUIRED validation. 
+ """ + do_entity_test( + __workflows_scripts_crates__.valid_script_name, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_script_name(): + """ + A Script entity missing the `name` property MUST trigger a REQUIRED violation. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_script_name, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script or Workflow: REQUIRED `name`"], + expected_triggered_issues=[ + "Scripts and Workflows MUST have a human-readable `name` property" + ], + ) + + +# --------------------------------------------------------------------------- +# Workflow type checks (MUST) +# --------------------------------------------------------------------------- + +def test_valid_workflow_type(): + """ + A Workflow entity with `File`, `SoftwareSourceCode`, `ComputationalWorkflow` + in @type and all required properties MUST pass REQUIRED validation. + """ + do_entity_test( + __workflows_scripts_crates__.valid_workflow_type, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_workflow_type_missing_file(): + """ + A Workflow entity missing `File` (schema:MediaObject) in its @type + MUST trigger a REQUIRED violation. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_workflow_type_missing_file, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Workflow: REQUIRED `File` type"], + expected_triggered_issues=[ + "A Workflow MUST include `File` in its `@type`" + ], + ) + + +def test_invalid_workflow_type_missing_ssc(): + """ + A Workflow entity missing `SoftwareSourceCode` in its @type + MUST trigger a REQUIRED violation. 
+ """ + do_entity_test( + __workflows_scripts_crates__.invalid_workflow_type_missing_ssc, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Workflow: REQUIRED `SoftwareSourceCode` type"], + expected_triggered_issues=[ + "A Workflow MUST include `SoftwareSourceCode` in its `@type`" + ], + ) + + +# --------------------------------------------------------------------------- +# Workflow name checks (MUST) +# --------------------------------------------------------------------------- + +def test_valid_workflow_name(): + """ + A Workflow entity with a `name` property MUST pass REQUIRED validation. + """ + do_entity_test( + __workflows_scripts_crates__.valid_workflow_name, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_workflow_name(): + """ + A Workflow entity missing the `name` property MUST trigger a REQUIRED violation. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_workflow_name, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script or Workflow: REQUIRED `name`"], + expected_triggered_issues=[ + "Scripts and Workflows MUST have a human-readable `name` property" + ], + ) + + +# --------------------------------------------------------------------------- +# programmingLanguage checks (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_programming_language(): + """ + A Script/Workflow with a `programmingLanguage` referencing a + `ComputerLanguage` entity SHOULD pass RECOMMENDED validation. 
+ """ + do_entity_test( + __workflows_scripts_crates__.valid_programming_language, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_programming_language(): + """ + A Script/Workflow missing a `programmingLanguage` property + SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_programming_language, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script or Workflow: RECOMMENDED `programmingLanguage`"], + expected_triggered_issues=[ + "Scripts and Workflows SHOULD have a `programmingLanguage` property" + ], + ) + + +# --------------------------------------------------------------------------- +# Workflow conformsTo checks (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_workflow_conformsTo(): + """ + A Workflow with `conformsTo` pointing to a versioned Bioschemas profile URI + SHOULD pass RECOMMENDED validation. + """ + do_entity_test( + __workflows_scripts_crates__.valid_workflow_conformsTo, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_workflow_conformsTo(): + """ + A Workflow missing `conformsTo` SHOULD trigger a RECOMMENDED warning. 
+ """ + do_entity_test( + __workflows_scripts_crates__.invalid_workflow_conformsTo, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Workflow: RECOMMENDED Bioschemas `conformsTo`"], + expected_triggered_issues=[ + "Workflows SHOULD declare `conformsTo` referencing a versioned Bioschemas ComputationalWorkflow profile URI" + ], + ) + + +# --------------------------------------------------------------------------- +# Image encodingFormat checks (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_image_encoding_format(): + """ + A Workflow image ImageObject with `encodingFormat` SHOULD pass + RECOMMENDED validation. + """ + do_entity_test( + __workflows_scripts_crates__.valid_image_encoding_format, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_image_encoding_format(): + """ + A Workflow image ImageObject missing `encodingFormat` SHOULD trigger + a RECOMMENDED warning. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_image_encoding_format, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script/Workflow ImageObject: RECOMMENDED `encodingFormat`"], + expected_triggered_issues=[ + "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `encodingFormat` property" + ], + ) + + +# --------------------------------------------------------------------------- +# Image about checks (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_image_about(): + """ + A Workflow image ImageObject with `about` referencing the workflow + SHOULD pass RECOMMENDED validation. 
+ """ + do_entity_test( + __workflows_scripts_crates__.valid_image_about, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_image_about(): + """ + A Workflow image ImageObject missing `about` SHOULD trigger a + RECOMMENDED warning. + """ + do_entity_test( + __workflows_scripts_crates__.invalid_image_about, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Script/Workflow ImageObject: RECOMMENDED `about` reference"], + expected_triggered_issues=[ + "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `about` property referencing the script or workflow" + ], + ) From e73d144f0e891a6210170833362edb4ded8622eb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 01:23:53 +0200 Subject: [PATCH 116/352] test(ro-crate-1.2): :card_file_box: fix and extend test data --- .../valid/ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 14 +++- .../valid/ro-crate-metadata.json | 14 +++- .../valid/ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 14 +++- .../valid/basic-ro-crate-metadata.json | 12 +++- .../valid/prefix-ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 5 +- .../valid/ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 12 +++- .../single_value/valid/ro-crate-metadata.json | 12 +++- .../valid/ro-crate-metadata.json | 42 +++++++++--- .../valid/ro-crate-metadata.json | 14 +++- .../invalid/ro-crate-metadata.json | 1 + .../valid/ro-crate-metadata.json | 1 + .../valid/ro-crate-metadata.json | 2 + .../recommended_conformsto/invalid/data.csv | 0 .../invalid/ro-crate-metadata.json | 63 ++++++++++++++++++ .../recommended_conformsto/valid/data.csv | 0 .../valid/ro-crate-metadata.json | 64 +++++++++++++++++++ .../recommended_contentSize/invalid/data.csv | 0 
.../invalid/ro-crate-metadata.json | 56 ++++++++++++++++ .../recommended_contentSize/valid/data.csv | 0 .../valid/ro-crate-metadata.json | 57 +++++++++++++++++ .../invalid/ro-crate-metadata.json | 57 +++++++++++++++++ .../valid/ro-crate-metadata.json | 58 +++++++++++++++++ .../valid/ro-crate-metadata.json | 14 +++- .../valid/ro-crate-metadata.json | 14 +++- .../valid/ro-crate-metadata.json | 52 +++++++++++---- 30 files changed, 596 insertions(+), 42 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/data.csv create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json index 966b53e02..324b92c89 100644 --- 
a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json @@ -33,8 +33,18 @@ ], "funder": { "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -74,4 +84,4 @@ "contentSize": "42" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json index 43d8a2e30..1f6d01f41 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Named entity #-prefix \u2014 valid", + "name": "Named entity #-prefix — valid", "description": "RO-Crate where all local Person and Organization entities use '#'-prefixed @id.", "datePublished": "2024-01-01", "license": { @@ -29,8 +29,18 @@ "hasPart": [], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -59,4 +69,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json index 6f1215ebd..ea398d53e 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "No parent traversal \u2014 valid", + "name": "No parent traversal — valid", "description": "RO-Crate with no @id values that use ../ to climb out of the root.", "datePublished": "2024-01-01", "license": { @@ -26,8 +26,18 @@ "hasPart": [], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json index 966b53e02..324b92c89 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json @@ -33,8 +33,18 @@ ], "funder": { "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -74,4 +84,4 @@ "contentSize": "42" } ] -} +} \ No newline at end of file diff --git 
a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json index 358605ece..4b85234bc 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "UTF-8 identifiers \u2014 valid", + "name": "UTF-8 identifiers — valid", "description": "RO-Crate where no @id uses percent-encoded non-ASCII bytes.", "datePublished": "2024-01-01", "license": { @@ -26,8 +26,18 @@ "hasPart": [], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index a69a2d283..ecb4a3c9f 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -30,8 +30,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -52,4 
+62,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json index 83bf0308f..c8e3dbcbc 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json @@ -30,8 +30,18 @@ }, "funder": { "@id": "https://ror.org/012345678" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://ror.org/012345678", "@type": "Organization", @@ -45,4 +55,4 @@ "description": "A Creative Commons license." 
} ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json index 153609436..534bda896 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json @@ -26,8 +26,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json index da03a2b53..a94ea7bb0 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -49,7 +49,8 @@ "@type": "File", "name": "My Data File", "description": "A data file that is part of the RO-Crate.", - "encodingFormat": "text/plain" + "encodingFormat": "text/plain", + "contentSize": "0" }, { "@id": "https://ror.org/00k4n6c32", @@ -59,4 +60,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json index 153609436..534bda896 100644 --- 
a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json @@ -26,8 +26,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json index 153609436..534bda896 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json @@ -26,8 +26,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json index 153609436..534bda896 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json @@ -26,8 +26,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json index 8104501b9..6d92823f3 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,13 +17,31 @@ "name": "Funded RO-Crate — valid", "description": "RO-Crate whose Root Data Entity references both a project Organization and an external funder directly via `funder`.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "#publisher-org" + }, "funder": [ - {"@id": "#project-org"}, - {"@id": "https://ror.org/00k4n6c32"} + { + "@id": "#project-org" + }, + { + "@id": "https://ror.org/00k4n6c32" + } ], - "hasPart": [] + "hasPart": [], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": 
"UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "#publisher-org", @@ -33,7 +55,9 @@ "name": "Example Research Project", "description": "The research project associated with this RO-Crate.", "url": "https://example.org/projects/example-project", - "funder": {"@id": "https://ror.org/00k4n6c32"} + "funder": { + "@id": "https://ror.org/00k4n6c32" + } }, { "@id": "https://ror.org/00k4n6c32", @@ -49,4 +73,4 @@ "description": "CC BY 4.0 license." } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json index 4cd320f64..b3f791165 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Publisher present \u2014 valid", + "name": "Publisher present — valid", "description": "RO-Crate whose Root Data Entity declares a publisher Organization.", "datePublished": "2024-01-01", "license": { @@ -26,8 +26,18 @@ "hasPart": [], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -48,4 +58,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json index 33550acf0..65095ebf4 100644 --- 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json @@ -34,6 +34,7 @@ "name": "research-data.csv", "description": "Research data under the same license as the Root.", "encodingFormat": "text/csv", + "contentSize": "2048", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"} }, { diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json index d37482346..259b354de 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json @@ -34,6 +34,7 @@ "name": "open-data.csv", "description": "Open data file under CC0.", "encodingFormat": "text/csv", + "contentSize": "1024", "license": {"@id": "http://spdx.org/licenses/CC0-1.0"} }, { diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json index cb54a9b95..194fbec8a 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json @@ -38,6 +38,7 @@ "name": "missing-file.csv", "description": "A CSV file that is not included in the payload but has localPath.", "encodingFormat": "text/csv", + "contentSize": "1024", "localPath": "/mnt/archive/data/missing-file.csv" }, { @@ -46,6 +47,7 @@ "name": "output-file.csv", "description": "A deliberately absent file with localPath.", "encodingFormat": "text/csv", 
+ "contentSize": "2048", "localPath": "/tmp/output-file.csv" }, { diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..1106e99f7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/invalid/ro-crate-metadata.json @@ -0,0 +1,63 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "File with invalid conformsTo — invalid", + "description": "This RO-Crate has a File Data Entity with conformsTo referencing a Person (not a Profile or CreativeWork).", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": 
"https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A CSV file with an invalid conformsTo reference.", + "encodingFormat": "text/csv", + "contentSize": "2048", + "conformsTo": {"@id": "https://orcid.org/0000-0001-2345-6789"} + }, + { + "@id": "https://orcid.org/0000-0001-2345-6789", + "@type": "Person", + "name": "Dr. Jane Doe" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json new file mode 100644 index 000000000..90c09bb28 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json @@ -0,0 +1,64 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "File with conformsTo — valid", + "description": "This RO-Crate has a File Data Entity with conformsTo referencing a CreativeWork profile.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": 
"UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A CSV file conforming to a format profile.", + "encodingFormat": "text/csv", + "contentSize": "2048", + "conformsTo": {"@id": "#csv-profile"} + }, + { + "@id": "#csv-profile", + "@type": "CreativeWork", + "name": "CSV Profile for RO-Crate 1.2", + "description": "A profile describing the expected structure of CSV files used in RO-Crates." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..2865e89eb --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/invalid/ro-crate-metadata.json @@ -0,0 +1,56 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "File without contentSize — invalid", + "description": "This RO-Crate has a File Data Entity missing the recommended contentSize property.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons 
Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A sample CSV data file without contentSize.", + "encodingFormat": "text/csv" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/data.csv b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/data.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json new file mode 100644 index 000000000..cdd631c47 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json @@ -0,0 +1,57 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "File with contentSize — valid", + "description": "This RO-Crate has a File Data Entity with the recommended contentSize property.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + 
"description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Sample data file", + "description": "A sample CSV data file with contentSize.", + "encodingFormat": "text/csv", + "contentSize": "2048" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..968f51b87 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/invalid/ro-crate-metadata.json @@ -0,0 +1,57 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web-based File without sdDatePublished — invalid", + "description": "This RO-Crate has a web-based File Data Entity missing the recommended sdDatePublished property.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "https://example.com/data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": 
"https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "https://example.com/data.csv", + "@type": "File", + "name": "Remote data file", + "description": "A web-based CSV file without sdDatePublished.", + "encodingFormat": "text/csv", + "contentSize": "512" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json new file mode 100644 index 000000000..20e751522 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json @@ -0,0 +1,58 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Web-based File with sdDatePublished — valid", + "description": "This RO-Crate has a web-based File Data Entity with the recommended sdDatePublished property.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "hasPart": [{"@id": "https://example.com/data.csv"}], + "identifier": {"@id": "#uuid"} + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + 
"name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "https://example.com/data.csv", + "@type": "File", + "name": "Remote data file", + "description": "A web-based CSV file with sdDatePublished.", + "encodingFormat": "text/csv", + "contentSize": "512", + "sdDatePublished": "2024-06-15" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json index 9d9ba1046..e5d1fde46 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Dataset distribution \u2014 valid", + "name": "Dataset distribution — valid", "description": "RO-Crate whose Root Data Entity declares a distribution pointing to a downloadable DataDownload archive.", "datePublished": "2024-01-01", "license": { @@ -29,8 +29,18 @@ }, "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "#publisher-org", "@type": "Organization", @@ -58,4 +68,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json index f0c87695d..dd25ec7db 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Web directory with distribution \u2014 valid", + "name": "Web directory with distribution — valid", "description": "RO-Crate containing a web-based Directory Data Entity that declares a distribution.", "datePublished": "2024-01-01", "license": { @@ -30,8 +30,18 @@ ], "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", @@ -68,4 +78,4 @@ "url": "https://ec.europa.eu" } ] -} +} \ No newline at end of file diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json index 5f1c9b743..d73be8915 100644 --- a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, - "about": {"@id": "./"}, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + }, "name": "RO-Crate Metadata Descriptor" }, { @@ -13,13 +17,31 @@ "@type": "Dataset", "name": "Crate referencing another RO-Crate", 
"description": "An RO-Crate that references another RO-Crate as a data entity.", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "datePublished": "2024-01-01", - "publisher": {"@id": "https://ror.org/012345678"}, - "funder": {"@id": "https://ror.org/012345678"}, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + }, "hasPart": [ - {"@id": "https://example.org/other-ro-crate/"} - ] + { + "@id": "https://example.org/other-ro-crate/" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" }, { "@id": "https://example.org/other-ro-crate/", @@ -27,11 +49,19 @@ "name": "Other RO-Crate", "description": "A referenced RO-Crate.", "conformsTo": [ - {"@id": "https://w3id.org/ro/crate"}, - {"@id": "https://w3id.org/ro/crate/1.2"} + { + "@id": "https://w3id.org/ro/crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2" + } ], - "subjectOf": {"@id": "#other-ro-crate-metadata"}, - "distribution": {"@id": "https://example.org/other-ro-crate/archive.tar.gz"} + "subjectOf": { + "@id": "#other-ro-crate-metadata" + }, + "distribution": { + "@id": "https://example.org/other-ro-crate/archive.tar.gz" + } }, { "@id": "#other-ro-crate-metadata", From 8348a2cdd16912a6b042cc23b29514c7ce3e87fc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 08:43:15 +0200 Subject: [PATCH 117/352] feat(ro-crate-1.2): :sparkles: add checks for recommend properties of local datasets --- .../1.2/should/4_dataset_data_entity.ttl | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_dataset_data_entity.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_dataset_data_entity.ttl 
b/rocrate_validator/profiles/ro-crate/1.2/should/4_dataset_data_entity.ttl new file mode 100644 index 000000000..d46263091 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_dataset_data_entity.ttl @@ -0,0 +1,68 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix schema_org: . +@prefix sh: . + +# Selects local (relative-URI) non-root Dataset entities (directory Data Entities). +ro-crate:LocalDatasetTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + FILTER(!regex(str(?this), "^https?://", "i")) + } + """ . 
+ +# SHOULD: relative @id SHOULD end with '/' +ro-crate:DirectoryDataEntityTrailingSlash a sh:NodeShape ; + sh:name "Dataset Data Entity: SHOULD end with trailing slash" ; + sh:description """A Dataset Data Entity whose @id is a relative URI SHOULD end with '/' + (RO-Crate 1.2, Dataset Data Entities).""" ; + sh:target ro-crate:LocalDatasetTarget ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER(!regex(str(?this), "/$")) + } + """ ; + sh:name "Dataset Data Entity: RECOMMENDED trailing slash" ; + sh:description "Check that a local Dataset Data Entity @id ends with '/'." ; + sh:severity sh:Warning ; + sh:message "The @id of a local Dataset Data Entity SHOULD end with '/'" ; + ] . + +# SHOULD: hasPart SHOULD list contained Data Entities +ro-crate:DirectoryDataEntityHasPart a sh:NodeShape ; + sh:name "Dataset Data Entity: SHOULD have hasPart" ; + sh:description """A local Dataset Data Entity SHOULD use `hasPart` to list + the Data Entities it contains (RO-Crate 1.2, Dataset Data Entities).""" ; + sh:target ro-crate:LocalDatasetTarget ; + sh:property [ + a sh:PropertyShape ; + sh:name "Dataset Data Entity: RECOMMENDED `hasPart` property" ; + sh:description "Check that a local Dataset Data Entity lists its contents via `hasPart`." ; + sh:path schema_org:hasPart ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "Local Dataset Data Entities SHOULD list their contents via `hasPart`" ; + ] . 
From 3ffca78efae9a8b7a173ed2cb801127b519df2cb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 08:44:32 +0200 Subject: [PATCH 118/352] refactor(ro-crate-1.2): :recycle: use shared targets --- .../1.2/must/2_root_data_entity_metadata.ttl | 35 +++++------- .../ro-crate/1.2/must/8_workflow_metadata.ttl | 26 ++++----- .../1.2/should/8_workflow_metadata.ttl | 55 ++++++------------- 3 files changed, 42 insertions(+), 74 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl index 7f5139510..32296aee4 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_metadata.ttl @@ -21,6 +21,17 @@ @prefix xsd: . @prefix dct: . +# Selects the Root Data Entity (the Dataset that the metadata file is `about`). +ro-crate:MetadataAboutTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ . + ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape ; sh:name "Identify the Root Data Entity of the RO-Crate" ; sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. @@ -51,17 +62,7 @@ ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape ; ro-crate:RootDataEntityType a sh:NodeShape ; sh:name "RO-Crate Root Data Entity type" ; sh:description "The Root Data Entity MUST be a `Dataset` (as per `schema.org`)" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?metadatafile schema:about ?this . 
- FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:MetadataAboutTarget ; sh:property [ a sh:PropertyShape ; sh:name "Root Data Entity type" ; @@ -154,17 +155,7 @@ ro-crate:RootDataEntityVersionlessProfileProhibition a sh:NodeShape ; (https://w3id.org/ro/crate) in its conformsTo property. Only versioned profile URIs (e.g., https://w3id.org/ro/crate/1.2) are permitted on the Root Data Entity. The version-less URI is reserved for referenced RO-Crate data entities.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?metadatafile schema:about ?this . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:MetadataAboutTarget ; sh:sparql [ a sh:SPARQLConstraint ; sh:prefixes ro-crate:sparqlPrefixes ; diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl index de6690e33..3e1aa66e1 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/8_workflow_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . @@ -21,6 +20,18 @@ @prefix xsd: . @prefix validator: . +# Selects every SoftwareSourceCode entity (Scripts and Workflows). +ro-crate:SoftwareSourceCodeTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:SoftwareSourceCode . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ . 
+ # --------------------------------------------------------------- # Identify Script entities # @@ -150,18 +161,7 @@ ro-crate:ScriptOrWorkflowRequiredName a sh:NodeShape ; sh:name "Script or Workflow: REQUIRED `name`" ; sh:description """Scripts and Workflows MUST have a human-readable `name` property (RO-Crate 1.2, Workflows and Scripts).""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a schema:SoftwareSourceCode . - ?metadatafile schema:about ?root . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:SoftwareSourceCodeTarget ; sh:property [ a sh:PropertyShape ; sh:name "Script or Workflow: REQUIRED `name` property" ; diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl index 46e67aacd..c4a9aeebd 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/8_workflow_metadata.ttl @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - @prefix ro: <./> . @prefix ro-crate: . @prefix rdf: . @@ -22,6 +21,19 @@ @prefix xsd: . @prefix validator: . +# Selects ImageObject entities referenced via `image` from a Script or Workflow. +ro-crate:ScriptOrWorkflowImageTarget a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?sw a schema:SoftwareSourceCode . + ?sw schema:image ?this . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ . 
+ # --------------------------------------------------------------- # Script or Workflow: programmingLanguage SHOULD reference a # ComputerLanguage entity @@ -31,18 +43,7 @@ ro-crate:ScriptOrWorkflowProgrammingLanguage a sh:NodeShape ; sh:description """Scripts and Workflows SHOULD have a `programmingLanguage` property referencing a contextual entity of type `ComputerLanguage` (RO-Crate 1.2, Workflows and Scripts).""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a schema:SoftwareSourceCode . - ?metadatafile schema:about ?root . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:SoftwareSourceCodeTarget ; sh:property [ a sh:PropertyShape ; sh:name "Script or Workflow: RECOMMENDED `programmingLanguage` property" ; @@ -121,19 +122,7 @@ ro-crate:ScriptOrWorkflowImageEncodingFormat a sh:NodeShape ; Script or Workflow SHOULD have an `encodingFormat` property with an IANA media type string and/or a Pronom identifier reference (RO-Crate 1.2, Workflows and Scripts — related ImageObject).""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?sw a schema:SoftwareSourceCode . - ?sw schema:image ?this . - ?metadatafile schema:about ?root . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:ScriptOrWorkflowImageTarget ; sh:property [ a sh:PropertyShape ; sh:name "Script/Workflow ImageObject: RECOMMENDED `encodingFormat` property" ; @@ -155,19 +144,7 @@ ro-crate:ScriptOrWorkflowImageAbout a sh:NodeShape ; Script or Workflow SHOULD have an `about` property that references the script or workflow entity (RO-Crate 1.2, Workflows and Scripts — related ImageObject).""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?sw a schema:SoftwareSourceCode . 
- ?sw schema:image ?this . - ?metadatafile schema:about ?root . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - } - """ - ] ; + sh:target ro-crate:ScriptOrWorkflowImageTarget ; sh:sparql [ a sh:SPARQLConstraint ; sh:prefixes ro-crate:sparqlPrefixes ; From d389bcf5f8b298bd26895144faae34a183c715a1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 08:46:10 +0200 Subject: [PATCH 119/352] test(ro-crate-1.2): :white_check_mark: test recommended properties of local datasets --- .../invalid/ro-crate-metadata.json | 55 ++++++ .../valid/ro-crate-metadata.json | 64 +++++++ .../invalid/ro-crate-metadata.json | 64 +++++++ .../valid/ro-crate-metadata.json | 64 +++++++ .../test_metadata_dataEntities.py | 165 ++++++++++++++++++ 5 files changed, 412 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/invalid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..ed51ec703 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/invalid/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset hasPart — 
invalid", + "description": "RO-Crate where the local Dataset sub-entity does NOT declare hasPart.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "identifier": {"@id": "#uuid"}, + "hasPart": [{"@id": "subdir/"}] + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "subdir/", + "@type": "Dataset", + "name": "Sub-directory without hasPart", + "description": "A local Dataset sub-entity that omits hasPart, violating the RECOMMENDED convention." + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json new file mode 100644 index 000000000..05c7d2f71 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json @@ -0,0 +1,64 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset hasPart — valid", + "description": "RO-Crate where the local Dataset sub-entity declares hasPart listing its contents.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "identifier": {"@id": "#uuid"}, + "hasPart": [{"@id": "subdir/"}] + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "subdir/", + "@type": "Dataset", + "name": "Sub-directory with hasPart", + "description": "A local Dataset sub-entity that correctly lists its contents via hasPart.", + "hasPart": [{"@id": "subdir/data.csv"}] + }, + { + "@id": "subdir/data.csv", + "@type": "File", + "name": "Data file", + "description": "A data file listed via hasPart.", + "encodingFormat": "text/csv", + "contentSize": "1024" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": 
"https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/invalid/ro-crate-metadata.json new file mode 100644 index 000000000..1cad5fd91 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/invalid/ro-crate-metadata.json @@ -0,0 +1,64 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset trailing slash — invalid", + "description": "RO-Crate where the local Dataset sub-entity @id does NOT end with '/'.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "identifier": {"@id": "#uuid"}, + "hasPart": [{"@id": "subdir"}] + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "subdir", + "@type": "Dataset", + "name": "Sub-directory (missing trailing slash)", + "description": "A local Dataset sub-entity whose @id does NOT end with '/'. 
This violates the RECOMMENDED convention.", + "hasPart": [{"@id": "subdir/data.csv"}] + }, + { + "@id": "subdir/data.csv", + "@type": "File", + "name": "Data file", + "description": "A data file inside the sub-directory.", + "encodingFormat": "text/csv", + "contentSize": "1024" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json new file mode 100644 index 000000000..174b25ac7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json @@ -0,0 +1,64 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Dataset trailing slash — valid", + "description": "RO-Crate where the local Dataset sub-entity @id ends with '/' as required.", + "datePublished": "2024-01-01", + "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "publisher": {"@id": "#publisher-org"}, + "funder": {"@id": "https://ror.org/00k4n6c32"}, + "identifier": {"@id": "#uuid"}, + "hasPart": [{"@id": "subdir/"}] + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", 
+ "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "subdir/", + "@type": "Dataset", + "name": "Sub-directory", + "description": "A local Dataset sub-entity whose @id correctly ends with '/'.", + "hasPart": [{"@id": "subdir/data.csv"}] + }, + { + "@id": "subdir/data.csv", + "@type": "File", + "name": "Data file", + "description": "A data file inside the sub-directory.", + "encodingFormat": "text/csv", + "contentSize": "1024" + }, + { + "@id": "#publisher-org", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org" + }, + { + "@id": "https://ror.org/00k4n6c32", + "@type": "Organization", + "name": "European Commission", + "description": "The European Commission funds research and innovation programmes.", + "url": "https://ec.europa.eu" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index fb34e6c93..06d4f4993 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -355,3 +355,168 @@ def test_redundant_license_logs_warning(): log_contents = __log_stream__.getvalue() assert "redundant" in log_contents.lower(), \ f"Expected a warning log about redundant license, got:\n{log_contents}" + + +# --------------------------------------------------------------------------- +# File Data Entity — contentSize (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_recommended_contentSize(): + """ + A File Data Entity with the recommended `contentSize` property SHOULD pass + the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_contentSize, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_contentSize(): + """ + A File Data Entity without the recommended `contentSize` property SHOULD + fail the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_contentSize, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File Data Entity: RECOMMENDED contentSize"], + expected_triggered_issues=["contentSize"] + ) + + +# --------------------------------------------------------------------------- +# File Data Entity — conformsTo profile (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_recommended_conformsto(): + """ + A File Data Entity whose `conformsTo` references a CreativeWork or Profile + SHOULD pass the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_conformsto, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2" + ) + + +def test_invalid_recommended_conformsto(): + """ + A File Data Entity whose `conformsTo` references a non-Profile/non-CreativeWork + entity SHOULD fail the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_conformsto, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["File: RECOMMENDED `conformsTo` profile"], + expected_triggered_issues=["conformsTo"] + ) + + +# --------------------------------------------------------------------------- +# Web-based File Data Entity — sdDatePublished (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_recommended_sdDatePublished(monkeypatch): + """ + A web-based File Data Entity with the recommended `sdDatePublished` property + SHOULD pass the RECOMMENDED check. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_sdDatePublished, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_invalid_recommended_sdDatePublished(monkeypatch): + """ + A web-based File Data Entity without the recommended `sdDatePublished` property + SHOULD fail the RECOMMENDED check. + """ + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _ZipResponse()) + + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_sdDatePublished, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Web-based Data Entity: RECOMMENDED properties"], + expected_triggered_issues=["sdDatePublished"] + ) + + +# --------------------------------------------------------------------------- +# 4.3 Dataset (Directory) Data Entity — trailing slash (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_dataset_trailing_slash(): + """ + A local Dataset Data Entity whose @id ends with '/' passes the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_dataset_trailing_slash, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + metadata_only=True, + ) + + +def test_invalid_recommended_dataset_trailing_slash(): + """ + A local Dataset Data Entity whose @id does NOT end with '/' fails the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_dataset_trailing_slash, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + metadata_only=True, + expected_triggered_requirements=["Dataset Data Entity: SHOULD end with trailing slash"], + expected_triggered_issues=["The @id of a local Dataset Data Entity SHOULD end with '/'"], + ) + + +# --------------------------------------------------------------------------- +# 4.3 Dataset (Directory) Data Entity — hasPart (RECOMMENDED) +# --------------------------------------------------------------------------- + +def test_valid_recommended_dataset_has_part(): + """ + A local Dataset Data Entity that lists its contents via hasPart passes + the RECOMMENDED check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.valid_recommended_dataset_has_part, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + metadata_only=True, + ) + + +def test_invalid_recommended_dataset_has_part(): + """ + A local Dataset Data Entity without a hasPart property fails the RECOMMENDED check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_recommended_dataset_has_part, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + metadata_only=True, + expected_triggered_requirements=["Dataset Data Entity: SHOULD have hasPart"], + expected_triggered_issues=["Local Dataset Data Entities SHOULD list their contents via `hasPart`"], + ) From 3058fbe7bff3b53c6d2dc4bd7b739fc31cc850aa Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 08:48:16 +0200 Subject: [PATCH 120/352] test(ro-crate-1.2): :recycle: update skipped checks --- .../profiles/ro-crate-1.2/test_availability_flags.py | 3 ++- .../profiles/ro-crate-1.2/test_metadata_rootDataEntity.py | 7 ++++++- .../profiles/ro-crate-1.2/test_referenced_rocrate.py | 2 +- .../profiles/ro-crate-1.2/test_workflows_scripts.py | 6 +++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index b525fb495..a7a498508 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -30,7 +30,8 @@ "about", "affiliation", "author", "cite-as", "conformsTo", "funder", "contentLocation", "contentSize", "contentUrl", "dateCreated", "dateModified", "datePublished", "description", "encodingFormat", - "hasPart", "license", "name", "publisher", "sdDatePublished", "url", + "hasPart", "identifier", "license", "name", "propertyID", "publisher", + "sdDatePublished", "url", "value", } diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 7feea32c8..432da28ae 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -125,7 +125,9 
@@ def test_valid_recommended_citeAs_for_resolvable_id(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_44.1"], + # The test crate focuses on cite-as, not identifier format; + # skip the identifier-presence and PropertyValue-approach checks. + skip_checks=["ro-crate-1.2_51.1", "ro-crate-1.2_52.1"], ) @@ -212,6 +214,9 @@ def test_valid_recommended_identifier_resolution(monkeypatch): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", + # The identifier uses a plain URL string rather than a PropertyValue entity; + # that format check is not the focus of this test (resolution is). + skip_checks=["ro-crate-1.2_52.0"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index a8fcc8204..2487e0984 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -33,7 +33,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_49.1", "ro-crate-1.2_32.0", "ro-crate-1.2_55.1", "ro-crate-1.2_33.0"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py index af190a4f3..12c8b8548 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -33,9 +33,9 @@ "ro-crate-1.2_54.1", # Root Data Entity: recommended publisher "ro-crate-1.2_61.1", # File Data Entity: RECOMMENDED contentSize "ro-crate-1.2_62.0", # File: RECOMMENDED conformsTo profile - "ro-crate-1.2_70.1", # Contextual Entity Properties - "ro-crate-1.2_71.1", # Contextual Entity RECOMMENDED description - 
"ro-crate-1.2_76.2", # License entity: RECOMMENDED properties + "ro-crate-1.2_72.1", # Contextual Entity Properties + "ro-crate-1.2_73.1", # Contextual Entity RECOMMENDED description + "ro-crate-1.2_78.2", # License entity: RECOMMENDED properties ] From 6877898ab2fe026146e5e57d4bb618c7f0fd9f99 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 09:08:37 +0200 Subject: [PATCH 121/352] fix(ro-crate-1.2): :bug: fix shape target --- .../profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl index 546d1b320..488dbfb16 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/may/4_data_entity_metadata.ttl @@ -75,7 +75,7 @@ ro-crate:FileDataEntityWebOptionalProperties a sh:NodeShape ; ro-crate:DirectoryDataEntityWebOptionalDistribution a sh:NodeShape ; sh:name "Directory Data Entity: OPTIONAL `distribution` property" ; sh:description """A Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ; - sh:targetClass ro-crate:File ; + sh:targetClass ro-crate:Directory ; # Check if the Web-based Data Entity has a contentSize property sh:property [ a sh:PropertyShape ; From a1791258da896e7c62c009aca0bc41825c852493 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 09:28:20 +0200 Subject: [PATCH 122/352] feat(ro-crate-1.2): :sparkles: add recommended checks for License entities --- .../1.2/must/6_contextual_entity_metadata.ttl | 11 +++++- .../should/6_contextual_entity_metadata.ttl | 39 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl 
b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 4690a9aa2..7db4c22fa 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -81,10 +81,19 @@ ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; SELECT ?this WHERE { ?subject schema:license ?this . - FILTER EXISTS { ?this ?p ?o . FILTER(?p NOT IN (owl:sameAs, rdf:type)) } + FILTER(isIRI(?this)) + # FILTER EXISTS { ?this ?p ?o . FILTER(?p NOT IN (owl:sameAs, rdf:type)) } } """ ] ; + + # Expand data graph with triples to mark the matching entities as License instances + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:License ; + ] ; # Expand data graph with triples from the file data entity sh:rule [ diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 2de19b93f..2384415dd 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -209,6 +209,45 @@ ro-crate:GeometryWktRecommendedProperties a sh:NodeShape ; sh:message "Geometry entities SHOULD provide `asWKT`" ; ] . +ro-crate:LicenseEntityAbsoluteUrl a sh:NodeShape ; + sh:name "License entity: SHOULD have absolute URL @id" ; + sh:description """A License entity SHOULD be identified by an absolute HTTP(S) URL, + for example an SPDX license URI (https://spdx.org/licenses/). 
+ (RO-Crate 1.2, Contextual Entities — License)""" ; + sh:targetClass ro-crate:License ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER(!regex(str(?this), "^https?://", "i")) + } + """ ; + sh:severity sh:Warning ; + sh:message "A License entity SHOULD have an absolute HTTP(S) URL as its @id (e.g., an SPDX license URI such as https://spdx.org/licenses/MIT.html)" ; + ] . + +ro-crate:LicenseEntityCreativeWorkType a sh:NodeShape ; + sh:name "License entity: SHOULD be typed as CreativeWork" ; + sh:description """A License entity SHOULD include `schema:CreativeWork` in its `@type`. + (RO-Crate 1.2, Contextual Entities — License)""" ; + sh:targetClass ro-crate:License ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER NOT EXISTS { + ?this a schema:CreativeWork . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "A License entity SHOULD have `CreativeWork` in its `@type`" ; + ] . 
+ ro-crate:EncodingFormatRecommendedTypes a sh:NodeShape ; sh:name "Encoding format: RECOMMENDED types" ; sh:description """Encoding format contextual entities SHOULD include WebPage and/or Standard types.""" ; From 57bf59f2c9a243503dd03a8fa880201f117a5146 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 09:29:18 +0200 Subject: [PATCH 123/352] test(ro-crate-1.2): :white_check_mark: test recommended properties of License entities --- .../ro-crate-metadata.json | 24 ++++ .../invalid_no_name/ro-crate-metadata.json | 24 ++++ .../invalid_no_type/ro-crate-metadata.json | 25 ++++ .../invalid_no_url/ro-crate-metadata.json | 25 ++++ .../valid/ro-crate-metadata.json | 25 ++++ .../test_metadata_contextualEntities.py | 120 ++++++++++++++++++ 6 files changed, 243 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_description/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_name/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_type/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_url/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json create mode 100644 tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_description/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_description/ro-crate-metadata.json new file mode 100644 index 000000000..0f57a6873 --- /dev/null +++ 
b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_description/ro-crate-metadata.json @@ -0,0 +1,24 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "License entity — missing description", + "description": "RO-Crate whose license entity is missing the `description` property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" } + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_name/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_name/ro-crate-metadata.json new file mode 100644 index 000000000..41cac57d5 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_name/ro-crate-metadata.json @@ -0,0 +1,24 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "License entity — missing name", + "description": "RO-Crate whose license entity is missing the `name` property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" } + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "description": "A short and simple permissive license with conditions only requiring preservation of copyright and license notices." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_type/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_type/ro-crate-metadata.json new file mode 100644 index 000000000..15bf6db50 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_type/ro-crate-metadata.json @@ -0,0 +1,25 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "License entity — missing CreativeWork type", + "description": "RO-Crate whose license entity has an absolute URL @id but is missing CreativeWork in @type.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" } + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "Thing", + "name": "MIT License", + "description": "A short and simple permissive license with conditions only requiring preservation of copyright and license notices." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_url/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_url/ro-crate-metadata.json new file mode 100644 index 000000000..fc676b0d9 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/invalid_no_url/ro-crate-metadata.json @@ -0,0 +1,25 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "License entity — non-absolute URL @id", + "description": "RO-Crate whose license entity is identified by a fragment URI, not an absolute HTTP(S) URL.", + "datePublished": "2024-01-01", + "license": { "@id": "#local-license" } + }, + { + "@id": "#local-license", + "@type": "CreativeWork", + "name": "Local License", + "description": "A locally-defined license without an absolute URL identifier." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json new file mode 100644 index 000000000..5837f0e88 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json @@ -0,0 +1,25 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "License entity — valid", + "description": "RO-Crate with a well-formed license entity: absolute HTTPS URL @id, CreativeWork type, name and description.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" } + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A short and simple permissive license with conditions only requiring preservation of copyright and license notices." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py new file mode 100644 index 000000000..21eaa7707 --- /dev/null +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -0,0 +1,120 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator import models +from tests.ro_crates_1_2 import ContextualEntities +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) + +__contextual_entities_crates__ = ContextualEntities() + +# Generic RECOMMENDED checks that fire on minimal test crates regardless of the +# contextual-entity-specific property being tested. +_GENERIC_RECOMMENDED_SKIP = [ + "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher +] + + +# --------------------------------------------------------------------------- +# License entity: SHOULD be typed as CreativeWork (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_license_entity(): + """ + A license entity with an absolute URL @id, CreativeWork type, name and + description SHOULD pass RECOMMENDED validation. + """ + do_entity_test( + __contextual_entities_crates__.valid_license_entity, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_license_entity_no_type(): + """ + A license entity missing `CreativeWork` in its @type SHOULD trigger a + RECOMMENDED warning. 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_license_entity_no_type, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["License entity: SHOULD be typed as CreativeWork"], + expected_triggered_issues=[ + "A License entity SHOULD have `CreativeWork` in its `@type`" + ], + ) + + +def test_invalid_license_entity_no_url(): + """ + A license entity whose @id is not an absolute HTTP(S) URL SHOULD trigger + a RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_license_entity_no_url, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["License entity: SHOULD have absolute URL @id"], + expected_triggered_issues=[ + "A License entity SHOULD have an absolute HTTP(S) URL as its @id" + ], + ) + + +# --------------------------------------------------------------------------- +# License entity: name and description SHOULD be present (SHOULD) +# --------------------------------------------------------------------------- + +def test_invalid_license_entity_no_name(): + """ + A license entity missing the `name` property SHOULD trigger a RECOMMENDED + warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_license_entity_no_name, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["License entity: RECOMMENDED properties"], + expected_triggered_issues=[ + "License entities SHOULD have a name" + ], + ) + + +def test_invalid_license_entity_no_description(): + """ + A license entity missing the `description` property SHOULD trigger a + RECOMMENDED warning. 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_license_entity_no_description, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["License entity: RECOMMENDED properties"], + expected_triggered_issues=[ + "License entities SHOULD have a description" + ], + ) From f0f11f8b06302807b4be384c9a45d56cb80b9073 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 13:55:54 +0200 Subject: [PATCH 124/352] feat(ro-crate-1.2): :sparkles: add checks for recommended properties of Organization entity --- .../1.2/should/6_organization_metadata.ttl | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl new file mode 100644 index 000000000..ae36acf3f --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl @@ -0,0 +1,99 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix xsd: . 
+ +ro-crate:OrganizationRorIdentifier a sh:NodeShape ; + sh:name "Organization: SHOULD have ROR identifier" ; + sh:description """An Organization entity SHOULD use a ROR (Research Organization Registry) identifier + as its @id, if possible. + (RO-Crate 1.2, Contextual Entities — Organizations as values; Metadata — Recommended Identifiers)""" ; + sh:targetClass schema:Organization ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER(!STRSTARTS(STR(?this), "https://ror.org/")) + } + """ ; + sh:severity sh:Warning ; + sh:message "An Organization entity SHOULD have a ROR identifier as its @id (e.g., https://ror.org/03f0f6041)" ; + ] . + +ro-crate:OrganizationContactPoint a sh:NodeShape ; + sh:name "Organization: SHOULD have contactPoint referencing ContactPoint" ; + sh:description """An Organization SHOULD have a contactPoint property referencing a ContactPoint contextual entity. + (RO-Crate 1.2, Contextual Entities — Contact Information)""" ; + sh:targetClass schema:Organization ; + sh:property [ + sh:path schema:contactPoint ; + sh:minCount 1 ; + sh:class schema:ContactPoint ; + sh:severity sh:Warning ; + sh:name "Organization: RECOMMENDED contactPoint references ContactPoint entity" ; + sh:description "Check if the Organization has a contactPoint referencing a ContactPoint entity." ; + sh:message "An Organization's contactPoint SHOULD reference a ContactPoint contextual entity" ; + ] . + +ro-crate:PersonContactPoint a sh:NodeShape ; + sh:name "Person: SHOULD have contactPoint referencing ContactPoint" ; + sh:description """A Person's contactPoint property SHOULD reference a ContactPoint contextual entity. 
+ (RO-Crate 1.2, Contextual Entities — Contact Information)""" ; + sh:targetClass schema:Person ; + sh:property [ + sh:path schema:contactPoint ; + sh:class schema:ContactPoint ; + sh:severity sh:Warning ; + sh:name "Person: RECOMMENDED contactPoint references ContactPoint entity" ; + sh:description "Check if the Person's contactPoint references a ContactPoint entity." ; + sh:message "A Person's contactPoint SHOULD reference a ContactPoint contextual entity" ; + ] . + +ro-crate:AuthorPublisherContactPoint a sh:NodeShape ; + sh:name "Root Data Entity: at least one author/publisher SHOULD have contactPoint" ; + sh:description """At least one of the Person or Organization entities referenced as author or publisher + from the Root Data Entity SHOULD have a contactPoint property. + (RO-Crate 1.2, Contextual Entities — Contact Information)""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + { + ?this schema:author ?author . + } UNION { + ?this schema:publisher ?publisher . + BIND(?publisher AS ?author) + } + FILTER NOT EXISTS { + { + ?this schema:author ?a . + } UNION { + ?this schema:publisher ?a . + } + ?a schema:contactPoint ?cp . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "At least one author or publisher Person/Organization SHOULD have a contactPoint property" ; + ] . 
From e02497115a7c06635bc70f7478051a3cfb013c88 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 13:57:13 +0200 Subject: [PATCH 125/352] test(ro-crate-1.2): :white_check_mark: test recommended properties of Organization entity --- .../ro-crate-metadata.json | 51 +++++++++ .../ro-crate-metadata.json | 38 +++++++ .../invalid_no_ror_id/ro-crate-metadata.json | 52 +++++++++ .../ro-crate-metadata.json | 45 ++++++++ .../valid/ro-crate-metadata.json | 74 +++++++++++++ .../test_metadata_contextualEntities.py | 101 ++++++++++++++++++ 6 files changed, 361 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_contactpoint_no_contactpoint_entity/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_author_publisher_contactpoint/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_ror_id/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_org_no_contactpoint/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_contactpoint_no_contactpoint_entity/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_contactpoint_no_contactpoint_entity/ro-crate-metadata.json new file mode 100644 index 000000000..a0a31bf7a --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_contactpoint_no_contactpoint_entity/ro-crate-metadata.json @@ -0,0 +1,51 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + 
"@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Organization entity — contactPoint not ContactPoint type", + "description": "RO-Crate with an Organization whose contactPoint does not reference a ContactPoint entity.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" }, + "publisher": { "@id": "https://ror.org/03f0f6041" }, + "author": { "@id": "https://orcid.org/0000-0001-6121-5409" } + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "url": "https://ror.org/03f0f6041", + "contactPoint": { "@id": "#not-a-contactpoint" } + }, + { + "@id": "#not-a-contactpoint", + "@type": "Thing", + "name": "Not a ContactPoint" + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { "@id": "mailto:tim.luckett@uts.edu.au" } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_author_publisher_contactpoint/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_author_publisher_contactpoint/ro-crate-metadata.json new file mode 100644 index 000000000..d73e2466c --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_author_publisher_contactpoint/ro-crate-metadata.json @@ -0,0 +1,38 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Organization entity — no author/publisher contactPoint", + "description": "RO-Crate where neither author nor publisher has a contactPoint property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" }, + "publisher": { "@id": "https://ror.org/03f0f6041" }, + "author": { "@id": "https://orcid.org/0000-0001-6121-5409" } + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "url": "https://ror.org/03f0f6041" + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_ror_id/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_ror_id/ro-crate-metadata.json new file mode 100644 index 000000000..feec29fa1 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_no_ror_id/ro-crate-metadata.json @@ -0,0 +1,52 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Organization entity — non-ROR @id", + "description": "RO-Crate with an Organization entity whose @id is not a ROR identifier.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" }, + "publisher": { "@id": "https://example.com/org" }, + "author": { "@id": "https://orcid.org/0000-0001-6121-5409" } + }, + { + "@id": "https://example.com/org", + "@type": "Organization", + "name": "Example Organization", + "url": "https://example.com/org", + "contactPoint": { "@id": "mailto:contact@example.com" } + }, + { + "@id": "mailto:contact@example.com", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@example.com" + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { "@id": "mailto:tim.luckett@uts.edu.au" } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_org_no_contactpoint/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_org_no_contactpoint/ro-crate-metadata.json new file mode 100644 index 000000000..2e10c3ef4 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/invalid_org_no_contactpoint/ro-crate-metadata.json @@ -0,0 +1,45 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { "@id": "./" }, + "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Organization entity — no contactPoint", + "description": "RO-Crate with an Organization entity that has a ROR @id but no contactPoint property.", + "datePublished": "2024-01-01", + "license": { "@id": "https://spdx.org/licenses/MIT.html" }, + "publisher": { "@id": "https://ror.org/03f0f6041" }, + "author": { "@id": "https://orcid.org/0000-0001-6121-5409" } + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "url": "https://ror.org/03f0f6041" + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { "@id": "mailto:tim.luckett@uts.edu.au" } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/valid/ro-crate-metadata.json new file mode 100644 index 000000000..ca470b993 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/organization_entity/valid/ro-crate-metadata.json @@ -0,0 +1,74 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Organization entity — valid", + "description": "RO-Crate with a well-formed Organization entity: ROR @id, contactPoint referencing ContactPoint, and author with contactPoint.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://spdx.org/licenses/MIT.html" + }, + "publisher": { + "@id": "https://ror.org/03f0f6041" + }, + "author": { + "@id": "https://orcid.org/0000-0001-6121-5409" + }, + "funder": { + "@id": "https://ror.org/03f0f6041" + } + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "url": "https://ror.org/03f0f6041", + "contactPoint": { + "@id": "mailto:contact@uts.edu.au" + } + }, + { + "@id": "mailto:contact@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@uts.edu.au", + "name": "UTS contact point" + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { + "@id": "mailto:tim.luckett@uts.edu.au" + }, + "affiliation": { + "@id": "https://ror.org/03f0f6041" + } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au", + "name": "Tim Luckett's contact 
point" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 21eaa7707..73d02865c 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -118,3 +118,104 @@ def test_invalid_license_entity_no_description(): "License entities SHOULD have a description" ], ) + + +# --------------------------------------------------------------------------- +# Organization entity: SHOULD have ROR identifier as @id (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_organization_entity(): + """ + An Organization entity with a ROR @id, contactPoint referencing ContactPoint, + and author with contactPoint SHOULD pass RECOMMENDED validation. + """ + do_entity_test( + __contextual_entities_crates__.valid_organization_entity, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_organization_no_ror_id(): + """ + An Organization entity whose @id is not a ROR identifier SHOULD trigger a + RECOMMENDED warning. 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_organization_no_ror_id, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Organization: SHOULD have ROR identifier"], + expected_triggered_issues=[ + "An Organization entity SHOULD have a ROR identifier as its @id" + ], + ) + + +# --------------------------------------------------------------------------- +# Organization/Person contactPoint: SHOULD reference ContactPoint entity (SHOULD) +# --------------------------------------------------------------------------- + +def test_invalid_organization_contactpoint_no_entity(): + """ + An Organization whose contactPoint does not reference a ContactPoint entity + SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_organization_contactpoint_no_entity, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Organization: SHOULD have contactPoint referencing ContactPoint" + ], + expected_triggered_issues=[ + "An Organization's contactPoint SHOULD reference a ContactPoint contextual entity" + ], + ) + + +def test_invalid_organization_no_contactpoint(): + """ + An Organization without a contactPoint property (where the author still has + contactPoint so only the Organization-specific check fires). 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_organization_no_contactpoint, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Organization: SHOULD have contactPoint referencing ContactPoint" + ], + expected_triggered_issues=[ + "An Organization's contactPoint SHOULD reference a ContactPoint contextual entity" + ], + ) + + +# --------------------------------------------------------------------------- +# Author/Publisher: at least one SHOULD have contactPoint (SHOULD) +# --------------------------------------------------------------------------- + +def test_invalid_no_author_publisher_contactpoint(): + """ + When neither the author nor the publisher Person/Organization has a + contactPoint property, the Root Data Entity SHOULD trigger a + RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_no_author_publisher_contactpoint, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Root Data Entity: at least one author/publisher SHOULD have contactPoint" + ], + expected_triggered_issues=[ + "At least one author or publisher Person/Organization SHOULD have a contactPoint property" + ], + ) From d627c0e3ffeffa38e1cb5372d10c90fcf6ca957c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 13:58:56 +0200 Subject: [PATCH 126/352] test(ro-crate-1.2): :recycle: refactor existing test data --- tests/data/crates/1.2/README.md | 15 ----- .../basic-ro-crate-metadata.json | 37 ------------ .../multi-ro-crate-metadata.json | 60 ------------------- .../profiled-ro-crate-metadata.json | 51 ---------------- .../valid/ro-crate-metadata.json | 34 ++++++++--- .../valid/prefix-ro-crate-metadata.json | 14 ++++- tests/data/crates/rocrate-1.2/README.md | 15 ----- .../prefix-ro-crate-metadata.json | 28 ++++++++- .../ro-crate-1.2/test_availability_flags.py | 2 +- 9 files changed, 64 insertions(+), 
192 deletions(-) delete mode 100644 tests/data/crates/1.2/README.md delete mode 100644 tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json delete mode 100644 tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json delete mode 100644 tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json delete mode 100644 tests/data/crates/rocrate-1.2/README.md diff --git a/tests/data/crates/1.2/README.md b/tests/data/crates/1.2/README.md deleted file mode 100644 index 6f11496c0..000000000 --- a/tests/data/crates/1.2/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# RO-Crate 1.2 Detached examples - -This folder contains example Detached RO-Crate metadata files for 1.2. Each example is a metadata-only JSON-LD file that references remote data entities. - -## Examples -- `detached-basic/basic-ro-crate-metadata.json` - - Minimal detached RO-Crate with a single remote file. -- `detached-multi/multi-ro-crate-metadata.json` - - Detached RO-Crate with multiple remote files and a remote dataset. -- `detached-with-profile/profiled-ro-crate-metadata.json` - - Detached RO-Crate that declares conformance to an additional profile. - -## Notes -- Detached RO-Crates have no local payload; all data entities use absolute URIs. -- The metadata document uses the 1.2 context by reference. 
diff --git a/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json b/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json deleted file mode 100644 index 65f04550d..000000000 --- a/tests/data/crates/1.2/detached-basic/basic-ro-crate-metadata.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.2/context", - "@graph": [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.2" - }, - "about": { - "@id": "https://example.org/ro-crate/detached/basic" - } - }, - { - "@id": "https://example.org/ro-crate/detached/basic", - "@type": "Dataset", - "name": "Detached RO-Crate (basic)", - "description": "Minimal detached RO-Crate referencing a remote file.", - "datePublished": "2024-05-17", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - }, - "hasPart": [ - { - "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" - } - ] - }, - { - "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", - "@type": "File", - "name": "iso_8859-1.txt", - "description": "Remote text file referenced by the detached RO-Crate.", - "encodingFormat": "text/plain" - } - ] -} diff --git a/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json b/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json deleted file mode 100644 index 8e97a9115..000000000 --- a/tests/data/crates/1.2/detached-multi/multi-ro-crate-metadata.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.2/context", - "@graph": [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.2" - }, - "about": { - "@id": "https://example.org/ro-crate/detached/multi" - } - }, - { - "@id": "https://example.org/ro-crate/detached/multi", - "@type": "Dataset", - "name": "Detached RO-Crate (multi)", - "description": "Detached RO-Crate with multiple remote data entities.", - "datePublished": "2024-05-17", - "license": 
{ - "@id": "http://spdx.org/licenses/CC0-1.0" - }, - "hasPart": [ - { - "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt" - }, - { - "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg" - }, - { - "@id": "https://example.org/datasets/sample" - } - ] - }, - { - "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", - "@type": "File", - "name": "iso_8859-1.txt", - "description": "Remote text file.", - "encodingFormat": "text/plain" - }, - { - "@id": "https://www.w3.org/2008/site/images/logo-w3c-mobile-lg", - "@type": "File", - "name": "W3C logo", - "description": "Remote image file.", - "encodingFormat": "image/png" - }, - { - "@id": "https://example.org/datasets/sample", - "@type": "Dataset", - "name": "Sample remote dataset", - "description": "Remote dataset referenced by the detached RO-Crate.", - "datePublished": "2024-05-17", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - } - } - ] -} diff --git a/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json b/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json deleted file mode 100644 index eaf511599..000000000 --- a/tests/data/crates/1.2/detached-with-profile/profiled-ro-crate-metadata.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.2/context", - "@graph": [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.2" - }, - "about": { - "@id": "https://example.org/ro-crate/detached/profiled" - } - }, - { - "@id": "https://example.org/ro-crate/detached/profiled", - "@type": "Dataset", - "name": "Detached RO-Crate (profiled)", - "description": "Detached RO-Crate declaring conformance to an additional profile.", - "datePublished": "2024-05-17", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - }, - "conformsTo": [ - { - "@id": "https://example.org/profiles/example-profile" - } - ], - "hasPart": [ - { - "@id": 
"https://www.w3.org/TR/PNG/iso_8859-1.txt" - } - ] - }, - { - "@id": "https://example.org/profiles/example-profile", - "@type": [ - "Profile", - "CreativeWork" - ], - "name": "Example profile", - "description": "Example RO-Crate profile contextual entity." - }, - { - "@id": "https://www.w3.org/TR/PNG/iso_8859-1.txt", - "@type": "File", - "name": "iso_8859-1.txt", - "description": "Remote text file.", - "encodingFormat": "text/plain" - } - ] -} diff --git a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json index 4bf4ca099..b3c407993 100644 --- a/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/2_attached_rocrates/attached-preview-not-in-hasPart/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/00k4n6c32" }, "hasPart": [ { @@ -43,10 +43,23 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "mailto:contact@example.org" + }, + "identifier": { + "@id": "https://ror.org/00k4n6c32" + } + }, + { + "@id": "mailto:contact@example.org", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@example.org", + "name": "Example Research Institute contact point" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -119,14 +132,17 @@ "@id": "https://ror.org/12345", "@type": "Organization", "name": "Example University", - "url": "https://www.exampleuniversity.edu" + "url": "https://www.exampleuniversity.edu", + "contactPoint": { + "@id": 
"mailto:contact@exampleuniversity.edu" + } }, { - "@id": "https://ror.org/00k4n6c32", - "@type": "Organization", - "name": "European Commission", - "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "@id": "mailto:contact@exampleuniversity.edu", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@exampleuniversity.edu", + "name": "Example University contact point" } ] } diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json index c8e3dbcbc..05614a79b 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/3_detached_rocrates/root-data-entity-identifier/online-available/valid/prefix-ro-crate-metadata.json @@ -46,7 +46,17 @@ "@id": "https://ror.org/012345678", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "mailto:contact@example.org" + } + }, + { + "@id": "mailto:contact@example.org", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@example.org", + "name": "Example Research Institute contact point" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -55,4 +65,4 @@ "description": "A Creative Commons license." } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/README.md b/tests/data/crates/rocrate-1.2/README.md deleted file mode 100644 index 6f11496c0..000000000 --- a/tests/data/crates/rocrate-1.2/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# RO-Crate 1.2 Detached examples - -This folder contains example Detached RO-Crate metadata files for 1.2. 
Each example is a metadata-only JSON-LD file that references remote data entities. - -## Examples -- `detached-basic/basic-ro-crate-metadata.json` - - Minimal detached RO-Crate with a single remote file. -- `detached-multi/multi-ro-crate-metadata.json` - - Detached RO-Crate with multiple remote files and a remote dataset. -- `detached-with-profile/profiled-ro-crate-metadata.json` - - Detached RO-Crate that declares conformance to an additional profile. - -## Notes -- Detached RO-Crates have no local payload; all data entities use absolute URIs. -- The metadata document uses the 1.2 context by reference. diff --git a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json index c25bb9e24..71cb8c004 100644 --- a/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json +++ b/tests/data/crates/valid/ro-crate-1.2-absolute-root/prefix-ro-crate-metadata.json @@ -34,7 +34,17 @@ "cite-as": { "@id": "https://github.com/crs4/pydoop/tree/develop/" }, - "url": "https://github.com/crs4/pydoop/tree/develop/" + "url": "https://github.com/crs4/pydoop/tree/develop/", + "identifier": { + "@id": "#pydoop-pid" + } + }, + { + "@id": "#pydoop-pid", + "@type": "PropertyValue", + "name": "Pydoop repository URL", + "propertyID": "URL", + "value": "https://github.com/crs4/pydoop/tree/develop/" }, { "@id": "https://raw.githubusercontent.com/crs4/pydoop/refs/heads/develop/README.md", @@ -78,6 +88,9 @@ "description": "An example author with an ORCID identifier.", "affiliation": { "@id": "https://ror.org/123456789" + }, + "contactPoint": { + "@id": "#contact-point" } }, { @@ -85,7 +98,18 @@ "@type": "Organization", "name": "Example University", "description": "An example university affiliated with the author.", - "url": "https://exampleuniversity.edu" + "url": "https://exampleuniversity.edu", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": 
"#contact-point", + "@type": "ContactPoint", + "name": "Contact", + "contactType": "customer support", + "email": "info@exampleuniversity.edu", + "url": "https://exampleuniversity.edu/contact" }, { "@id": "https://creativecommons.org/licenses/by/4.0/", diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index a7a498508..48aba136c 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -31,7 +31,7 @@ "contentLocation", "contentSize", "contentUrl", "dateCreated", "dateModified", "datePublished", "description", "encodingFormat", "hasPart", "identifier", "license", "name", "propertyID", "publisher", - "sdDatePublished", "url", "value", + "sdDatePublished", "url", "value", "contactPoint", "contactType", "email", } From c62e0e30c23decbef0262b0744c0cc8acfd5be32 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 14:40:51 +0200 Subject: [PATCH 127/352] refactor(ro-crate-1.2): :wastebasket: remove obsolete code --- .../1.2/must/2_root_data_entity_haspart.py | 68 -------------- .../1.2/must/2_root_data_entity_haspart.ttl | 50 ---------- .../ro-crate/1.2/must/6_contextual_entity.ttl | 91 ------------------- .../ro-crate/1.2/should/0_contact_point.py | 52 ----------- .../1.2/should/0_contextual_entity_links.ttl | 55 ----------- .../should/0_detached_metadata_filename.py | 48 ---------- .../ro-crate/1.2/should/0_entity_name.ttl | 41 --------- .../1.2/should/0_entity_reachability.py | 64 ------------- .../should/2_root_data_entity_relative_uri.py | 47 ---------- .../1.2/should/4_data_entity_existence.py | 58 ------------ 10 files changed, 574 deletions(-) delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.py delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl delete mode 100644 
rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py delete mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.py b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.py deleted file mode 100644 index 9cbcae39b..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="Root Data Entity: hasPart coverage") -class RootDataEntityHasPartChecker(PyFunctionCheck): - """ - Root Data Entity MUST reference all Data Entities directly or indirectly via hasPart. - """ - - @check(name="Root Data Entity: hasPart must cover all Data Entities") - def check_has_part(self, context: ValidationContext) -> bool: - try: - root = context.ro_crate.metadata.get_root_data_entity() - data_entities = [ - e for e in context.ro_crate.metadata.get_data_entities() - if not e.has_local_identifier() and e.id != root.id - ] - - reachable = set() - stack = [] - root_has_part = root.get_property("hasPart") - if root_has_part: - stack.extend(root_has_part if isinstance(root_has_part, list) else [root_has_part]) - - while stack: - current = stack.pop() - if hasattr(current, "id"): - current_id = current.id - else: - continue - if current_id in reachable: - continue - reachable.add(current_id) - if hasattr(current, "get_property"): - nested = current.get_property("hasPart") - if nested: - stack.extend(nested if isinstance(nested, list) else [nested]) - - missing = [e.id for e in data_entities if e.id not in reachable] - if missing: - context.result.add_issue( - f"Root Data Entity hasPart does not cover Data Entities: {missing}", self) - return False - return True - except Exception as e: - context.result.add_issue( - f"Error checking hasPart coverage: {str(e)}", self) - return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl deleted file mode 100644 index 4b71e1a47..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl +++ /dev/null 
@@ -1,50 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -@prefix ro-crate: . -@prefix schema_org: . -@prefix sh: . - -ro-crate:RootDataEntityHasPartAllDataEntities a sh:NodeShape ; - sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; - sh:description """The Root Data Entity MUST directly or indirectly reference - all Data Entities in the RO-Crate via hasPart (RO-Crate 1.2).""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a ro-crate:RootDataEntity . - } - """ - ] ; - sh:sparql [ - a sh:SPARQLConstraint ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT $this ?unreferenced - WHERE { - $this a ro-crate:RootDataEntity . - ?unreferenced a ?entityType . - FILTER(?entityType IN (schema:MediaObject, schema:Dataset)) - FILTER(?unreferenced != $this) - FILTER(!strstarts(str(?unreferenced), "#")) - FILTER NOT EXISTS { - $this schema:hasPart+ ?unreferenced - } - } - """ ; - sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; - sh:message "The Root Data Entity MUST reference all Data Entities via hasPart (directly or indirectly)" ; - ] . 
\ No newline at end of file diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl deleted file mode 100644 index a7a02b975..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity.ttl +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -@prefix ro: <./> . -@prefix ro-crate: . -@prefix rdf: . -@prefix schema: . -@prefix sh: . -@prefix xsd: . -@prefix owl: . -@prefix validator: . - - -ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; - sh:name "Identify License Entity" ; - sh:description """Mark a license entity any Data Entity referenced by the `schema:license` property.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?subject schema:license ?this . - } - """ - ] ; - - # Expand data graph with triples from the file data entity - sh:rule [ - a sh:TripleRule ; - sh:subject sh:this ; - sh:predicate rdf:type ; - sh:object ro-crate:ContextualEntity ; - ] . 
- - -ro-crate:WebSiteRecommendedProperties a sh:NodeShape ; - sh:name "WebSite RECOMMENDED Properties" ; - sh:description """A `WebSite` MUST be identified by a valid IRI and MUST have a `name` property.""" ; - sh:targetClass schema:WebSite ; - sh:property [ - sh:path [sh:inversePath rdf:type] ; - sh:datType sh:IRI ; - sh:name "WebSite: value restriction of its identifier" ; - sh:description "Check if the WebSite has a valid IRI" ; - sh:message "A WebSite MUST have a valid IRI" ; - ] ; - sh:property [ - sh:path schema:name ; - sh:minCount 1 ; - sh:dataType xsd:string ; - sh:name "WebSite: REQUIRED `name` property" ; - sh:description "Check if the WebSite has a `name` property" ; - sh:message "A WebSite MUST have a `name` property" ; - ] . - - -ro-crate:CreativeWorkAuthorDefinition a sh:NodeShape, validator:HiddenShape ; - sh:name "CreativeWork Author Definition" ; - sh:description """Define the `CreativeWorkAuthor` as the `Person` object of the `schema:author` predicate.""" ; - sh:targetObjectsOf schema:author ; - sh:rule [ - a sh:TripleRule ; - sh:subject sh:this ; - sh:predicate rdf:type ; - sh:object ro-crate:CreativeWorkAuthor ; - sh:condition [ - sh:property [ sh:path rdf:type ; sh:hasValue schema:Person ; sh:minCount 1 ] ; - ] ; - ] . - -ro-crate:ThumbnailReferencesFile a sh:NodeShape ; - sh:name "Thumbnail reference" ; - sh:description """If `thumbnail` is present, it MUST reference a File data entity.""" ; - sh:targetSubjectsOf schema:thumbnail ; - sh:property [ - sh:path schema:thumbnail ; - sh:class schema:MediaObject ; - sh:message "If present, `thumbnail` MUST reference a File data entity" ; - ] . 
diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py deleted file mode 100644 index 654d62efa..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_contact_point.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="Contact point for authors or publishers") -class ContactPointChecker(PyFunctionCheck): - """ - At least one author or publisher referenced from the Root Data Entity SHOULD have a contactPoint. 
- """ - - @check(name="Contact point presence") - def check_contact_point(self, context: ValidationContext) -> bool: - try: - root = context.ro_crate.metadata.get_root_data_entity() - candidates = [] - for prop in ("author", "publisher"): - value = root.get_property(prop) - if value is None: - continue - values = value if isinstance(value, list) else [value] - candidates.extend(values) - if not candidates: - return True - for entity in candidates: - if hasattr(entity, "get_property") and entity.get_property("contactPoint"): - return True - context.result.add_issue( - "At least one author or publisher SHOULD have a contactPoint", self) - return False - except Exception as e: - context.result.add_issue( - f"Error checking contactPoint: {str(e)}", self) - return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl deleted file mode 100644 index 7c5aee736..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_contextual_entity_links.ttl +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -@prefix ro: <./> . -@prefix ro-crate: . -@prefix schema_org: . -@prefix sh: . 
- -ro-crate:ContextualEntitiesShouldBeLinked a sh:NodeShape ; - sh:name "Contextual Entities should be linked" ; - sh:description """Contextual entities in the graph SHOULD be linked to from at least one other entity.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a ?type . - FILTER(contains(str(?this), "ro-crate-metadata.json")) - FILTER NOT EXISTS { ?this a schema:Dataset } - FILTER NOT EXISTS { ?this a schema:MediaObject } - FILTER NOT EXISTS { ?s ?p ?this . } - } - """ - ] ; - sh:message "Contextual entities SHOULD be linked to from another entity" . - -ro-crate:ReferencedContextualEntitiesShouldBeDescribed a sh:NodeShape ; - sh:name "Referenced contextual entities should be described" ; - sh:description """Contextual entities referenced by other entities SHOULD be described in the same graph.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?s ?p ?this . - FILTER(isIRI(?this)) - FILTER NOT EXISTS { ?this a ?type } - FILTER(!regex(str(?this), "^https?://", "i")) - } - """ - ] ; - sh:message "Referenced contextual entities SHOULD be described in the @graph" . diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py deleted file mode 100644 index 7badf5d72..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_detached_metadata_filename.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="Detached RO-Crate metadata filename") -class DetachedMetadataFilenameChecker(PyFunctionCheck): - """ - Detached RO-Crate metadata files SHOULD be named ${prefix}-ro-crate-metadata.json - """ - - @check(name="Detached RO-Crate: metadata filename") - def check_filename(self, context: ValidationContext) -> bool: - try: - if not context.ro_crate.is_detached(): - return True - if not context.ro_crate.uri.is_local_file(): - return True - filename = context.ro_crate.uri.as_path().name - if filename.endswith("-ro-crate-metadata.json"): - return True - if filename == "ro-crate-metadata.json": - context.result.add_issue( - "Detached RO-Crate metadata file SHOULD be named ${prefix}-ro-crate-metadata.json", self) - return False - return True - except Exception as e: - context.result.add_issue( - f"Error checking detached metadata filename: {str(e)}", self) - return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl deleted file mode 100644 index a1b8005e4..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_name.ttl +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this 
file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -@prefix ro: <./> . -@prefix ro-crate: . -@prefix schema: . -@prefix sh: . -@prefix xsd: . - -ro-crate:EntityNameRecommendedProperties a sh:NodeShape ; - sh:name "Entity: RECOMMENDED name" ; - sh:description """Entities SHOULD have a human-readable name.""" ; - sh:target [ - a sh:SPARQLTarget ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT ?this - WHERE { - ?this a ?type . - FILTER(contains(str(?this), "ro-crate-metadata.json")) - } - """ - ] ; - sh:property [ - sh:path schema:name ; - sh:minCount 1 ; - sh:dataType xsd:string ; - sh:severity sh:Warning ; - sh:message "Entities SHOULD have a human-readable name" ; - ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py deleted file mode 100644 index 6ee8c2eca..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_reachability.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="Entity reachability") -class EntityReachabilityChecker(PyFunctionCheck): - """ - Entities SHOULD be referenced from the Root Data Entity (directly or indirectly). - """ - - @check(name="Entity reachability from root") - def check_reachability(self, context: ValidationContext) -> bool: - try: - graph = context.ro_crate.metadata.as_dict().get("@graph", []) - ids = {e.get("@id") for e in graph if e.get("@id")} - referenced = set() - - def collect_refs(value): - if isinstance(value, dict): - if "@id" in value: - referenced.add(value["@id"]) - for v in value.values(): - collect_refs(v) - elif isinstance(value, list): - for v in value: - collect_refs(v) - - for entity in graph: - for key, value in entity.items(): - if key in ("@id", "@type", "@context"): - continue - collect_refs(value) - - root_id = context.ro_crate.metadata.get_root_data_entity().id - always_allowed = {context.ro_crate.metadata_descriptor_id, root_id} - unreferenced = [i for i in ids if i not in referenced and i not in always_allowed] - if unreferenced: - context.result.add_issue( - f"Entities not referenced from the graph: {unreferenced}", self) - return False - return True - except Exception as e: - context.result.add_issue( - f"Error checking entity reachability: {str(e)}", self) - return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py deleted file mode 100644 index 2c9001b89..000000000 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_relative_uri.py +++ 
/dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="RO-Crate Root Data Entity RECOMMENDED value") -class RootDataEntityRelativeURI(PyFunctionCheck): - """ - The Root Data Entity SHOULD be denoted by the string ./ or an absolute URI - """ - - @check(name="Root Data Entity: RECOMMENDED value") - def check_relative_uris(self, context: ValidationContext) -> bool: - """Check if the Root Data Entity is denoted by the string `./` or an absolute URI""" - try: - root_entity = context.ro_crate.metadata.get_root_data_entity() - if root_entity.id == './': - return True - if re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", root_entity.id): - return True - context.result.add_issue( - 'Root Data Entity URI is not denoted by the string `./` or an absolute URI', self) - return False - except Exception as e: - context.result.add_issue( - f'Error checking Root Data Entity URI: {str(e)}', self) - return False diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py deleted file mode 100644 index f5742acd2..000000000 --- 
a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_existence.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from rocrate_validator.utils import log as logging -from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) - -# set up logging -logger = logging.getLogger(__name__) - - -@requirement(name="Data Entity: RECOMMENDED resource availability") -class DataEntityRecommendedChecker(PyFunctionCheck): - """ - Data Entities with absolute URI paths SHOULD be available - at the time of RO-Crate creation - """ - - @check(name="Data Entity: RECOMMENDED resource availability") - def check_availability(self, context: ValidationContext) -> bool: - """ - Check the availability of the Data Entity with absolute URI paths - are available at the time of RO-Crate creation - """ - # Skip the check in metadata-only mode - if context.settings.metadata_only: - logger.debug("Skipping file descriptor existence check in metadata-only mode") - return True - # Perform the check - result = True - for entity in [ - _ for _ in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True) - if _.has_absolute_path()]: - assert entity.id is not None, "Entity has no @id" - try: - if not entity.is_available(): - context.result.add_issue( - f'Data Entity {entity.id} is not available', self) - result = False - except 
Exception as e: - context.result.add_issue( - f'Web-based Data Entity {entity.id} is not available: {e}', self) - result = False - if not result and context.fail_fast: - return result - return result From 0238beb44cda47e76b393f441ef891149d489cf8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 15:04:02 +0200 Subject: [PATCH 128/352] test(ro-crate-1.2): :card_file_box: refactor test data --- .../valid/ro-crate-metadata.json | 34 ++++++++++- .../valid/ro-crate-metadata.json | 13 +++- .../valid/ro-crate-metadata.json | 30 ++++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 13 +++- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/basic-ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../single_value/valid/ro-crate-metadata.json | 28 +++++++-- .../valid/citeas-ro-crate-metadata.json | 36 +++++++++-- .../valid/ro-crate-metadata.json | 55 +++++++++++++---- .../valid/ro-crate-metadata.json | 55 +++++++++++++---- .../valid/ro-crate-metadata.json | 43 ++++++++++--- .../valid/ro-crate-metadata.json | 55 +++++++++++++---- .../valid/ro-crate-metadata.json | 55 +++++++++++++---- .../valid/ro-crate-metadata.json | 39 ++++++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../invalid/ro-crate-metadata.json | 49 +++++++++++---- .../valid/ro-crate-metadata.json | 49 +++++++++++---- .../valid/ro-crate-metadata.json | 60 +++++++++++++++---- .../valid/ro-crate-metadata.json | 60 +++++++++++++++---- .../valid/ro-crate-metadata.json | 46 +++++++++++--- .../valid/ro-crate-metadata.json | 58 ++++++++++++++---- .../valid/ro-crate-metadata.json | 54 +++++++++++++---- .../valid/ro-crate-metadata.json | 11 +++- .../valid/ro-crate-metadata.json | 11 +++- .../valid/ro-crate-metadata.json | 54 +++++++++++++---- 
.../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 28 +++++++-- .../valid/ro-crate-metadata.json | 18 +++++- .../ro-crate-1.2/test_referenced_rocrate.py | 2 +- 36 files changed, 1040 insertions(+), 224 deletions(-) diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json index 5837f0e88..253ce4119 100644 --- a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/license_entity/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": { "@id": "./" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,7 +17,31 @@ "name": "License entity — valid", "description": "RO-Crate with a well-formed license entity: absolute HTTPS URL @id, CreativeWork type, name and description.", "datePublished": "2024-01-01", - "license": { "@id": "https://spdx.org/licenses/MIT.html" } + "license": { + "@id": "https://spdx.org/licenses/MIT.html" + }, + "funder": { + "@id": "https://ror.org/03yrm5c26" + }, + "publisher": { + "@id": "https://ror.org/03yrm5c26" + } + }, + { + "@id": "https://ror.org/03yrm5c26", + "@type": "Organization", + "name": "Wellcome Trust", + "description": "The Wellcome Trust is a global charitable foundation dedicated to improving health by supporting scientific research and innovation.", + "url": "https://wellcome.org/", + "contactPoint": { + "@id": "mailto:info@wellcome.org" + } + }, + { + "@id": "mailto:info@wellcome.org", + "@type": "ContactPoint", + "contactType": 
"general inquiries", + "name": "Wellcome Trust Contact" }, { "@id": "https://spdx.org/licenses/MIT.html", diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json index 324b92c89..362c81473 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/described_contextual_entities/valid/ro-crate-metadata.json @@ -73,7 +73,10 @@ "@type": "Organization", "name": "Example University", "description": "An example university that the Person entity is affiliated with.", - "url": "https://www.exampleuniversity.edu" + "url": "https://www.exampleuniversity.edu", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "data.csv", @@ -82,6 +85,12 @@ "description": "A sample data file.", "encodingFormat": "text/csv", "contentSize": "42" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example University", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json index 1f6d01f41..ea6332608 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "author": { "@id": "#alice" @@ -52,21 +52,39 @@ "@type": "Person", "name": "Alice Researcher", "affiliation": { - "@id": "#publisher-org" + "@id": 
"https://ror.org/05f9q8d28" } }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example University", - "url": "https://example.edu" + "url": "https://example.edu", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example University", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json index ea398d53e..5ff0304aa 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/no_parent_traversal/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [], "funder": { @@ -45,17 +45,35 @@ "description": "CC BY 4.0 license." 
}, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json index 324b92c89..362c81473 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/referenced_contextual_entities/valid/ro-crate-metadata.json @@ -73,7 +73,10 @@ "@type": "Organization", "name": "Example University", "description": "An example university that the Person entity is affiliated with.", - "url": "https://www.exampleuniversity.edu" + "url": "https://www.exampleuniversity.edu", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "data.csv", @@ -82,6 +85,12 @@ "description": "A sample data file.", "encodingFormat": "text/csv", "contentSize": "42" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example University", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file 
+} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json index 4b85234bc..e25422449 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/utf8_identifiers/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [], "funder": { @@ -45,17 +45,35 @@ "description": "CC BY 4.0 license." }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json index ecb4a3c9f..d5f71f62a 100644 --- a/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json +++ 
b/tests/data/crates/rocrate-1.2/3_detached_rocrates/naming-convention/local-descriptor/valid/basic-ro-crate-metadata.json @@ -26,7 +26,7 @@ "url": "https://raw.githubusercontent.com/crs4/rocrate-validator/refs/heads/develop/tests/data/crates/valid/workflow-roc/", "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": "https://ror.org/00k4n6c32" @@ -43,10 +43,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -59,7 +62,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json index 534bda896..7aa2e21d0 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/entity_reachability/valid/ro-crate-metadata.json @@ -22,7 +22,7 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": 
"https://ror.org/00k4n6c32" @@ -39,10 +39,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -55,7 +58,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json index a94ea7bb0..92642673d 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_name/valid/ro-crate-metadata.json @@ -26,17 +26,20 @@ } ], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": "https://ror.org/00k4n6c32" } }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -57,7 +60,22 @@ "@type": "Organization", "name": "European 
Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json index 534bda896..7aa2e21d0 100644 --- a/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/5_metadata_entities/recommended_schema_type/valid/ro-crate-metadata.json @@ -22,7 +22,7 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": "https://ror.org/00k4n6c32" @@ -39,10 +39,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -55,7 +58,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" 
+ }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json index 534bda896..7aa2e21d0 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/recommended_prefix/valid/ro-crate-metadata.json @@ -22,7 +22,7 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": "https://ror.org/00k4n6c32" @@ -39,10 +39,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -55,7 +58,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json index 534bda896..7aa2e21d0 100644 --- a/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/6_metadata_descriptor/recommended_conformsTo/single_value/valid/ro-crate-metadata.json @@ -22,7 +22,7 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": { "@id": "https://ror.org/00k4n6c32" @@ -39,10 +39,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -55,7 +58,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json index aca82eea6..7f49e8719 100644 --- 
a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_citeas_for_resolvable_id/valid/citeas-ro-crate-metadata.json @@ -22,7 +22,7 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "cite-as": { "@id": "https://example.org/ro-crate/detached/citable-root-data-entity/" @@ -30,13 +30,19 @@ "url": "https://example.org/ro-crate/detached/citable-root-data-entity/", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" } }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -49,7 +55,29 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json index 3e86af31f..cddd01de8 100644 --- 
a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_conformsto_if_profiles/valid/ro-crate-metadata.json @@ -3,7 +3,10 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": ["CreativeWork", "schema:Book"], + "@type": [ + "CreativeWork", + "schema:Book" + ], "about": { "@id": "./" }, @@ -21,11 +24,16 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" - }, - "funder": { - "@id": "#funder-org" + "@id": "https://ror.org/05f9q8d28" }, + "funder": [ + { + "@id": "https://ror.org/00qj9e848" + }, + { + "@id": "https://ror.org/00k4n6c32" + } + ], "identifier": { "@id": "#identifier-pv" }, @@ -41,18 +49,24 @@ "description": "A Creative Commons license" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { - "@id": "#funder-org", + "@id": "https://ror.org/00qj9e848", "@type": "Organization", "name": "European Commission", "url": "https://ec.europa.eu", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "contactPoint": { + "@id": "#contact-point-10" } }, { @@ -71,7 +85,28 @@ "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "External Funder", - "url": "https://ror.org" + "url": "https://ror.org", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for External Funder", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-10", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": 
"mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json index fccee6e09..95c2a4611 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_datepublished_day_precision/valid/ro-crate-metadata.json @@ -3,7 +3,10 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": ["CreativeWork", "schema:Book"], + "@type": [ + "CreativeWork", + "schema:Book" + ], "about": { "@id": "./" }, @@ -21,11 +24,16 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" - }, - "funder": { - "@id": "#funder-org" + "@id": "https://ror.org/05f9q8d28" }, + "funder": [ + { + "@id": "https://ror.org/00qj9e848" + }, + { + "@id": "https://ror.org/00k4n6c32" + } + ], "identifier": { "@id": "#identifier-pv" }, @@ -38,18 +46,24 @@ "description": "A Creative Commons license" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { - "@id": "#funder-org", + "@id": "https://ror.org/00qj9e848", "@type": "Organization", "name": "European Commission", "url": "https://ec.europa.eu", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "contactPoint": { + "@id": "#contact-point-10" } }, { @@ -62,7 +76,28 @@ "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "External Funder", - "url": "https://ror.org" + "url": "https://ror.org", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": 
"Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for External Funder", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-10", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json index 6d92823f3..ec0fd1855 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/valid/ro-crate-metadata.json @@ -21,14 +21,14 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "funder": [ { - "@id": "#project-org" + "@id": "https://ror.org/00k4n6c32" }, { - "@id": "https://ror.org/00k4n6c32" + "@id": "https://ror.org/00qj9e848" } ], "hasPart": [], @@ -44,19 +44,25 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { - "@id": "#project-org", + "@id": "https://ror.org/00qj9e848", "@type": "Organization", "name": "Example Research Project", "description": "The research project associated with this RO-Crate.", "url": "https://example.org/projects/example-project", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "contactPoint": { + "@id": "#contact-point-10" } }, { @@ -64,13 +70,34 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds 
research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "CC BY 4.0 license." + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-10", + "@type": "ContactPoint", + "name": "Contact for Example Research Project", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json index 9d3a2fa33..568324dbc 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_if_pid/valid/ro-crate-metadata.json @@ -3,7 +3,10 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": ["CreativeWork", "schema:Book"], + "@type": [ + "CreativeWork", + "schema:Book" + ], "about": { "@id": "./" }, @@ -21,11 +24,16 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" - }, - "funder": { - "@id": "#funder-org" + "@id": "https://ror.org/05f9q8d28" }, + "funder": [ + { + "@id": "https://ror.org/00qj9e848" + }, + { + "@id": "https://ror.org/00k4n6c32" + } + ], "identifier": { "@id": "#identifier-pv" }, @@ -38,18 +46,24 @@ "description": "A Creative Commons license" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", 
"@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { - "@id": "#funder-org", + "@id": "https://ror.org/00qj9e848", "@type": "Organization", "name": "European Commission", "url": "https://ec.europa.eu", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "contactPoint": { + "@id": "#contact-point-10" } }, { @@ -62,7 +76,28 @@ "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "External Funder", - "url": "https://ror.org" + "url": "https://ror.org", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for External Funder", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-10", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json index 5c494fa0a..7b22e745e 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_propertyvalue/valid/ro-crate-metadata.json @@ -3,7 +3,10 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": ["CreativeWork", "schema:Book"], + "@type": [ + "CreativeWork", + "schema:Book" + ], "about": { "@id": "./" }, @@ -21,11 +24,16 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" - }, - "funder": { - "@id": 
"#funder-org" + "@id": "https://ror.org/05f9q8d28" }, + "funder": [ + { + "@id": "https://ror.org/00qj9e848" + }, + { + "@id": "https://ror.org/00k4n6c32" + } + ], "identifier": { "@id": "#identifier-pv" }, @@ -38,18 +46,24 @@ "description": "A Creative Commons license" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { - "@id": "#funder-org", + "@id": "https://ror.org/00qj9e848", "@type": "Organization", "name": "European Commission", "url": "https://ec.europa.eu", "funder": { "@id": "https://ror.org/00k4n6c32" + }, + "contactPoint": { + "@id": "#contact-point-10" } }, { @@ -62,7 +76,28 @@ "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "External Funder", - "url": "https://ror.org" + "url": "https://ror.org", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for External Funder", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-10", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json index 268924ab9..8f3c3f9c1 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_identifier_resolution/valid/ro-crate-metadata.json @@ 
-14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Root identifier resolution \u2014 valid", + "name": "Root identifier resolution — valid", "description": "RO-Crate whose Root Data Entity has an identifier URL that resolves to RO-Crate content via Signposting.", "datePublished": "2024-01-01", "license": { @@ -22,18 +22,23 @@ }, "hasPart": [], "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#identifier-pv" }, - "identifier": "https://doi.org/10.1234/resolvable-rocrate", "funder": { "@id": "https://ror.org/00k4n6c32" } }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -46,7 +51,29 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" + }, + { + "@id": "#identifier-pv", + "@type": "PropertyValue", + "name": "DOI", + "propertyID": "https://registry.identifiers.org/registry/doi", + "value": "https://doi.org/10.1234/resolvable-rocrate" } ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json index b3f791165..770cf8b09 100644 --- 
a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_publisher/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [], "funder": { @@ -45,17 +45,35 @@ "description": "CC BY 4.0 license." }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json index 65095ebf4..166a1654e 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/invalid/ro-crate-metadata.json @@ -4,22 +4,40 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, - "about": {"@id": "./"} + "conformsTo": { + 
"@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } }, { "@id": "./", "@type": "Dataset", "name": "Test crate with redundant license on Data Entity", "description": "The Data Entity repeats the same license as the Root.", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "datePublished": "2024-01-01", - "publisher": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/research-data.csv"}], - "funder": {"@id": "https://ror.org/012345678"}, - "cite-as": {"@id": "./"}, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "data/research-data.csv" + } + ], + "funder": { + "@id": "https://ror.org/012345678" + }, + "cite-as": { + "@id": "./" + }, "url": "./", - "identifier": {"@id": "#uuid"} + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -35,19 +53,30 @@ "description": "Research data under the same license as the Root.", "encodingFormat": "text/csv", "contentSize": "2048", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"} + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + } }, { "@id": "https://ror.org/012345678", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license." 
+ }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json index 259b354de..97bff43db 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/data_entity_license/valid/ro-crate-metadata.json @@ -4,22 +4,40 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, - "about": {"@id": "./"} + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } }, { "@id": "./", "@type": "Dataset", "name": "Test crate with divergent licenses", "description": "The Root Data Entity has CC-BY-4.0; a Data Entity has CC0.", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "datePublished": "2024-01-01", - "publisher": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/open-data.csv"}], - "funder": {"@id": "https://ror.org/012345678"}, - "cite-as": {"@id": "./"}, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "data/open-data.csv" + } + ], + "funder": { + "@id": "https://ror.org/012345678" + }, + "cite-as": { + "@id": "./" + }, "url": "./", - "identifier": {"@id": "#uuid"} + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -35,13 +53,18 @@ "description": "Open data file under CC0.", "encodingFormat": "text/csv", "contentSize": "1024", - "license": {"@id": "http://spdx.org/licenses/CC0-1.0"} + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } }, { "@id": "https://ror.org/012345678", 
"@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -54,6 +77,12 @@ "@type": "CreativeWork", "name": "CC0 1.0 Universal", "description": "Public domain dedication." + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json index 05c7d2f71..421954310 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_has_part/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,11 +17,23 @@ "name": "Dataset hasPart — valid", "description": "RO-Crate where the local Dataset sub-entity declares hasPart listing its contents.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "funder": {"@id": "https://ror.org/00k4n6c32"}, - "identifier": {"@id": "#uuid"}, - "hasPart": [{"@id": "subdir/"}] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" + }, + "hasPart": [ + { + "@id": "subdir/" + } + ] }, { "@id": "#uuid", @@ -31,7 +47,11 @@ "@type": 
"Dataset", "name": "Sub-directory with hasPart", "description": "A local Dataset sub-entity that correctly lists its contents via hasPart.", - "hasPart": [{"@id": "subdir/data.csv"}] + "hasPart": [ + { + "@id": "subdir/data.csv" + } + ] }, { "@id": "subdir/data.csv", @@ -42,23 +62,41 @@ "contentSize": "1024" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json index 174b25ac7..c0d6b3128 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/dataset_trailing_slash/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,11 +17,23 @@ "name": "Dataset trailing slash — valid", "description": "RO-Crate where the local Dataset sub-entity @id ends with '/' as required.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "funder": {"@id": "https://ror.org/00k4n6c32"}, - "identifier": {"@id": "#uuid"}, - "hasPart": [{"@id": "subdir/"}] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + }, + "identifier": { + "@id": "#uuid" + }, + "hasPart": [ + { + "@id": "subdir/" + } + ] }, { "@id": "#uuid", @@ -31,7 +47,11 @@ "@type": "Dataset", "name": "Sub-directory", "description": "A local Dataset sub-entity whose @id correctly ends with '/'.", - "hasPart": [{"@id": "subdir/data.csv"}] + "hasPart": [ + { + "@id": 
"subdir/data.csv" + } + ] }, { "@id": "subdir/data.csv", @@ -42,23 +62,41 @@ "contentSize": "1024" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "CC BY 4.0 license." + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json index 194fbec8a..4477ef70e 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/missing_file_local_path/valid/ro-crate-metadata.json @@ -4,20 +4,39 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, - "about": {"@id": "./"} + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + } }, { "@id": "./", "@type": "Dataset", "name": "Test crate with localPath for missing file", "description": "A local file 
is referenced but not present in the payload; localPath is provided.", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, "datePublished": "2024-01-01", - "publisher": {"@id": "https://ror.org/012345678"}, - "funder": {"@id": "https://ror.org/012345678"}, - "hasPart": [{"@id": "data/missing-file.csv"}, {"@id": "#output-file.csv"}], - "identifier": {"@id": "#uuid"} + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "data/missing-file.csv" + }, + { + "@id": "#output-file.csv" + } + ], + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -30,7 +49,10 @@ "@id": "https://ror.org/012345678", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "data/missing-file.csv", @@ -55,6 +77,12 @@ "@type": "CreativeWork", "name": "Creative Commons Attribution 4.0 International", "description": "A Creative Commons license." 
+ }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json index 90c09bb28..44d0718e9 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_conformsto/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,11 +17,23 @@ "name": "File with conformsTo — valid", "description": "This RO-Crate has a File Data Entity with conformsTo referencing a CreativeWork profile.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "funder": {"@id": "https://ror.org/00k4n6c32"}, - "hasPart": [{"@id": "data.csv"}], - "identifier": {"@id": "#uuid"} + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ], + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -27,17 +43,23 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { 
"@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -52,13 +74,27 @@ "description": "A CSV file conforming to a format profile.", "encodingFormat": "text/csv", "contentSize": "2048", - "conformsTo": {"@id": "#csv-profile"} + "conformsTo": { + "@id": "#csv-profile" + } }, { "@id": "#csv-profile", "@type": "CreativeWork", "name": "CSV Profile for RO-Crate 1.2", "description": "A profile describing the expected structure of CSV files used in RO-Crates." + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json index cdd631c47..73d8d4ba9 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_contentSize/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,11 +17,23 @@ "name": "File with contentSize — valid", "description": "This RO-Crate has a File Data Entity with the recommended contentSize property.", 
"datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "funder": {"@id": "https://ror.org/00k4n6c32"}, - "hasPart": [{"@id": "data.csv"}], - "identifier": {"@id": "#uuid"} + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ], + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -27,17 +43,23 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -52,6 +74,18 @@ "description": "A sample CSV data file with contentSize.", "encodingFormat": "text/csv", "contentSize": "2048" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json index 9a326af6d..364e3b786 100644 --- 
a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_encoding_format/valid/ro-crate-metadata.json @@ -73,7 +73,10 @@ "@type": "Organization", "name": "Example University", "description": "An example university that the Person entity is affiliated with.", - "url": "https://www.exampleuniversity.edu" + "url": "https://www.exampleuniversity.edu", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "data.csv", @@ -90,6 +93,12 @@ "@id": "#location", "@type": "Place", "name": "Example location" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example University", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json index 9a326af6d..364e3b786 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_properties/valid/ro-crate-metadata.json @@ -73,7 +73,10 @@ "@type": "Organization", "name": "Example University", "description": "An example university that the Person entity is affiliated with.", - "url": "https://www.exampleuniversity.edu" + "url": "https://www.exampleuniversity.edu", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "data.csv", @@ -90,6 +93,12 @@ "@id": "#location", "@type": "Place", "name": "Example location" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example University", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json index 20e751522..8fc1776f2 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/recommended_sdDatePublished/valid/ro-crate-metadata.json @@ -4,8 +4,12 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"} + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", @@ -13,11 +17,23 @@ "name": "Web-based File with sdDatePublished — valid", "description": "This RO-Crate has a web-based File Data Entity with the recommended sdDatePublished property.", "datePublished": "2024-01-01", - "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, - "publisher": {"@id": "#publisher-org"}, - "funder": {"@id": "https://ror.org/00k4n6c32"}, - "hasPart": [{"@id": "https://example.com/data.csv"}], - "identifier": {"@id": "#uuid"} + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/00k4n6c32" + }, + "hasPart": [ + { + "@id": "https://example.com/data.csv" + } + ], + "identifier": { + "@id": "#uuid" + } }, { "@id": "#uuid", @@ -27,17 +43,23 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": 
"https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -53,6 +75,18 @@ "encodingFormat": "text/csv", "contentSize": "512", "sdDatePublished": "2024-06-15" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json index e5d1fde46..e0d65642c 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_dataset_distribution/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [], "distribution": { @@ -42,10 +42,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -65,7 +68,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + 
"email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json index dd25ec7db..e099665cb 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_directory_distribution/valid/ro-crate-metadata.json @@ -21,7 +21,7 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [ { @@ -65,17 +65,35 @@ "encodingFormat": "application/zip" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://ror.org/00k4n6c32", "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json index ca0db284c..01ad62a9e 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_content_url/valid/ro-crate-metadata.json @@ -14,14 +14,14 @@ { "@id": "./", "@type": "Dataset", - "name": "Web entity contentUrl \u2014 valid", + "name": "Web entity contentUrl — valid", "description": "RO-Crate with a web-based File Data Entity that declares a downloadable contentUrl.", "datePublished": "2024-01-01", "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [ { @@ -43,10 +43,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -69,7 +72,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json 
b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json index 0710f800e..564d070b0 100644 --- a/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/8_metadata_dataEntities/web_entity_downloadable/valid/ro-crate-metadata.json @@ -14,14 +14,14 @@ { "@id": "./", "@type": "Dataset", - "name": "Web entity downloadable \u2014 valid", + "name": "Web entity downloadable — valid", "description": "RO-Crate with a web-based File Data Entity whose @id returns a non-HTML Content-Type (directly downloadable).", "datePublished": "2024-01-01", "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, "publisher": { - "@id": "#publisher-org" + "@id": "https://ror.org/05f9q8d28" }, "hasPart": [ { @@ -43,10 +43,13 @@ "value": "550e8400-e29b-41d4-a716-446655440000" }, { - "@id": "#publisher-org", + "@id": "https://ror.org/05f9q8d28", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -68,7 +71,22 @@ "@type": "Organization", "name": "European Commission", "description": "The European Commission funds research and innovation programmes.", - "url": "https://ec.europa.eu" + "url": "https://ec.europa.eu", + "contactPoint": { + "@id": "#contact-point-1" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#contact-point-1", + "@type": "ContactPoint", + "name": "Contact for European Commission", + "email": "mailto:contact@example.org" } ] } diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json index 
d73be8915..0850bc7f0 100644 --- a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json @@ -3,7 +3,7 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", + "@type": ["CreativeWork", "Dataset"], "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" }, @@ -21,6 +21,9 @@ "@id": "https://creativecommons.org/licenses/by/4.0/" }, "datePublished": "2024-01-01", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, "publisher": { "@id": "https://ror.org/012345678" }, @@ -79,7 +82,10 @@ "@id": "https://ror.org/012345678", "@type": "Organization", "name": "Example Research Institute", - "url": "https://example.org" + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -96,6 +102,12 @@ "@id": "https://w3id.org/ro/crate/1.2", "@type": "Profile", "name": "RO-Crate 1.2" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" } ] -} \ No newline at end of file +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 2487e0984..189ad6874 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -33,7 +33,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_69.1"], ) From 364cc5499e2cc7b98d9d9cf06df34cc749e5582e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:12:51 +0200 Subject: [PATCH 129/352] 
feat(ro-crate-1.2): :sparkles: restore check for the `hasPart` property --- .../1.2/must/2_root_data_entity_haspart.ttl | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl new file mode 100644 index 000000000..fe1230668 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/must/2_root_data_entity_haspart.ttl @@ -0,0 +1,49 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix schema_org: . +@prefix sh: . + +ro-crate:RootDataEntityHasPartAllDataEntities a sh:NodeShape ; + sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; + sh:description """The Root Data Entity MUST directly or indirectly reference + all Data Entities in the RO-Crate via hasPart (RO-Crate 1.2).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ro-crate:RootDataEntity . + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this ?unreferenced + WHERE { + $this a ro-crate:RootDataEntity . + ?unreferenced a ro-crate:DataEntity . 
+ FILTER(?unreferenced != $this) + FILTER NOT EXISTS { + $this schema:hasPart+ ?unreferenced + } + } + """ ; + sh:severity sh:Violation ; + sh:name "Root Data Entity: hasPart MUST reference all Data Entities" ; + sh:message "The Root Data Entity MUST reference all Data Entities via hasPart (directly or indirectly)" ; + ] . \ No newline at end of file From 1a024711a86e8d858cfbdb0c925cd954ab2c38af Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:14:27 +0200 Subject: [PATCH 130/352] test(ro-crate-1.2): :white_check_mark: test check for the `hasPart` property --- .../invalid_dataset_not_in_haspart/file1.txt | 0 .../ro-crate-metadata.json | 44 ++++++++++++++++++ .../local.txt | 0 .../ro-crate-metadata.json | 44 ++++++++++++++++++ .../invalid_workflow_not_in_haspart/data.txt | 0 .../ro-crate-metadata.json | 43 ++++++++++++++++++ .../workflow.ga | 0 .../test_metadata_rootDataEntity.py | 45 +++++++++++++++++++ 8 files changed, 176 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/file1.txt create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/local.txt create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/data.txt create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/ro-crate-metadata.json create mode 100644 
tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/workflow.ga diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/file1.txt b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/file1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/ro-crate-metadata.json new file mode 100644 index 000000000..0df396d94 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_dataset_not_in_haspart/ro-crate-metadata.json @@ -0,0 +1,44 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: Sub-Dataset not in hasPart", + "description": "This RO-Crate has a sub-Dataset (Directory Data Entity) that is missing from hasPart.", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { "@id": "file1.txt" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International" + }, + { + "@id": "file1.txt", + "@type": "File", + "name": "File 1" + }, + { + "@id": "subdir/", + "@type": "Dataset", + "name": "Sub-directory Data Entity", + "description": "A sub-Dataset Data Entity that is not referenced in hasPart." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/local.txt b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/local.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/ro-crate-metadata.json new file mode 100644 index 000000000..640a0c6de --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_web_entity_not_in_haspart/ro-crate-metadata.json @@ -0,0 +1,44 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: Web Data Entity not in hasPart", + "description": "This RO-Crate has a web-based File entity (absolute URL @id) that is missing from hasPart.", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { "@id": "local.txt" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International" + }, + { + "@id": "local.txt", + "@type": "File", + "name": "Local file" + }, + { + "@id": "https://example.org/data/remote.tsv", + "@type": "File", + "name": "Remote web-based data file", + "encodingFormat": "text/tab-separated-values" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/data.txt 
b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/data.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/ro-crate-metadata.json new file mode 100644 index 000000000..898c2e20c --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/ro-crate-metadata.json @@ -0,0 +1,43 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test RO-Crate: Workflow not in hasPart", + "description": "This RO-Crate has a workflow File entity (File/SoftwareSourceCode/ComputationalWorkflow) that is missing from hasPart.", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { "@id": "data.txt" } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International" + }, + { + "@id": "data.txt", + "@type": "File", + "name": "Data File" + }, + { + "@id": "workflow.ga", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "My Workflow" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/workflow.ga b/tests/data/crates/rocrate-1.2/7_root_data_entity/required_haspart_all_data_entities/invalid_workflow_not_in_haspart/workflow.ga new file mode 100644 index 000000000..e69de29bb diff 
--git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 432da28ae..9bbf3d759 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -398,6 +398,51 @@ def test_invalid_required_hasPart_all_data_entities(): ) +def test_invalid_hasPart_workflow_not_in_haspart(): + """ + A Workflow File entity (typed as File/SoftwareSourceCode/ComputationalWorkflow) + that is not referenced via hasPart triggers the REQUIRED hasPart check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_hasPart_workflow_not_in_haspart, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: hasPart MUST reference all Data Entities"], + expected_triggered_issues=["MUST reference all Data Entities via hasPart"], + ) + + +def test_invalid_hasPart_web_entity_not_in_haspart(): + """ + A Web Data Entity (absolute URL @id) that is not referenced via hasPart + triggers the REQUIRED hasPart check. + """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_hasPart_web_entity_not_in_haspart, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: hasPart MUST reference all Data Entities"], + expected_triggered_issues=["MUST reference all Data Entities via hasPart"], + ) + + +def test_invalid_hasPart_dataset_not_in_haspart(): + """ + A sub-Dataset Directory Data Entity that is not referenced via hasPart + triggers the REQUIRED hasPart check. 
+ """ + do_entity_test( + __metadata_root_data_entity_crates__.invalid_hasPart_dataset_not_in_haspart, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Root Data Entity: hasPart MUST reference all Data Entities"], + expected_triggered_issues=["MUST reference all Data Entities via hasPart"], + ) + + # --------------------------------------------------------------------------- # Root Data Entity: identifier SHOULD be present if PID exists (RECOMMENDED) # --------------------------------------------------------------------------- From a4f66d2b179c2291c321f5daf3c4d044842480f9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:15:30 +0200 Subject: [PATCH 131/352] test(ro-crate-1.2): :card_file_box: refactor test data --- .../image_about/valid/ro-crate-metadata.json | 41 ++++++++++++++++--- .../valid/ro-crate-metadata.json | 41 ++++++++++++++++--- .../valid/ro-crate-metadata.json | 31 +++++++++++--- .../valid/ro-crate-metadata.json | 40 +++++++++++++++--- .../valid/ro-crate-metadata.json | 7 +++- .../valid/ro-crate-metadata.json | 6 +-- .../test_metadata_contextualEntities.py | 6 +++ 7 files changed, 147 insertions(+), 25 deletions(-) diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json index 1c77e54b3..3009cbcb0 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json @@ -20,6 +20,15 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + }, "hasPart": [ { "@id": 
"workflow.ga" @@ -42,7 +51,23 @@ { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", - "name": "CC BY 4.0" + "name": "CC BY 4.0", + "description": "Creative Commons Attribution 4.0 International license." + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Example University Contact", + "email": "contact@example.edu" }, { "@id": "workflow.ga", @@ -54,6 +79,7 @@ "name": "My Workflow", "description": "A Galaxy workflow with a diagram image.", "encodingFormat": "application/json", + "contentSize": "2048", "programmingLanguage": { "@id": "#galaxy" }, @@ -68,9 +94,7 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": { - "@id": "https://galaxyproject.org/" - } + "url": "https://galaxyproject.org/" }, { "@id": "diagram.png", @@ -81,9 +105,16 @@ "name": "Workflow diagram", "description": "A diagram with about referencing the workflow.", "encodingFormat": "image/png", + "contentSize": "10240", "about": { "@id": "workflow.ga" } + }, + { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", + "@type": ["CreativeWork", "Profile"], + "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", + "description": "Bioschemas profile for computational workflows." 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json index 3ce6794e1..7a606cd6c 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json @@ -20,6 +20,15 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + }, "hasPart": [ { "@id": "workflow.ga" @@ -42,7 +51,23 @@ { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", - "name": "CC BY 4.0" + "name": "CC BY 4.0", + "description": "Creative Commons Attribution 4.0 International license." 
+ }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Example University Contact", + "email": "contact@example.edu" }, { "@id": "workflow.ga", @@ -54,6 +79,7 @@ "name": "My Workflow", "description": "A Galaxy workflow with a diagram image.", "encodingFormat": "application/json", + "contentSize": "2048", "programmingLanguage": { "@id": "#galaxy" }, @@ -68,9 +94,7 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": { - "@id": "https://galaxyproject.org/" - } + "url": "https://galaxyproject.org/" }, { "@id": "diagram.png", @@ -81,9 +105,16 @@ "name": "Workflow diagram", "description": "A diagram illustrating the workflow steps.", "encodingFormat": "image/png", + "contentSize": "10240", "about": { "@id": "workflow.ga" } + }, + { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", + "@type": ["CreativeWork", "Profile"], + "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", + "description": "Bioschemas profile for computational workflows." 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json index 2c6d887f0..693bda3fc 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json @@ -20,6 +20,12 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, "hasPart": [ { "@id": "script.sh" @@ -39,7 +45,23 @@ { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", - "name": "CC BY 4.0" + "name": "CC BY 4.0", + "description": "Creative Commons Attribution 4.0 International license." + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Example University Contact", + "email": "contact@example.edu" }, { "@id": "script.sh", @@ -50,6 +72,7 @@ "name": "My Script", "description": "A script with proper programmingLanguage declaration.", "encodingFormat": "application/x-sh", + "contentSize": "1024", "programmingLanguage": { "@id": "#bash" } @@ -58,9 +81,7 @@ "@id": "#bash", "@type": "ComputerLanguage", "name": "Bash", - "url": { - "@id": "https://www.gnu.org/software/bash/" - } + "url": "https://www.gnu.org/software/bash/" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json index d0bfb255f..f4a48eaa6 100644 
--- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json @@ -20,6 +20,15 @@ "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + }, "hasPart": [ { "@id": "workflow.ga" @@ -39,7 +48,23 @@ { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", - "name": "CC BY 4.0" + "name": "CC BY 4.0", + "description": "Creative Commons Attribution 4.0 International license." + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Example University Contact", + "email": "contact@example.edu" }, { "@id": "workflow.ga", @@ -51,6 +76,7 @@ "name": "My Workflow", "description": "A workflow conforming to the Bioschemas ComputationalWorkflow profile.", "encodingFormat": "application/json", + "contentSize": "2048", "programmingLanguage": { "@id": "#galaxy" }, @@ -62,9 +88,13 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": { - "@id": "https://galaxyproject.org/" - } + "url": "https://galaxyproject.org/" + }, + { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", + "@type": ["CreativeWork", "Profile"], + "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", + "description": "Bioschemas profile for computational workflows." 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json index ea6332608..075d65e0d 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/named_entity_id_format/valid/ro-crate-metadata.json @@ -24,7 +24,7 @@ "@id": "https://ror.org/05f9q8d28" }, "author": { - "@id": "#alice" + "@id": "https://orcid.org/0000-0001-0000-0001" }, "hasPart": [], "funder": { @@ -48,11 +48,14 @@ "description": "CC BY 4.0 license." }, { - "@id": "#alice", + "@id": "https://orcid.org/0000-0001-0000-0001", "@type": "Person", "name": "Alice Researcher", "affiliation": { "@id": "https://ror.org/05f9q8d28" + }, + "contactPoint": { + "@id": "#contact-point" } }, { diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json index 0850bc7f0..aed146554 100644 --- a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json @@ -3,7 +3,7 @@ "@graph": [ { "@id": "ro-crate-metadata.json", - "@type": ["CreativeWork", "Dataset"], + "@type": "CreativeWork", "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" }, @@ -95,12 +95,12 @@ }, { "@id": "https://w3id.org/ro/crate", - "@type": "Profile", + "@type": ["CreativeWork", "Profile"], "name": "RO-Crate" }, { "@id": "https://w3id.org/ro/crate/1.2", - "@type": "Profile", + "@type": ["CreativeWork", "Profile"], "name": "RO-Crate 1.2" }, { diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 73d02865c..be4405d38 
100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -29,6 +29,12 @@ "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher ] +# Correct IDs for funder/publisher checks (used in person entity tests). +_PERSON_VALID_SKIP = [ + "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher +] + # --------------------------------------------------------------------------- # License entity: SHOULD be typed as CreativeWork (SHOULD) From 9b91a5c18e8b0f56d557a58d3eea2ca24f770b70 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:16:54 +0200 Subject: [PATCH 132/352] feat(ro-crate-1.2): :sparkles: add identifier check for Person entities --- .../1.2/should/6_organization_metadata.ttl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl index ae36acf3f..48e338c73 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_organization_metadata.ttl @@ -97,3 +97,21 @@ ro-crate:AuthorPublisherContactPoint a sh:NodeShape ; sh:severity sh:Warning ; sh:message "At least one author or publisher Person/Organization SHOULD have a contactPoint property" ; ] . + +ro-crate:PersonOrcidIdentifier a sh:NodeShape ; + sh:name "Person: SHOULD have ORCID identifier" ; + sh:description """A Person entity SHOULD use an ORCID identifier as its @id. 
+ (RO-Crate 1.2, Contextual Entities — People)""" ; + sh:targetClass schema:Person ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + FILTER(!STRSTARTS(STR(?this), "https://orcid.org/")) + } + """ ; + sh:severity sh:Warning ; + sh:message "A Person entity SHOULD have an ORCID identifier as its @id (e.g., https://orcid.org/0000-0001-2345-6789)" ; + ] . From 4cf774aaca82bbb27f30fd529658967ff49cda54 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:17:53 +0200 Subject: [PATCH 133/352] test(ro-crate-1.2): :white_check_mark: test identifier check of Person entities --- .../ro-crate-metadata.json | 57 ++++++++++++++++ .../invalid_no_orcid/ro-crate-metadata.json | 67 ++++++++++++++++++ .../valid/ro-crate-metadata.json | 68 +++++++++++++++++++ .../test_metadata_contextualEntities.py | 52 ++++++++++++++ 4 files changed, 244 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_affiliation_not_org/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_no_orcid/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_affiliation_not_org/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_affiliation_not_org/ro-crate-metadata.json new file mode 100644 index 000000000..795a413ce --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_affiliation_not_org/ro-crate-metadata.json @@ -0,0 +1,57 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + 
"conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Person entity — invalid (affiliation not Organization)", + "description": "RO-Crate with a Person entity whose affiliation references a Thing instead of an Organization.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://spdx.org/licenses/MIT.html" + }, + "author": { + "@id": "https://orcid.org/0000-0001-6121-5409" + } + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { + "@id": "mailto:tim.luckett@uts.edu.au" + }, + "affiliation": { + "@id": "https://example.org/groups/research-lab" + } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au", + "name": "Tim Luckett's contact point" + }, + { + "@id": "https://example.org/groups/research-lab", + "@type": "Thing", + "name": "Research Lab (not an Organization)" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_no_orcid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_no_orcid/ro-crate-metadata.json new file mode 100644 index 000000000..e5068c70d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/invalid_no_orcid/ro-crate-metadata.json @@ -0,0 +1,67 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Person entity — invalid (no ORCID @id)", + "description": "RO-Crate with a Person entity whose @id is not an ORCID identifier.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://spdx.org/licenses/MIT.html" + }, + "author": { + "@id": "https://example.org/persons/john-doe" + } + }, + { + "@id": "https://example.org/persons/john-doe", + "@type": "Person", + "name": "John Doe", + "contactPoint": { + "@id": "mailto:john.doe@example.org" + }, + "affiliation": { + "@id": "https://ror.org/03f0f6041" + } + }, + { + "@id": "mailto:john.doe@example.org", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "john.doe@example.org", + "name": "John Doe's contact point" + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "contactPoint": { + "@id": "mailto:contact@uts.edu.au" + } + }, + { + "@id": "mailto:contact@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@uts.edu.au", + "name": "UTS contact point" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A permissive license." 
+ } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/valid/ro-crate-metadata.json new file mode 100644 index 000000000..50cbaeff0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/person_entity/valid/ro-crate-metadata.json @@ -0,0 +1,68 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Person entity — valid", + "description": "RO-Crate with a well-formed Person entity: ORCID @id, contactPoint referencing ContactPoint, and affiliation referencing Organization.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://spdx.org/licenses/MIT.html" + }, + "author": { + "@id": "https://orcid.org/0000-0001-6121-5409" + } + }, + { + "@id": "https://orcid.org/0000-0001-6121-5409", + "@type": "Person", + "name": "Tim Luckett", + "contactPoint": { + "@id": "mailto:tim.luckett@uts.edu.au" + }, + "affiliation": { + "@id": "https://ror.org/03f0f6041" + } + }, + { + "@id": "mailto:tim.luckett@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "tim.luckett@uts.edu.au", + "name": "Tim Luckett's contact point" + }, + { + "@id": "https://ror.org/03f0f6041", + "@type": "Organization", + "name": "University of Technology Sydney", + "url": "https://www.uts.edu.au/", + "contactPoint": { + "@id": "mailto:contact@uts.edu.au" + } + }, + { + "@id": "mailto:contact@uts.edu.au", + "@type": "ContactPoint", + "contactType": "customer service", + "email": "contact@uts.edu.au", + "name": "UTS contact point" + }, + { + "@id": "https://spdx.org/licenses/MIT.html", + "@type": "CreativeWork", + "name": "MIT License", + "description": "A 
permissive license." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index be4405d38..375d6947d 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -225,3 +225,55 @@ def test_invalid_no_author_publisher_contactpoint(): "At least one author or publisher Person/Organization SHOULD have a contactPoint property" ], ) + + +# --------------------------------------------------------------------------- +# Person entity: SHOULD have ORCID identifier as @id (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_person_entity(): + """ + A Person entity with an ORCID @id, contactPoint referencing ContactPoint, + and affiliation referencing Organization SHOULD pass RECOMMENDED validation. + """ + do_entity_test( + __contextual_entities_crates__.valid_person_entity, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_PERSON_VALID_SKIP, + ) + + +def test_invalid_person_no_orcid(): + """ + A Person entity whose @id is not an ORCID identifier SHOULD trigger a + RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_person_no_orcid, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Person: SHOULD have ORCID identifier"], + expected_triggered_issues=[ + "A Person entity SHOULD have an ORCID identifier as its @id" + ], + ) + + +def test_invalid_person_affiliation_not_org(): + """ + A Person entity whose affiliation does not reference an Organization + SHOULD trigger a RECOMMENDED warning. 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_person_affiliation_not_org, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Person: RECOMMENDED affiliation"], + expected_triggered_issues=[ + "Persons SHOULD reference an Organization for affiliation" + ], + ) From a5fc010b3a28d3f30dcff0455ffed72d68d6e5e5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:21:43 +0200 Subject: [PATCH 134/352] test(ro-crate-1.2): :wrench: add test data folder index --- .../ro-crate-1.2/test_attached_rocrates.py | 2 +- .../ro-crate-1.2/test_detached_rocrates.py | 2 +- .../test_metadata_contextualEntities.py | 2 +- .../test_metadata_dataEntities.py | 2 +- .../ro-crate-1.2/test_metadata_descriptor.py | 2 +- .../ro-crate-1.2/test_metadata_document.py | 2 +- .../ro-crate-1.2/test_metadata_entities.py | 2 +- .../test_metadata_rootDataEntity.py | 2 +- .../ro-crate-1.2/test_metadata_webDatasets.py | 2 +- .../ro-crate-1.2/test_referenced_rocrate.py | 2 +- .../ro-crate-1.2/test_workflows_scripts.py | 2 +- tests/ro_crates_v1_2.py | 740 ++++++++++++++++++ 12 files changed, 751 insertions(+), 11 deletions(-) create mode 100644 tests/ro_crates_v1_2.py diff --git a/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py index 2d2b9faf7..457b151be 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_attached_rocrates.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import AttachedROCrates, MetadataDocument, MetadataDocumentFormat +from tests.ro_crates_v1_2 import AttachedROCrates, MetadataDocument, MetadataDocumentFormat from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py 
b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index a64fe070b..c45a9c2a5 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import AttachedROCrates, DetachedROCrates, MetadataDocument, MetadataDocumentFormat +from tests.ro_crates_v1_2 import AttachedROCrates, DetachedROCrates, MetadataDocument, MetadataDocumentFormat from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 375d6947d..7ea99b2b1 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import ContextualEntities +from tests.ro_crates_v1_2 import ContextualEntities from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py index 06d4f4993..ff46f1e59 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_dataEntities.py @@ -16,7 +16,7 @@ from rocrate_validator import models from rocrate_validator.utils.http import HttpRequester -from tests.ro_crates_1_2 import DataEntities +from tests.ro_crates_v1_2 import DataEntities from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py 
b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py index d2b6cad12..fb83ff1f9 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_descriptor.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import MetadataDescriptor, MetadataDocument, MetadataDocumentFormat +from tests.ro_crates_v1_2 import MetadataDescriptor, MetadataDocument, MetadataDocumentFormat from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py index 8678b4479..0f2afa9e2 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_document.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import MetadataDocument, MetadataDocumentFormat +from tests.ro_crates_v1_2 import MetadataDocument, MetadataDocumentFormat from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py index de0a1a404..420fc6b83 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import MetadataEntities +from tests.ro_crates_v1_2 import MetadataEntities from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py index 9bbf3d759..24a3674fd 100644 --- 
a/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_rootDataEntity.py @@ -16,7 +16,7 @@ from rocrate_validator import models from rocrate_validator.utils.http import HttpRequester -from tests.ro_crates_1_2 import RootDataEntity +from tests.ro_crates_v1_2 import RootDataEntity from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py index 4c68acb36..0fb53ef2f 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_webDatasets.py @@ -16,7 +16,7 @@ from rocrate_validator import models from rocrate_validator.utils.http import HttpRequester -from tests.ro_crates_1_2 import DataEntities +from tests.ro_crates_v1_2 import DataEntities from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 189ad6874..8a4c6358c 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -15,7 +15,7 @@ import logging from rocrate_validator import models -from tests.ro_crates_1_2 import ReferencedROCrates +from tests.ro_crates_v1_2 import ReferencedROCrates from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py index 12c8b8548..5620b8078 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -15,7 +15,7 @@ import logging from 
rocrate_validator import models -from tests.ro_crates_1_2 import WorkflowsScripts +from tests.ro_crates_v1_2 import WorkflowsScripts from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py new file mode 100644 index 000000000..fa64116a2 --- /dev/null +++ b/tests/ro_crates_v1_2.py @@ -0,0 +1,740 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path +from tempfile import TemporaryDirectory + +from pytest import fixture + +CURRENT_PATH = Path(__file__).resolve().parent +TEST_DATA_PATH = (CURRENT_PATH / "data").absolute() +CRATES_DATA_PATH = TEST_DATA_PATH / "crates" +VALID_CRATES_DATA_PATH = CRATES_DATA_PATH / "valid" +INVALID_CRATES_DATA_PATH = CRATES_DATA_PATH / "invalid" + + +@fixture +def ro_crates_path() -> Path: + return CRATES_DATA_PATH + + +BASE_PATH = CRATES_DATA_PATH / "rocrate-1.2" + + +class MetadataDocument: + + METADATA_DOCUMENT_CRATES_PATH = BASE_PATH / "1_metadata_document" + + @property + def invalid_context_reference(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "context_reference" / "invalid" + + @property + def valid_context_reference(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "context_reference" / "valid" + + @property + def not_referenced_contextual_entity(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "referenced_contextual_entities" / "invalid" + + @property + def 
valid_referenced_contextual_entity(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "referenced_contextual_entities" / "valid" + + @property + def described_contextual_entity(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "described_contextual_entities" / "valid" + + @property + def not_described_contextual_entity(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "described_contextual_entities" / "invalid" + + @property + def valid_no_parent_traversal(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "no_parent_traversal" / "valid" + + @property + def invalid_no_parent_traversal(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "no_parent_traversal" / "invalid" + + @property + def valid_utf8_identifiers(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "utf8_identifiers" / "valid" + + @property + def invalid_utf8_identifiers(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "utf8_identifiers" / "invalid" + + @property + def valid_named_entity_id_format(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "named_entity_id_format" / "valid" + + @property + def invalid_named_entity_id_format(self) -> Path: + return self.METADATA_DOCUMENT_CRATES_PATH / "named_entity_id_format" / "invalid" + + +class MetadataDocumentFormat: + + METADATA_DOCUMENT_FORMAT_CRATES_PATH = MetadataDocument.METADATA_DOCUMENT_CRATES_PATH / "format" + + @property + def not_compacted(self) -> Path: + return self.METADATA_DOCUMENT_FORMAT_CRATES_PATH / "compacted" + + @property + def not_flattened(self) -> Path: + return self.METADATA_DOCUMENT_FORMAT_CRATES_PATH / "flattened" + + @property + def not_jsonld(self) -> Path: + return self.METADATA_DOCUMENT_FORMAT_CRATES_PATH / "jsonld" + + @property + def not_utf8(self) -> Path: + return self.METADATA_DOCUMENT_FORMAT_CRATES_PATH / "utf8" + + +class AttachedROCrates: + + ATTACHED_ROCRATES_CRATES_PATH = BASE_PATH / "2_attached_rocrates" + + @property + def 
valid_preview_not_in_hasPart(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "attached-preview-not-in-hasPart" / "valid" + + @property + def invalid_preview_not_in_hasPart(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "attached-preview-not-in-hasPart" / "invalid" + + @property + def valid_non_relative_root_entity_id(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "non-relative-root-identifier" / "valid" + + @property + def invalid_non_relative_root_entity_id(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "non-relative-root-identifier" / "invalid" + + @property + def valid_relative_root_entity_id(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "relative-root-identifier" / "valid" + + @property + def invalid_relative_root_entity_id(self) -> Path: + return self.ATTACHED_ROCRATES_CRATES_PATH / "relative-root-identifier" / "invalid" + + +class DetachedROCrates: + + DETACHED_ROCRATES_CRATES_PATH = BASE_PATH / "3_detached_rocrates" + + __remote_sha__ = "1a54cc4c0152575357d937982de3e2567ab4a0f8" + + @property + def valid_local_descriptor_filename(self) -> Path: + return self.DETACHED_ROCRATES_CRATES_PATH / "naming-convention" / "local-descriptor" / "valid" + + @property + def invalid_local_descriptor_filename(self) -> Path: + return self.DETACHED_ROCRATES_CRATES_PATH / "naming-convention" / "local-descriptor" / "invalid" + + @property + def valid_root_data_entity_identifier_when_online_available(self) -> Path: + return self.DETACHED_ROCRATES_CRATES_PATH / "root-data-entity-identifier" / "online-available" / "valid" + + @property + def invalid_root_data_entity_identifier_when_online_available(self) -> Path: + return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/invalid/basic-ro-crate-metadata.json" + + @property + def valid_web_data_entity(self) -> Path: + return 
f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/valid/basic-ro-crate-metadata.json" + + @property + def invalid_web_data_entity(self) -> Path: + return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/invalid/basic-ro-crate-metadata.json" + + +class MetadataEntities: + + METADATA_ENTITIES_CRATES_PATH = BASE_PATH / "5_metadata_entities" + + @property + def valid_recommended_schema_type(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "recommended_schema_type" / "valid" + + @property + def invalid_recommended_schema_type(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "recommended_schema_type" / "invalid" + + @property + def valid_recommended_name(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "recommended_name" / "valid" + + @property + def invalid_recommended_name(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "recommended_name" / "invalid" + + @property + def valid_entity_reachability(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "entity_reachability" / "valid" + + @property + def invalid_entity_reachability(self) -> Path: + return self.METADATA_ENTITIES_CRATES_PATH / "entity_reachability" / "invalid" + + +class MetadataDescriptor: + + METADATA_DESCRIPTOR_CRATES_PATH = BASE_PATH / "6_metadata_descriptor" + + @property + def valid_single_value_conformsTo(self) -> Path: + return self.METADATA_DESCRIPTOR_CRATES_PATH / "recommended_conformsTo" / "single_value" / "valid" + + @property + def invalid_single_value_conformsTo(self) -> Path: + return self.METADATA_DESCRIPTOR_CRATES_PATH / "recommended_conformsTo" / "single_value" / "invalid" + + @property + def valid_recommended_prefix_conformsTo(self) -> Path: + return self.METADATA_DESCRIPTOR_CRATES_PATH / "recommended_conformsTo" / "recommended_prefix" / "valid" + + @property + def invalid_recommended_prefix_conformsTo(self) -> Path: + return self.METADATA_DESCRIPTOR_CRATES_PATH / 
"recommended_conformsTo" / "recommended_prefix" / "invalid" + + +class RootDataEntity: + + ROOT_DATA_ENTITY_CRATES_PATH = BASE_PATH / "7_root_data_entity" + + @property + def valid_required_datePublished(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_datePublished" / "valid" + + @property + def invalid_required_datePublished(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_datePublished" / "invalid" + + @property + def valid_required_downloadable_citeAs(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_downloadable_citeas" / "valid" + + @property + def invalid_required_downloadable_citeAs(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_downloadable_citeas" / "invalid" + + @property + def valid_recommended_citeAs_for_resolvable_id(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_citeas_for_resolvable_id" / "valid" + + @property + def invalid_recommended_citeAs_for_resolvable_id(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_citeas_for_resolvable_id" / "invalid" + + @property + def valid_additional_conformsTo_reference(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "additional_conformsTo" / "valid" + + @property + def invalid_additional_conformsTo_reference(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "additional_conformsTo" / "invalid" + + @property + def missing_root(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root" / "invalid" + + @property + def invalid_root_type(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "invalid_root_type" / "invalid" + + @property + def invalid_root_value(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "invalid_root_value" / "invalid" + + @property + def recommended_root_value(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_root_value" / "valid" + + @property + def invalid_root_date(self) -> Path: + 
return self.ROOT_DATA_ENTITY_CRATES_PATH / "invalid_root_date" / "invalid" + + @property + def invalid_recommended_root_date(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "invalid_recommended_root_date" / "invalid" + + @property + def missing_root_name(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root_name" / "invalid" + + @property + def missing_root_description(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root_description" / "invalid" + + @property + def missing_root_license(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root_license" / "invalid" + + @property + def missing_root_license_name(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root_license_name" / "invalid" + + @property + def missing_root_license_description(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "missing_root_license_description" / "invalid" + + @property + def valid_referenced_generic_data_entities(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "valid_referenced_generic_data_entities" / "valid" + + @property + def valid_recommended_identifier_resolution(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_resolution" / "valid" + + @property + def invalid_recommended_identifier_resolution(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_resolution" / "invalid" + + @property + def valid_recommended_publisher(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_publisher" / "valid" + + @property + def invalid_recommended_publisher(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_publisher" / "invalid" + + @property + def valid_recommended_funding(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_funding" / "valid" + + @property + def invalid_recommended_funding_no_funder(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / 
"recommended_funding" / "invalid_no_funder" + + @property + def invalid_recommended_funding_non_org_funder(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_funding" / "invalid_non_org_funder" + + @property + def invalid_recommended_funding_no_project_funder(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_funding" / "invalid_no_project_funder" + + # R1: datePublished day precision (SHOULD) + @property + def valid_recommended_datePublished_day_precision(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_datepublished_day_precision" / "valid" + + @property + def invalid_recommended_datePublished_day_precision(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_datepublished_day_precision" / "invalid" + + # R2: hasPart MUST reference all Data Entities + @property + def valid_required_hasPart_all_data_entities(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "valid" + + @property + def invalid_required_hasPart_all_data_entities(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid" + + @property + def invalid_hasPart_workflow_not_in_haspart(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_workflow_not_in_haspart" + + @property + def invalid_hasPart_web_entity_not_in_haspart(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_web_entity_not_in_haspart" + + @property + def invalid_hasPart_dataset_not_in_haspart(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_dataset_not_in_haspart" + + # R3: identifier SHOULD be present if PID exists (SHOULD) + @property + def valid_recommended_identifier_if_pid(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_if_pid" / "valid" + + 
@property + def invalid_recommended_identifier_if_pid(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_if_pid" / "invalid" + + # R4: identifier SHOULD use PropertyValue approach (SHOULD) + @property + def valid_recommended_identifier_propertyvalue(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_propertyvalue" / "valid" + + @property + def invalid_recommended_identifier_propertyvalue(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_identifier_propertyvalue" / "invalid" + + # R5: conformsTo SHOULD be present if profiles exist (SHOULD) + @property + def valid_recommended_conformsto_if_profiles(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_conformsto_if_profiles" / "valid" + + @property + def invalid_recommended_conformsto_if_profiles(self) -> Path: + return self.ROOT_DATA_ENTITY_CRATES_PATH / "recommended_conformsto_if_profiles" / "invalid" + + +class DataEntities: + + DATA_ENTITIES_CRATES_PATH = BASE_PATH / "8_metadata_dataEntities" + + @property + def valid_local_entity_reference(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "local_entity_reference" / "valid" + + @property + def invalid_local_entity_reference(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "local_entity_reference" / "invalid" + + @property + def valid_detached_rocrate_dataEntities(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "detached_rocrate_dataEntities" / "valid" + + @property + def invalid_detached_rocrate_dataEntities(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "detached_rocrate_dataEntities" / "invalid" + + @property + def valid_recommended_properties(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_properties" / "valid" + + @property + def invalid_recommended_properties(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_properties" / "invalid" + + @property + def 
valid_recommended_encoding_format(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_encoding_format" / "valid" + + @property + def invalid_recommended_encoding_format(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_encoding_format" / "invalid" + + @property + def valid_web_entity_downloadable(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_entity_downloadable" / "valid" + + @property + def invalid_web_entity_splash_page(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_entity_downloadable" / "invalid" + + @property + def valid_recommended_content_url(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_entity_content_url" / "valid" + + @property + def invalid_recommended_content_url(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_entity_content_url" / "invalid" + + @property + def valid_recommended_distribution(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_dataset_distribution" / "valid" + + @property + def invalid_recommended_distribution(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_dataset_distribution" / "invalid" + + @property + def valid_web_directory_distribution(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_directory_distribution" / "valid" + + @property + def invalid_web_directory_distribution(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "web_directory_distribution" / "invalid" + + @property + def valid_missing_file_local_path(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "missing_file_local_path" / "valid" + + @property + def invalid_missing_file_local_path(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "missing_file_local_path" / "invalid" + + @property + def valid_data_entity_license_divergence(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "data_entity_license" / "valid" + + @property + def invalid_data_entity_license_divergence(self) -> Path: + return 
self.DATA_ENTITIES_CRATES_PATH / "data_entity_license" / "invalid" + + @property + def valid_recommended_contentSize(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_contentSize" / "valid" + + @property + def invalid_recommended_contentSize(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_contentSize" / "invalid" + + @property + def valid_recommended_conformsto(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_conformsto" / "valid" + + @property + def invalid_recommended_conformsto(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_conformsto" / "invalid" + + @property + def valid_recommended_sdDatePublished(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_sdDatePublished" / "valid" + + @property + def invalid_recommended_sdDatePublished(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "recommended_sdDatePublished" / "invalid" + + # 4.3 Dataset (Directory) Data Entity — trailing slash + @property + def valid_recommended_dataset_trailing_slash(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "dataset_trailing_slash" / "valid" + + @property + def invalid_recommended_dataset_trailing_slash(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "dataset_trailing_slash" / "invalid" + + # 4.3 Dataset (Directory) Data Entity — hasPart + @property + def valid_recommended_dataset_has_part(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "dataset_has_part" / "valid" + + @property + def invalid_recommended_dataset_has_part(self) -> Path: + return self.DATA_ENTITIES_CRATES_PATH / "dataset_has_part" / "invalid" + + +class WorkflowsScripts: + + WORKFLOWS_SCRIPTS_CRATES_PATH = BASE_PATH / "11_workflows_scripts" + + # --- Script type --- + @property + def valid_script_type(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "script_type" / "valid" + + @property + def invalid_script_type(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / 
"script_type" / "invalid" + + # --- Script name --- + @property + def valid_script_name(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "script_name" / "valid" + + @property + def invalid_script_name(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "script_name" / "invalid" + + # --- Workflow type --- + @property + def valid_workflow_type(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_type" / "valid" + + @property + def invalid_workflow_type_missing_file(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_type" / "invalid_missing_file" + + @property + def invalid_workflow_type_missing_ssc(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_type" / "invalid_missing_ssc" + + # --- Workflow name --- + @property + def valid_workflow_name(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_name" / "valid" + + @property + def invalid_workflow_name(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_name" / "invalid" + + # --- programmingLanguage --- + @property + def valid_programming_language(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "programming_language" / "valid" + + @property + def invalid_programming_language(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "programming_language" / "invalid" + + # --- Workflow conformsTo --- + @property + def valid_workflow_conformsTo(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_conformsTo" / "valid" + + @property + def invalid_workflow_conformsTo(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "workflow_conformsTo" / "invalid" + + # --- ImageObject encodingFormat --- + @property + def valid_image_encoding_format(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "image_encoding_format" / "valid" + + @property + def invalid_image_encoding_format(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "image_encoding_format" / 
"invalid" + + # --- ImageObject about --- + @property + def valid_image_about(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "image_about" / "valid" + + @property + def invalid_image_about(self) -> Path: + return self.WORKFLOWS_SCRIPTS_CRATES_PATH / "image_about" / "invalid" + + +class ContextualEntities: + + CONTEXTUAL_ENTITIES_CRATES_PATH = BASE_PATH / "10_metadata_contextualEntities" + + # --- License entity: SHOULD be typed as CreativeWork --- + @property + def valid_license_entity(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "license_entity" / "valid" + + @property + def invalid_license_entity_no_type(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "license_entity" / "invalid_no_type" + + @property + def invalid_license_entity_no_url(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "license_entity" / "invalid_no_url" + + @property + def invalid_license_entity_no_name(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "license_entity" / "invalid_no_name" + + @property + def invalid_license_entity_no_description(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "license_entity" / "invalid_no_description" + + # --- Organization entity: SHOULD have ROR @id and contactPoint --- + @property + def valid_organization_entity(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "valid" + + @property + def invalid_organization_no_ror_id(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "invalid_no_ror_id" + + @property + def invalid_organization_no_contactpoint(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "invalid_org_no_contactpoint" + + @property + def invalid_organization_contactpoint_no_entity(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "invalid_contactpoint_no_contactpoint_entity" + + @property + def 
invalid_no_author_publisher_contactpoint(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "invalid_no_author_publisher_contactpoint" + + # --- Person entity: SHOULD have ORCID @id and valid affiliation --- + @property + def valid_person_entity(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "person_entity" / "valid" + + @property + def invalid_person_no_orcid(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "person_entity" / "invalid_no_orcid" + + @property + def invalid_person_affiliation_not_org(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "person_entity" / "invalid_affiliation_not_org" + + +class InvalidMultiProfileROC: + + @property + def invalid_multi_profile_crate(self) -> Path: + return INVALID_CRATES_DATA_PATH / "0_multi_profile_crate" + + +class ReferencedROCrates: + + REFERENCED_ROCRATES_CRATES_PATH = CRATES_DATA_PATH / "rocrate-1.2" / "9_referenced_rocrate" + + @property + def valid(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "valid" + + @property + def invalid_no_versionless_conformsto(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_no_versionless_conformsto" + + @property + def invalid_root_conformsto_versionless(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_root_conformsto_versionless" + + @property + def invalid_no_subjectof(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_no_subjectof" + + @property + def invalid_md_encoding_format(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_md_encoding_format" + + @property + def invalid_md_conformsto(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_md_conformsto" + + @property + def invalid_md_about(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_md_about" + + +class ValidROCrate12: + + base_path = VALID_CRATES_DATA_PATH + + @property + def attached(self) -> Path: + 
return self.base_path / "ro-crate-1.2-attached" + + @property + def attached_absolute_root(self) -> Path: + return self.base_path / "ro-crate-1.2-absolute-root" + + @property + def detached(self) -> Path: + return self.base_path / "detached" / "dataset-ro-crate-metadata.json" + + @property + def detached_prefixed(self) -> Path: + return self.base_path / "detached" / "test-ro-crate-metadata.json" + + +class InvalidROCrate12: + + base_path = INVALID_CRATES_DATA_PATH / "ro-crate-1.2" + + @property + def invalid_context(self) -> Path: + return self.base_path / "invalid-context" + + @property + def invalid_date_published(self) -> Path: + return self.base_path / "invalid-date-published" + + @property + def detached_relative_entity(self) -> Path: + return self.base_path / "detached-relative-entity" / "dataset-ro-crate-metadata.json" + + @property + def detached_bad_filename(self) -> Path: + return self.base_path / "detached-bad-filename" / "ro-crate-metadata.json" From 2429ecc97d298e6db16fb732687a1b5ae17e8b03 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 16:34:39 +0200 Subject: [PATCH 135/352] fix: :rotating_light: fix linter warnings --- .../1.2/should/1_file_descriptor_name.py | 8 +++---- .../should/2_root_data_entity_identifier.py | 3 ++- .../1.2/should/4_data_entity_metadata.py | 3 ++- .../ro-crate-1.2/test_detached_rocrates.py | 2 +- .../ro-crate-1.2/test_workflows_scripts.py | 3 ++- tests/ro_crates_v1_2.py | 22 ++++++++++++------- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py index f255c81c2..e2683fa5d 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/1_file_descriptor_name.py @@ -25,7 +25,7 @@ @requirement(name="File Descriptor naming convention") class 
FileDescriptorExistence(PyFunctionCheck): """ - If stored in a file, SHOULD be named ${prefix}-ro-crate-metadata.json, where the variable ${prefix} + If stored in a file, SHOULD be named ${prefix}-ro-crate-metadata.json, where the variable ${prefix} is a human readable version of the dataset’s ID or name. """ @@ -36,9 +36,6 @@ def test_detached_descriptor_filename(self, context: ValidationContext) -> bool: In a Detached RO-Crate, the file descriptor SHOULD be named `{prefix}-ro-crate-metadata.json`, where `{prefix}` is a human readable version of the dataset’s ID or name. """ - # context.result.add_issue( - # 'In a detached RO-Crate, the metadata descriptor filename MUST be `ro-crate-metadata.json` or `ro-crate-metadata.yaml`', self) - # return False if context.settings.metadata_only: logger.debug("Skipping file descriptor existence check in metadata-only mode") return True @@ -49,7 +46,8 @@ def test_detached_descriptor_filename(self, context: ValidationContext) -> bool: if context.ro_crate.is_detached(): # Check if the filename follows the convention fd_filename = context.ro_crate.get_descriptor_path() - if fd_filename and not (fd_filename.name.endswith("-ro-crate-metadata.json") or fd_filename.name.endswith("-ro-crate-metadata.yaml")): + if fd_filename and not (fd_filename.name.endswith("-ro-crate-metadata.json") or + fd_filename.name.endswith("-ro-crate-metadata.yaml")): context.result.add_issue( 'In a detached RO-Crate, the metadata descriptor filename ' 'SHOULD be named according to the convention `{prefix}-ro-crate-metadata.json` ', diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py index 35ed9fa90..3443acf47 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_identifier.py @@ -59,7 +59,8 @@ def check_identifier(self, context: 
ValidationContext) -> bool: @requirement(name="Root Data Entity: use cite-as for resolvable identifiers") class RootDataEntityCiteAsIdentifierChecker(PyFunctionCheck): """ - If the Root Data Entity has a resolvable identifier, it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity. + If the Root Data Entity has a resolvable identifier, + it SHOULD be included in the `cite-as` property of the RO-Crate Metadata Entity. """ @check(name="Root Data Entity: use cite-as for resolvable identifiers") diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py index dd79eac2d..1350eb5b8 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_data_entity_metadata.py @@ -153,7 +153,8 @@ def check_content_url(self, context: ValidationContext) -> bool: try: dl = check_downloadable(url_value) if not dl.is_downloadable: - msg = f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' is not directly downloadable" + msg = f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " \ + "is not directly downloadable" if dl.reason: msg += f": {dl.reason}" context.result.add_issue(msg, self) diff --git a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py index c45a9c2a5..cc142da76 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py +++ b/tests/integration/profiles/ro-crate-1.2/test_detached_rocrates.py @@ -79,7 +79,7 @@ def test_root_data_entity_identifier_when_online_available(): def test_invalid_root_data_entity_identifier_when_online_available(): """ - Test that when the RO-Crate is online available, + Test that when the RO-Crate is online available, the Root Data Entity @id SHOULD be an absolute URL in a detached RO-Crate. 
""" do_entity_test( diff --git a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py index 5620b8078..a947b04fe 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -322,6 +322,7 @@ def test_invalid_image_about(): profile_identifier="ro-crate-1.2", expected_triggered_requirements=["Script/Workflow ImageObject: RECOMMENDED `about` reference"], expected_triggered_issues=[ - "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `about` property referencing the script or workflow" + "An ImageObject referenced via `image` from a Script or Workflow SHOULD have an `about` property " + "referencing the script or workflow" ], ) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index fa64116a2..423630227 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -13,7 +13,6 @@ # limitations under the License. 
from pathlib import Path -from tempfile import TemporaryDirectory from pytest import fixture @@ -155,15 +154,18 @@ def valid_root_data_entity_identifier_when_online_available(self) -> Path: @property def invalid_root_data_entity_identifier_when_online_available(self) -> Path: - return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/invalid/basic-ro-crate-metadata.json" + return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}"\ + "/online-available/invalid/basic-ro-crate-metadata.json" @property def valid_web_data_entity(self) -> Path: - return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/valid/basic-ro-crate-metadata.json" + return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}"\ + "/online-available/valid/basic-ro-crate-metadata.json" @property def invalid_web_data_entity(self) -> Path: - return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}/online-available/invalid/basic-ro-crate-metadata.json" + return f"https://bitbucket.org/kikkomep/ro-crates/raw/{self.__remote_sha__}"\ + "/online-available/invalid/basic-ro-crate-metadata.json" class MetadataEntities: @@ -352,15 +354,18 @@ def invalid_required_hasPart_all_data_entities(self) -> Path: @property def invalid_hasPart_workflow_not_in_haspart(self) -> Path: - return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_workflow_not_in_haspart" + return self.ROOT_DATA_ENTITY_CRATES_PATH \ + / "required_haspart_all_data_entities" / "invalid_workflow_not_in_haspart" @property def invalid_hasPart_web_entity_not_in_haspart(self) -> Path: - return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_web_entity_not_in_haspart" + return self.ROOT_DATA_ENTITY_CRATES_PATH \ + / "required_haspart_all_data_entities" / "invalid_web_entity_not_in_haspart" @property def invalid_hasPart_dataset_not_in_haspart(self) -> Path: - 
return self.ROOT_DATA_ENTITY_CRATES_PATH / "required_haspart_all_data_entities" / "invalid_dataset_not_in_haspart" + return self.ROOT_DATA_ENTITY_CRATES_PATH \ + / "required_haspart_all_data_entities" / "invalid_dataset_not_in_haspart" # R3: identifier SHOULD be present if PID exists (SHOULD) @property @@ -638,7 +643,8 @@ def invalid_organization_no_contactpoint(self) -> Path: @property def invalid_organization_contactpoint_no_entity(self) -> Path: - return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "organization_entity" / "invalid_contactpoint_no_contactpoint_entity" + return self.CONTEXTUAL_ENTITIES_CRATES_PATH \ + / "organization_entity" / "invalid_contactpoint_no_contactpoint_entity" @property def invalid_no_author_publisher_contactpoint(self) -> Path: From ef75e8e9005b38c17e1dc0d2451effd9158895b2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 17:46:06 +0200 Subject: [PATCH 136/352] feat(ro-crate-1.2): :sparkles: additional check for Contextual Entities identifiers --- .../1.2/should/0_entity_identifier_format.py | 51 +++++++++++++------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_identifier_format.py b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_identifier_format.py index 2b5613a87..9ee80b02b 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_identifier_format.py +++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_identifier_format.py @@ -16,7 +16,7 @@ RECOMMENDED checks on entity @id values: - @id SHOULD NOT use ../ to climb out of the RO-Crate Root - International characters SHOULD be native UTF-8, not percent-encoded - - Named contextual entities (Person, Organization) SHOULD use #-prefixed @id + - Contextual entities SHOULD use absolute URI (permalink) or #-prefixed @id """ import re @@ -31,8 +31,9 @@ # These are UTF-8 continuation / leading bytes for multi-byte code-points. 
_PCT_NON_ASCII_RE = re.compile(r"%[89A-Fa-f][0-9A-Fa-f]") -# Types for which a non-absolute @id SHOULD start with '#' -_NAMED_ENTITY_TYPES = frozenset({"Person", "Organization"}) +# RFC 3986 absolute URI scheme prefix: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +# e.g. http:, https:, mailto:, urn:, doi:, ftp:, file: +_ABSOLUTE_URI_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9+\-.]*:") @requirement(name="Entity identifier: format recommendations") @@ -88,34 +89,52 @@ def check_utf8_identifiers(self, context: ValidationContext) -> bool: return result @check( - name="Named contextual entity @id SHOULD use '#' prefix", + name="Contextual entity @id SHOULD be absolute URI or '#'-prefixed", severity=Severity.RECOMMENDED, ) def check_named_entity_id_format(self, context: ValidationContext) -> bool: """ - Named entities such as Person or Organization that are referenced locally - SHOULD use an @id starting with '#' rather than a bare relative path - (RO-Crate 1.2, JSON-LD appendix). + Any Contextual Entity (Person, Organization, ContactPoint, PropertyValue, + Place, etc.) SHOULD use an @id that is an absolute URI (permalink), + a '#'-prefixed local identifier, or a blank node — not a bare relative + path (RO-Crate 1.2, 5.1 Any Contextual Entity). 
""" result = True - for entity in context.ro_crate.metadata.as_dict().get("@graph", []): + ro_crate_metadata = context.ro_crate.metadata + non_contextual_ids = set() + try: + non_contextual_ids.add(ro_crate_metadata.get_file_descriptor_entity().id) + except Exception: + pass + try: + non_contextual_ids.add(ro_crate_metadata.get_root_data_entity().id) + except Exception: + pass + try: + non_contextual_ids.update(e.id for e in ro_crate_metadata.get_data_entities()) + except Exception: + pass + + for entity in ro_crate_metadata.as_dict().get("@graph", []): entity_id = entity.get("@id", "") - raw_type = entity.get("@type", "") - types = [raw_type] if isinstance(raw_type, str) else raw_type - if not _NAMED_ENTITY_TYPES.intersection(types): + if entity_id in non_contextual_ids: + continue + raw_type = entity.get("@type") + if not raw_type: + # Reference-only stub (no @type) — out of scope for this check continue - # Absolute IRIs and blank nodes are fine; only flag bare relative paths + # Absolute URIs (any scheme), fragment-prefixed IDs, and blank nodes + # are all valid; only flag bare relative paths if ( - entity_id.startswith("http://") - or entity_id.startswith("https://") - or entity_id.startswith("#") + entity_id.startswith("#") or entity_id.startswith("_:") + or _ABSOLUTE_URI_RE.match(entity_id) ): continue context.result.add_issue( f"Entity @id '{entity_id}' of type '{raw_type}' is a local identifier " f"that does not start with '#'; named local entities SHOULD use a " - f"'#'-prefixed @id (e.g. '#alice')", + f"'#'-prefixed @id (e.g. 
'#alice') or an absolute URI (permalink)", self, ) result = False From 754dbbaf00116d89e84c758d96bbc5af8ff75945 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 17:49:24 +0200 Subject: [PATCH 137/352] test(ro-crate-1.2): :white_check_mark: test recommended identifier for Contextual Entities --- .../ro-crate-metadata.json | 47 ++++++++++ .../ro-crate-metadata.json | 40 ++++++++ .../valid/ro-crate-metadata.json | 91 +++++++++++++++++++ .../test_metadata_contextualEntities.py | 52 +++++++++++ 4 files changed, 230 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_contactpoint/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_propertyvalue/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_contactpoint/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_contactpoint/ro-crate-metadata.json new file mode 100644 index 000000000..b2e16f2e6 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_contactpoint/ro-crate-metadata.json @@ -0,0 +1,47 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Contextual entity @id format — invalid (bare ContactPoint id)", + "description": "RO-Crate where a ContactPoint contextual entity has a bare relative @id (no '#' prefix, not an absolute 
URI).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0001-0000-0001" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "https://orcid.org/0000-0001-0000-0001", + "@type": "Person", + "name": "Alice Researcher", + "contactPoint": { + "@id": "alice-contact" + } + }, + { + "@id": "alice-contact", + "@type": "ContactPoint", + "name": "Alice's contact point", + "email": "alice@example.edu" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_propertyvalue/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_propertyvalue/ro-crate-metadata.json new file mode 100644 index 000000000..aa8c029c0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/invalid_bare_propertyvalue/ro-crate-metadata.json @@ -0,0 +1,40 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Contextual entity @id format — invalid (bare PropertyValue id)", + "description": "RO-Crate where a PropertyValue contextual entity has a bare relative @id (no '#' prefix, not an absolute URI).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "identifier": { + "@id": "uuid-property" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0" + }, + { + "@id": "uuid-property", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": 
"550e8400-e29b-41d4-a716-446655440000" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/valid/ro-crate-metadata.json new file mode 100644 index 000000000..53e94703f --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/contextual_entity_id_format/valid/ro-crate-metadata.json @@ -0,0 +1,91 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Contextual entity @id format — valid", + "description": "RO-Crate whose contextual entities all use '#'-prefixed local identifiers or absolute URI permalinks.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "author": { + "@id": "https://orcid.org/0000-0001-0000-0001" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "identifier": { + "@id": "#uuid" + }, + "contentLocation": { + "@id": "#lab-place" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "CC BY 4.0", + "description": "Creative Commons Attribution 4.0 International license." 
+ }, + { + "@id": "https://orcid.org/0000-0001-0000-0001", + "@type": "Person", + "name": "Alice Researcher", + "affiliation": { + "@id": "https://ror.org/05f9q8d28" + }, + "contactPoint": { + "@id": "#alice-contact" + } + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example University", + "url": "https://example.edu", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#alice-contact", + "@type": "ContactPoint", + "name": "Alice's contact point", + "email": "alice@example.edu" + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Example University contact", + "email": "contact@example.edu" + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "#lab-place", + "@type": "Place", + "name": "Example Research Lab", + "description": "The physical location where the research took place." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 7ea99b2b1..b855cfd26 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -277,3 +277,55 @@ def test_invalid_person_affiliation_not_org(): "Persons SHOULD reference an Organization for affiliation" ], ) + + +# --------------------------------------------------------------------------- +# Any Contextual Entity: @id SHOULD be absolute URI or '#'-prefixed (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_contextual_entity_id_format(): + """ + A crate whose contextual entities all use absolute URI permalinks or + '#'-prefixed local identifiers SHOULD pass RECOMMENDED validation. 
+ """ + do_entity_test( + __contextual_entities_crates__.valid_contextual_entity_id_format, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_PERSON_VALID_SKIP, + ) + + +def test_invalid_bare_contactpoint_id(): + """ + A ContactPoint contextual entity with a bare relative @id (no '#', not an + absolute URI) SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_contextual_entity_bare_contactpoint, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Entity identifier: format recommendations"], + expected_triggered_issues=[ + "named local entities SHOULD use a '#'-prefixed @id" + ], + ) + + +def test_invalid_bare_propertyvalue_id(): + """ + A PropertyValue contextual entity with a bare relative @id (no '#', not an + absolute URI) SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_contextual_entity_bare_propertyvalue, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["Entity identifier: format recommendations"], + expected_triggered_issues=[ + "named local entities SHOULD use a '#'-prefixed @id" + ], + ) From 746f5b7ec7dc6940cb0237d800454d0470578914 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 18:27:42 +0200 Subject: [PATCH 138/352] feat(ro-crate-1.2): :sparkles: improve identifier check; add sdDatePublished check --- .../1.2/should/4_referenced_rocrate.py | 110 ++++++++++++++++++ .../1.2/should/4_referenced_rocrate.ttl | 35 ++++++ rocrate_validator/utils/signposting.py | 20 ++++ 3 files changed, 165 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.py diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.py b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.py new file mode 100644 
index 000000000..af03810e0 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.py @@ -0,0 +1,110 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Network-aware refinement for Referenced RO-Crate `sdDatePublished` check. + +The structural SHACL shape `ReferencedROCrateSdDatePublishedRecommended` +already warns when a referenced RO-Crate data entity with an absolute URI +@id omits both `identifier` and `sdDatePublished`. RO-Crate 1.2 § 4.5 +relaxes that obligation when the URI declares Signposting +`Link: rel="cite-as"` (which supplies a persistent citation surrogate). +This Python check performs the network-dependent refinement. +""" + +from rocrate_validator.models import Severity, ValidationContext +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.signposting import has_signposting_cite_as + +logger = logging.getLogger(__name__) + + +@requirement( + name="Referenced RO-Crate: Signposting cite-as refinement for sdDatePublished" +) +class ReferencedROCrateSignpostingCiteAsChecker(PyFunctionCheck): + """ + Network-aware refinement of the `sdDatePublished` requirement for + referenced RO-Crate data entities (RO-Crate 1.2, § 4.5). 
+ """ + + @check( + name="Referenced RO-Crate: `sdDatePublished` SHOULD be present when Signposting cite-as is not declared", + severity=Severity.RECOMMENDED, + ) + def check_sddatepublished_signposting(self, context: ValidationContext) -> bool: + """ + For each referenced RO-Crate data entity whose @id is an absolute URI + with no declared `identifier` and no `sdDatePublished`, verify whether + the URI declares Signposting `Link: rel="cite-as"`. If cite-as is + absent the entity SHOULD include `sdDatePublished`. + """ + if context.settings.skip_availability_check: + return True + if not (context.settings.creation_time or context.settings.enforce_availability): + return True + if context.settings.metadata_only: + return True + + result = True + try: + root = context.ro_crate.metadata.get_root_data_entity() + except Exception: + return True + + for entity in context.ro_crate.metadata.get_dataset_entities(): + if entity.id == root.id: + continue + entity_id = entity.id + if not (entity_id.startswith("http://") or entity_id.startswith("https://")): + continue + conforms_to = entity.get_property("conformsTo", []) + if not isinstance(conforms_to, list): + conforms_to = [conforms_to] + conforms_to_ids = [c.id if hasattr(c, "id") else str(c) for c in conforms_to] + if not any(c and c.startswith("https://w3id.org/ro/crate") for c in conforms_to_ids): + continue + if entity.get_property("identifier"): + continue + if entity.get_property("sdDatePublished"): + continue + + cite_as_present = has_signposting_cite_as(entity_id) + if cite_as_present is None: + logger.debug( + "Signposting probe for referenced RO-Crate '%s' failed; " + "skipping cite-as refinement.", + entity_id, + ) + continue + if cite_as_present: + logger.debug( + "Referenced RO-Crate '%s' declares Signposting rel='cite-as'; " + "`sdDatePublished` not required.", + entity_id, + ) + continue + + context.result.add_issue( + f"Referenced RO-Crate '{entity_id}' has no Signposting " + f"`Link: rel=\"cite-as\"` 
declared; `sdDatePublished` SHOULD be " + f"present to indicate when the absolute URI was accessed", + self, + ) + result = False + if context.fail_fast: + return result + + return result diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl index 13e3558bd..26a96b3a2 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/4_referenced_rocrate.ttl @@ -115,6 +115,41 @@ ro-crate:ReferencedROCrateSubjectOfRecommended a sh:NodeShape ; sh:message "Referenced RO-Crate data entity SHOULD have a subjectOf property linking to the referenced crate metadata descriptor" ; ] . +# --------------------------------------------------------------- +# Referenced RO-Crate data entity whose @id is an absolute URI +# without a declared persistent identifier SHOULD include +# `sdDatePublished` to indicate when the absolute URI was +# accessed (unless Signposting rel=cite-as is declared — that +# conditional refinement is verified separately by the Python +# check `ReferencedROCrateSignpostingCiteAsChecker`). +# (RO-Crate 1.2, § 4.5 — Determining entity identifier for a +# referenced RO-Crate, step #3.) +# --------------------------------------------------------------- +ro-crate:ReferencedROCrateSdDatePublishedRecommended a sh:NodeShape ; + sh:name "Referenced RO-Crate: SHOULD have sdDatePublished when @id is an absolute URI without persistent identifier" ; + sh:description """A data entity that represents a referenced RO-Crate + whose @id is an absolute HTTP(S) URI — and that does not declare a + persistent identifier via `identifier` — SHOULD include the + `sdDatePublished` timestamp to indicate when the URI was accessed + (RO-Crate 1.2, § 4.5). 
This structural check does not inspect + Signposting `Link: rel=cite-as` headers; that refinement is performed + by a separate network-aware Python check.""" ; + sh:target ro-crate:ReferencedROCrateDataEntityTarget ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this + WHERE { + FILTER(STRSTARTS(STR($this), "http://") || STRSTARTS(STR($this), "https://")) + FILTER NOT EXISTS { $this schema:identifier ?id } + FILTER NOT EXISTS { $this schema:sdDatePublished ?date } + } + """ ; + sh:severity sh:Warning ; + sh:message "A Referenced RO-Crate data entity with an absolute URI @id and no declared persistent identifier SHOULD include `sdDatePublished` to indicate when the URI was accessed (unless Signposting `Link: rel=cite-as` is declared on that URI)" ; + ] . + # --------------------------------------------------------------- # Metadata descriptor of a referenced RO-Crate # diff --git a/rocrate_validator/utils/signposting.py b/rocrate_validator/utils/signposting.py index a3e9adfa2..4f21a4259 100644 --- a/rocrate_validator/utils/signposting.py +++ b/rocrate_validator/utils/signposting.py @@ -163,3 +163,23 @@ def check_downloadable(url: str) -> DownloadabilityResult: is_downloadable=False, reason=str(e), ) + + +def has_signposting_cite_as(url: str) -> Optional[bool]: + """ + Probe *url* for a Signposting ``Link: rel="cite-as"`` header (RFC 8574). + + :param url: The URL to probe via HTTP HEAD. + :returns: ``True`` if the response declares a ``rel="cite-as"`` link, + ``False`` if the response succeeds but no such link is declared, + ``None`` if the request fails (caller can treat as "unknown" and + skip network-dependent checks). 
+ """ + try: + response = HttpRequester().head(url, allow_redirects=True) + response.raise_for_status() + cite_as = response.links.get("cite-as") + return cite_as is not None + except Exception as e: + logger.debug("Error checking Signposting cite-as for '%s': %s", url, e) + return None From faca1d91483d575e85036b34cab9d513c5dee15e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 18:28:47 +0200 Subject: [PATCH 139/352] test(ro-crate-1.2): :white_check_mark: test identifiers and sdDatePublished property --- .../ro-crate-metadata.json | 113 ++++++++++++++++ .../valid/ro-crate-metadata.json | 1 + .../ro-crate-metadata.json | 114 ++++++++++++++++ .../ro-crate-metadata.json | 117 +++++++++++++++++ .../subcrate/data.txt | 0 .../ro-crate-1.2/test_referenced_rocrate.py | 123 +++++++++++++++++- tests/ro_crates_v1_2.py | 25 ++++ 7 files changed, 492 insertions(+), 1 deletion(-) create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_missing_sddatepublished/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_identifier/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/subcrate/data.txt diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_missing_sddatepublished/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_missing_sddatepublished/ro-crate-metadata.json new file mode 100644 index 000000000..cbc280e2d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/invalid_missing_sddatepublished/ro-crate-metadata.json @@ -0,0 +1,113 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + 
"about": { + "@id": "./" + }, + "name": "RO-Crate Metadata Descriptor" + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate referencing another RO-Crate (missing sdDatePublished)", + "description": "An RO-Crate referencing another RO-Crate whose @id is an absolute URI not declared as a persistent identifier; the referenced entity lacks sdDatePublished.", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "datePublished": "2024-01-01", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "https://example.org/other-ro-crate/" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://example.org/other-ro-crate/", + "@type": "Dataset", + "name": "Other RO-Crate", + "description": "A referenced RO-Crate with neither identifier nor sdDatePublished.", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2" + } + ], + "subjectOf": { + "@id": "#other-ro-crate-metadata" + }, + "distribution": { + "@id": "https://example.org/other-ro-crate/archive.tar.gz" + } + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "name": "Other RO-Crate Metadata Descriptor", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://example.org/other-ro-crate/archive.tar.gz", + "@type": "DataDownload", + "name": "Other RO-Crate Archive", + "encodingFormat": "application/gzip" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + 
"name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." + }, + { + "@id": "https://w3id.org/ro/crate", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate 1.2" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json index aed146554..7d605e478 100644 --- a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid/ro-crate-metadata.json @@ -62,6 +62,7 @@ "subjectOf": { "@id": "#other-ro-crate-metadata" }, + "sdDatePublished": "2024-01-01", "distribution": { "@id": "https://example.org/other-ro-crate/archive.tar.gz" } diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_identifier/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_identifier/ro-crate-metadata.json new file mode 100644 index 000000000..512cdfd0d --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_identifier/ro-crate-metadata.json @@ -0,0 +1,114 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + }, + "name": "RO-Crate Metadata Descriptor" + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate referencing another RO-Crate with declared identifier", + "description": "An RO-Crate referencing another RO-Crate whose @id is a declared persistent identifier (case #1): no sdDatePublished required.", + 
"license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "datePublished": "2024-01-01", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "https://example.org/persistent-crate/" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://example.org/persistent-crate/", + "@type": "Dataset", + "name": "Other RO-Crate with persistent identifier", + "description": "A referenced RO-Crate whose @id IS a declared persistent identifier (DOI).", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2" + } + ], + "identifier": "10.5281/zenodo.1234567", + "subjectOf": { + "@id": "#other-ro-crate-metadata" + }, + "distribution": { + "@id": "https://example.org/persistent-crate//archive.zip" + } + }, + { + "@id": "https://example.org/persistent-crate//archive.zip", + "@type": "DataDownload", + "name": "Other RO-Crate Archive", + "encodingFormat": "application/zip" + }, + { + "@id": "#other-ro-crate-metadata", + "@type": "CreativeWork", + "name": "Other RO-Crate Metadata Descriptor", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ }, + { + "@id": "https://w3id.org/ro/crate", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate 1.2" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/ro-crate-metadata.json new file mode 100644 index 000000000..88e6b56b0 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/ro-crate-metadata.json @@ -0,0 +1,117 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": "./" + }, + "name": "RO-Crate Metadata Descriptor" + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Attached crate referencing a nested RO-Crate by relative path", + "description": "An attached RO-Crate (A) referencing another nested RO-Crate (B) via a relative path (case #2): no sdDatePublished required.", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "datePublished": "2024-01-01", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "publisher": { + "@id": "https://ror.org/012345678" + }, + "funder": { + "@id": "https://ror.org/012345678" + }, + "hasPart": [ + { + "@id": "subcrate/" + } + ], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "subcrate/", + "@type": "Dataset", + "name": "Nested RO-Crate", + "description": "A referenced RO-Crate nested within A, 
identified by relative path.", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2" + } + ], + "hasPart": [ + { + "@id": "subcrate/data.txt" + } + ], + "subjectOf": { + "@id": "#subcrate-metadata" + } + }, + { + "@id": "subcrate/data.txt", + "@type": "File", + "name": "Nested data file", + "description": "A data file inside the nested RO-Crate.", + "encodingFormat": "text/plain", + "contentSize": "0" + }, + { + "@id": "#subcrate-metadata", + "@type": "CreativeWork", + "name": "Nested RO-Crate Metadata Descriptor", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://ror.org/012345678", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#contact-point" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "A Creative Commons license." 
+ }, + { + "@id": "https://w3id.org/ro/crate", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate" + }, + { + "@id": "https://w3id.org/ro/crate/1.2", + "@type": ["CreativeWork", "Profile"], + "name": "RO-Crate 1.2" + }, + { + "@id": "#contact-point", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/subcrate/data.txt b/tests/data/crates/rocrate-1.2/9_referenced_rocrate/valid_with_relative_path/subcrate/data.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 8a4c6358c..66c277e3b 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -15,6 +15,7 @@ import logging from rocrate_validator import models +from rocrate_validator.utils.http import HttpRequester from tests.ro_crates_v1_2 import ReferencedROCrates from tests.shared import do_entity_test @@ -33,7 +34,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_69.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], ) @@ -148,3 +149,123 @@ def test_invalid_referenced_rocrate_md_about(): "about" ], ) + + +def test_invalid_referenced_rocrate_missing_sddatepublished(): + """ + A referenced RO-Crate data entity whose @id is an absolute URI not declared + as a persistent identifier SHOULD include `sdDatePublished`; omitting it + triggers a RECOMMENDED warning (RO-Crate 1.2, § 4.5). 
+ """ + do_entity_test( + __referenced_rocrate_crates__.invalid_missing_sddatepublished, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "Referenced RO-Crate: SHOULD have sdDatePublished when @id is an absolute URI without persistent identifier" + ], + expected_triggered_issues=[ + "SHOULD include `sdDatePublished` to indicate when the URI was accessed" + ], + ) + + +def test_valid_referenced_rocrate_with_identifier(): + """ + A referenced RO-Crate with a declared `identifier` (case #1) does NOT + require `sdDatePublished`; the structural SHACL check should NOT fire. + """ + do_entity_test( + __referenced_rocrate_crates__.valid_with_identifier, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], + ) + + +def test_valid_referenced_rocrate_with_relative_path(): + """ + A referenced RO-Crate with a relative path @id (case #2: attached nested + sub-crate) does NOT require `sdDatePublished`; the structural SHACL check + should NOT fire on relative path @ids. + """ + do_entity_test( + __referenced_rocrate_crates__.valid_with_relative_path, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], + ) + + +def test_invalid_referenced_rocrate_no_cite_as_signposting(monkeypatch): + """ + Python network-aware refinement: when the referenced RO-Crate @id does NOT + declare Signposting `Link: rel="cite-as"`, `sdDatePublished` SHOULD be + present. Mocked HEAD response returns no `cite-as` link. 
+ """ + class _NoCiteAsResponse: + status_code = 200 + headers = {"Content-Type": "text/html"} + links = {} + + def raise_for_status(self): + pass + + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _NoCiteAsResponse()) + + do_entity_test( + __referenced_rocrate_crates__.invalid_missing_sddatepublished, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + enforce_availability=True, + expected_triggered_requirements=[ + "Referenced RO-Crate: Signposting cite-as refinement for sdDatePublished" + ], + expected_triggered_issues=[ + "has no Signposting `Link: rel=\"cite-as\"` declared" + ], + ) + + +def test_valid_referenced_rocrate_with_cite_as_signposting(monkeypatch): + """ + Python network-aware refinement: when the referenced RO-Crate @id DOES + declare Signposting `Link: rel="cite-as"`, `sdDatePublished` is NOT + required; the Python check suppresses its warning. The structural SHACL + warning still fires (it cannot inspect network headers). + """ + class _CiteAsResponse: + status_code = 200 + headers = {"Content-Type": "text/html"} + links = { + "cite-as": { + "url": "https://doi.org/10.5281/zenodo.1234567", + "rel": "cite-as", + } + } + + def raise_for_status(self): + pass + + monkeypatch.setattr(HttpRequester(), "head", lambda url, **kw: _CiteAsResponse()) + + # Python check passes (cite-as present); SHACL structural shape still + # warns. We verify the Python-specific requirement is NOT triggered. 
+ from rocrate_validator import services + result = services.validate({ + "rocrate_uri": str(__referenced_rocrate_crates__.invalid_missing_sddatepublished), + "profile_identifier": "ro-crate-1.2", + "requirement_severity": models.Severity.RECOMMENDED, + "enforce_availability": True, + }) + failed_requirement_names = { + issue.check.requirement.name for issue in result.get_issues() + } + assert ( + "Referenced RO-Crate: Signposting cite-as refinement for sdDatePublished" + not in failed_requirement_names + ), "Python cite-as refinement should NOT fire when cite-as is declared" diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index 423630227..313c78b30 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -663,6 +663,19 @@ def invalid_person_no_orcid(self) -> Path: def invalid_person_affiliation_not_org(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "person_entity" / "invalid_affiliation_not_org" + # --- Any Contextual Entity: SHOULD have absolute URI or '#'-prefixed @id --- + @property + def valid_contextual_entity_id_format(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "contextual_entity_id_format" / "valid" + + @property + def invalid_contextual_entity_bare_contactpoint(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "contextual_entity_id_format" / "invalid_bare_contactpoint" + + @property + def invalid_contextual_entity_bare_propertyvalue(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "contextual_entity_id_format" / "invalid_bare_propertyvalue" + class InvalidMultiProfileROC: @@ -703,6 +716,18 @@ def invalid_md_conformsto(self) -> Path: def invalid_md_about(self) -> Path: return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_md_about" + @property + def invalid_missing_sddatepublished(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "invalid_missing_sddatepublished" + + @property + def valid_with_identifier(self) -> Path: + return 
self.REFERENCED_ROCRATES_CRATES_PATH / "valid_with_identifier" + + @property + def valid_with_relative_path(self) -> Path: + return self.REFERENCED_ROCRATES_CRATES_PATH / "valid_with_relative_path" + class ValidROCrate12: From 4cbffdf052e20f7c9558ea48fe7e3797613d28e4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 17 Apr 2026 19:24:13 +0200 Subject: [PATCH 140/352] refactor(ro-crate-1.2): :recycle: update ContextualEntity definition --- .../1.2/must/6_contextual_entity_metadata.ttl | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 7db4c22fa..612a27297 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -25,42 +25,63 @@ ro-crate:ContextualEntityDefinition a sh:NodeShape, validator:HiddenShape ; sh:name "Identify Contextual Entities" ; sh:description """Mark entities as Contextual Entities if they are in the RO-Crate metadata but are not Data Entities, not the Root Data Entity, - and not the Metadata File Descriptor.""" ; + and not the Metadata File Descriptor. The query uses `?this ?p ?o` + (subject of any triple) — SHACL rules from earlier shapes (e.g. + CreativeWorkAuthorDefinition) tag referenced-but-undescribed entities + with rdf:type, so they appear as subjects and are therefore captured. 
+ Vocabulary namespaces loaded from ontologies (RDF/RDFS/OWL/XSD, + schema.org, bioschemas, w3id.org/ro, …) are excluded at the query + level; the legacy blanket `urn:` filter, which wrongly excluded legitimate + crate identifiers (e.g. `urn:uuid:*`), is narrowed to `urn:absolute:`.""" ; sh:target [ a sh:SPARQLTarget ; sh:prefixes ro-crate:sparqlPrefixes ; sh:select """ SELECT DISTINCT ?this WHERE { + # Structural criteria: IRI or blank node, subject of some triple. + # SHACL rules from earlier shapes (e.g. CreativeWorkAuthorDefinition) + # tag even referenced-but-undescribed entities so they become + # subjects and are correctly classified as Contextual Entities. ?this ?p ?o . - FILTER(isIRI(?this)) + FILTER(isIRI(?this) || isBlank(?this)) + + # Structural exclusions — Metadata Descriptor, Root Data Entity, + # Data Entities (File/Dataset or ro-crate:DataEntity), Ontology. FILTER NOT EXISTS { ?this schema:about ?anyRoot } FILTER NOT EXISTS { ?anyMF schema:about ?this } + FILTER NOT EXISTS { ?this a ro-crate:DataEntity } FILTER NOT EXISTS { ?this a schema:MediaObject } FILTER NOT EXISTS { ?this a schema:Dataset } FILTER NOT EXISTS { ?this a owl:Ontology } - FILTER NOT EXISTS { - ?root schema:hasPart ?this . - ?anyAbout schema:about ?root . - } + + # Vocabulary / profile namespace exclusions. Required because + # the RO-Crate 1.2 profile loads ontology.ttl into the same data + # graph as the metadata file; terms from those vocabularies + # appear as subjects with rdf:type + rdfs:label and would + # otherwise be misclassified. 
FILTER(!STRSTARTS(STR(?this), "http://www.w3.org/")) FILTER(!STRSTARTS(STR(?this), "https://www.w3.org/")) - FILTER(!STRSTARTS(STR(?this), "https://w3id.org/ro/")) FILTER(!STRSTARTS(STR(?this), "http://schema.org/")) FILTER(!STRSTARTS(STR(?this), "https://schema.org/")) FILTER(!STRSTARTS(STR(?this), "http://purl.org/")) FILTER(!STRSTARTS(STR(?this), "https://bioschemas.org/")) - FILTER(!STRSTARTS(STR(?this), "https://codemeta.github.io/")) FILTER(!STRSTARTS(STR(?this), "http://www.opengis.net/")) + FILTER(!STRSTARTS(STR(?this), "https://w3id.org/")) + FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) + FILTER(!STRSTARTS(STR(?this), "https://codemeta.github.io/")) FILTER(!STRSTARTS(STR(?this), "http://pcdm.org/")) FILTER(!STRSTARTS(STR(?this), "http://xmlns.com/foaf/")) FILTER(!STRSTARTS(STR(?this), "http://creativecommons.org/ns")) FILTER(!STRSTARTS(STR(?this), "http://www.iana.org/")) FILTER(!STRSTARTS(STR(?this), "https://www.iana.org/")) - FILTER(!STRSTARTS(STR(?this), "https://github.com/crs4/rocrate-validator/")) FILTER(!STRSTARTS(STR(?this), "https://nationaalarchief.nl/archieven/archief/")) FILTER(!STRSTARTS(STR(?this), "https://www.nationalarchives.gov.uk")) - FILTER(!STRSTARTS(STR(?this), "urn:")) + + # Narrow urn: exclusion — only the internal ontology versionIRI. + # Unlike the legacy blanket `urn:` filter, this keeps legitimate + # `urn:uuid:*` contextual entities (e.g. DigitalDocument) in play. 
+ FILTER(!STRSTARTS(STR(?this), "urn:absolute:")) } """ ] ; From 2dfd023efadae6ef06db24270594ff30f4d05a7e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 08:57:12 +0200 Subject: [PATCH 141/352] feat(ro-crate-1.2): :sparkles: check for recommended version of Software entity --- .../1.2/should/6_contextual_entity_metadata.ttl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 2384415dd..464ab6354 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -209,6 +209,22 @@ ro-crate:GeometryWktRecommendedProperties a sh:NodeShape ; sh:message "Geometry entities SHOULD provide `asWKT`" ; ] . +ro-crate:SoftwareApplicationVersionRecommended a sh:NodeShape ; + sh:name "SoftwareApplication: SHOULD have `version`" ; + sh:description """A SoftwareApplication contextual entity SHOULD declare a + `version` property identifying the specific release used (RO-Crate 1.2, + Provenance — Software Applications).""" ; + sh:targetClass schema:SoftwareApplication ; + sh:property [ + a sh:PropertyShape ; + sh:name "SoftwareApplication: RECOMMENDED `version` property" ; + sh:description "Check that a SoftwareApplication entity declares a `version` property." ; + sh:path schema:version ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "A SoftwareApplication SHOULD have a `version` property" ; + ] . 
+ ro-crate:LicenseEntityAbsoluteUrl a sh:NodeShape ; sh:name "License entity: SHOULD have absolute URL @id" ; sh:description """A License entity SHOULD be identified by an absolute HTTP(S) URL, From 534677b6480f9037b8da9b8cd630f58dfb3e2736 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 08:58:44 +0200 Subject: [PATCH 142/352] test(ro-crate-1.2): :white_check_mark: test recommended version property of Software entities --- .../invalid_no_version/ro-crate-metadata.json | 49 +++++++++++++ .../valid/ro-crate-metadata.json | 71 +++++++++++++++++++ .../test_metadata_contextualEntities.py | 35 +++++++++ tests/ro_crates_v1_2.py | 9 +++ 4 files changed, 164 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_version/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_version/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_version/ro-crate-metadata.json new file mode 100644 index 000000000..e17de8920 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_version/ro-crate-metadata.json @@ -0,0 +1,49 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "SoftwareApplication entity — invalid (no version)", + "description": "RO-Crate referencing a SoftwareApplication contextual entity that does NOT declare a version property.", + "datePublished": "2024-01-01", + "license": { + "@id": 
"https://creativecommons.org/licenses/by/4.0/" + }, + "mentions": { + "@id": "#create-action" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "#create-action", + "@type": "CreateAction", + "name": "Run analysis tool", + "instrument": { + "@id": "#analysis-tool" + } + }, + { + "@id": "#analysis-tool", + "@type": "SoftwareApplication", + "name": "Analysis Tool", + "url": "https://example.org/analysis-tool", + "description": "Software used to perform the data analysis (missing `version`)." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid/ro-crate-metadata.json new file mode 100644 index 000000000..638ecdffa --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid/ro-crate-metadata.json @@ -0,0 +1,71 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "SoftwareApplication entity — valid", + "description": "RO-Crate referencing a SoftwareApplication contextual entity that declares a version (used as instrument of a CreateAction).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "mentions": { + "@id": "#create-action" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + 
"description": "CC BY 4.0 license." + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Example Research Institute Contact", + "email": "contact@example.org" + }, + { + "@id": "#create-action", + "@type": "CreateAction", + "name": "Run analysis tool", + "instrument": { + "@id": "#analysis-tool" + } + }, + { + "@id": "#analysis-tool", + "@type": "SoftwareApplication", + "name": "Analysis Tool", + "version": "1.4.2", + "url": "https://example.org/analysis-tool", + "description": "Software used to perform the data analysis." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index b855cfd26..d34e376cb 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -329,3 +329,38 @@ def test_invalid_bare_propertyvalue_id(): "named local entities SHOULD use a '#'-prefixed @id" ], ) + + +# --------------------------------------------------------------------------- +# SoftwareApplication: SHOULD have `version` (SHOULD) +# --------------------------------------------------------------------------- + +def test_valid_software_application(): + """ + A SoftwareApplication contextual entity that declares a `version` property + SHOULD pass RECOMMENDED validation. 
+ """ + do_entity_test( + __contextual_entities_crates__.valid_software_application, + models.Severity.RECOMMENDED, + True, + profile_identifier="ro-crate-1.2", + skip_checks=_GENERIC_RECOMMENDED_SKIP, + ) + + +def test_invalid_software_application_no_version(): + """ + A SoftwareApplication contextual entity missing the `version` property + SHOULD trigger a RECOMMENDED warning. + """ + do_entity_test( + __contextual_entities_crates__.invalid_software_application_no_version, + models.Severity.RECOMMENDED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=["SoftwareApplication: SHOULD have `version`"], + expected_triggered_issues=[ + "A SoftwareApplication SHOULD have a `version` property" + ], + ) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index 313c78b30..2eea67908 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -676,6 +676,15 @@ def invalid_contextual_entity_bare_contactpoint(self) -> Path: def invalid_contextual_entity_bare_propertyvalue(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "contextual_entity_id_format" / "invalid_bare_propertyvalue" + # --- SoftwareApplication: SHOULD have version (5.7) --- + @property + def valid_software_application(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid" + + @property + def invalid_software_application_no_version(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "invalid_no_version" + class InvalidMultiProfileROC: From a4b553f9176cdb792179541c5da55be8168e3d53 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 09:25:35 +0200 Subject: [PATCH 143/352] feat(ro-crate-1.2): :sparkles: add checks for required properties of SoftwareApplication and ComputerLanguage entities --- .../1.2/must/6_contextual_entity_metadata.ttl | 49 ++++++++++++++++++- .../should/6_contextual_entity_metadata.ttl | 16 ------ 2 files changed, 48 insertions(+), 17 
deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl index 612a27297..8f7e0e877 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/6_contextual_entity_metadata.ttl @@ -107,7 +107,7 @@ ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; } """ ] ; - + # Expand data graph with triples to mark the matching entities as License instances sh:rule [ a sh:TripleRule ; @@ -164,3 +164,50 @@ ro-crate:ThumbnailReferencesFile a sh:NodeShape ; sh:class schema:MediaObject ; sh:message "If present, `thumbnail` MUST reference a File data entity" ; ] . + +# A contextual entity representing a `ComputerLanguage` and/or `SoftwareApplication` +# MUST have `name`, `url`, and `version`. +ro-crate:SoftwareApplicationOrComputerLanguageRequiredProperties a sh:NodeShape ; + sh:name "SoftwareApplication or ComputerLanguage: REQUIRED `name`, `url`, `version`" ; + sh:description """A contextual entity representing a `SoftwareApplication` + or a `ComputerLanguage` MUST declare `name`, `url`, and `version` + properties (RO-Crate 1.2, Provenance — Software Applications; + Workflows — programmingLanguage).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ?t . + FILTER(?t IN (schema:SoftwareApplication, schema:ComputerLanguage)) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "SoftwareApplication/ComputerLanguage: REQUIRED `name`" ; + sh:description "Check that the entity declares a `name` property." 
; + sh:path schema:name ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A SoftwareApplication or ComputerLanguage MUST have a `name` property" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "SoftwareApplication/ComputerLanguage: REQUIRED `url`" ; + sh:description "Check that the entity declares a `url` property." ; + sh:path schema:url ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A SoftwareApplication or ComputerLanguage MUST have a `url` property" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "SoftwareApplication/ComputerLanguage: REQUIRED `version`" ; + sh:description "Check that the entity declares a `version` property indicating a known release." ; + sh:path schema:version ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "A SoftwareApplication or ComputerLanguage MUST have a `version` property (SHOULD indicate a known version that linked workflows/scripts were developed/tested with)" ; + ] . diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl index 464ab6354..2384415dd 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/6_contextual_entity_metadata.ttl @@ -209,22 +209,6 @@ ro-crate:GeometryWktRecommendedProperties a sh:NodeShape ; sh:message "Geometry entities SHOULD provide `asWKT`" ; ] . 
-ro-crate:SoftwareApplicationVersionRecommended a sh:NodeShape ; - sh:name "SoftwareApplication: SHOULD have `version`" ; - sh:description """A SoftwareApplication contextual entity SHOULD declare a - `version` property identifying the specific release used (RO-Crate 1.2, - Provenance — Software Applications).""" ; - sh:targetClass schema:SoftwareApplication ; - sh:property [ - a sh:PropertyShape ; - sh:name "SoftwareApplication: RECOMMENDED `version` property" ; - sh:description "Check that a SoftwareApplication entity declares a `version` property." ; - sh:path schema:version ; - sh:minCount 1 ; - sh:severity sh:Warning ; - sh:message "A SoftwareApplication SHOULD have a `version` property" ; - ] . - ro-crate:LicenseEntityAbsoluteUrl a sh:NodeShape ; sh:name "License entity: SHOULD have absolute URL @id" ; sh:description """A License entity SHOULD be identified by an absolute HTTP(S) URL, From 26642a89d1ce9f97bb9ba2f2271720d3bf6f6fcf Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 09:30:16 +0200 Subject: [PATCH 144/352] test(ro-crate-1.2): :white_check_mark: test required properties of SoftwareApplication and ComputerLanguage entities --- .../invalid_no_name/ro-crate-metadata.json | 49 +++ .../invalid_no_url/ro-crate-metadata.json | 49 +++ .../ro-crate-metadata.json | 76 ++++ .../valid_computer_language/script.sh | 0 .../image_about/valid/ro-crate-metadata.json | 10 +- .../valid/ro-crate-metadata.json | 10 +- .../valid/ro-crate-metadata.json | 5 +- .../script_name/valid/ro-crate-metadata.json | 34 +- .../script_type/valid/ro-crate-metadata.json | 34 +- .../valid/ro-crate-metadata.json | 10 +- .../valid/ro-crate-metadata.json | 39 +- .../valid/ro-crate-metadata.json | 39 +- .../format/compacted/ro-crate-metadata.json | 393 +++++++++--------- .../invalid_missing/ro-crate-metadata.json | 63 +++ .../invalid_wrong_type/ro-crate-metadata.json | 71 ++++ .../valid/ro-crate-metadata.json | 73 ++++ .../test_metadata_contextualEntities.py | 79 +++- 
.../ro-crate-1.2/test_referenced_rocrate.py | 6 +- tests/ro_crates_v1_2.py | 14 +- 19 files changed, 797 insertions(+), 257 deletions(-) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_name/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_url/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/script.sh create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_missing/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_wrong_type/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_name/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_name/ro-crate-metadata.json new file mode 100644 index 000000000..a8d99b7c6 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_name/ro-crate-metadata.json @@ -0,0 +1,49 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "SoftwareApplication entity — invalid (no name)", + "description": "RO-Crate referencing a SoftwareApplication contextual entity that does NOT declare a `name` 
property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "mentions": { + "@id": "#create-action" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "#create-action", + "@type": "CreateAction", + "name": "Run analysis tool", + "instrument": { + "@id": "#analysis-tool" + } + }, + { + "@id": "#analysis-tool", + "@type": "SoftwareApplication", + "version": "1.4.2", + "url": "https://example.org/analysis-tool", + "description": "Software used to perform the data analysis (missing `name`)." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_url/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_url/ro-crate-metadata.json new file mode 100644 index 000000000..f16e351a5 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/invalid_no_url/ro-crate-metadata.json @@ -0,0 +1,49 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "SoftwareApplication entity — invalid (no url)", + "description": "RO-Crate referencing a SoftwareApplication contextual entity that does NOT declare a `url` property.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "mentions": { + "@id": "#create-action" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "#create-action", + "@type": "CreateAction", + "name": "Run analysis tool", + "instrument": { + "@id": "#analysis-tool" + } + }, + { + "@id": "#analysis-tool", + "@type": "SoftwareApplication", + "name": "Analysis Tool", + "version": "1.4.2", + "description": "Software used to perform the data analysis (missing `url`)." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/ro-crate-metadata.json new file mode 100644 index 000000000..bb6a8486b --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/ro-crate-metadata.json @@ -0,0 +1,76 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "ComputerLanguage entity — valid", + "description": "RO-Crate referencing a ComputerLanguage contextual entity (used as `programmingLanguage` of a Script) that declares name, url, version, and an optional alternateName.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "hasPart": [ + { + "@id": "script.sh" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Example Research Institute Contact", + "email": "contact@example.org" + }, + { + "@id": "script.sh", + "@type": ["File", "SoftwareSourceCode"], + "name": "Analysis script", + "description": "A Bash analysis script.", + "encodingFormat": "application/x-sh", + "contentSize": "256", + "programmingLanguage": { + "@id": "#bash" + } + }, + { + "@id": "#bash", + "@type": "ComputerLanguage", + "name": "Bash", + "alternateName": "GNU Bourne-Again Shell", + "url": "https://www.gnu.org/software/bash/", + "version": "5.2" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/script.sh b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_computer_language/script.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json index 3009cbcb0..016778ae5 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_about/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Image about — valid", + "name": "Image about \u2014 valid", "description": "RO-Crate with a Workflow whose image ImageObject has the recommended about property referencing the workflow.", "datePublished": "2024-01-01", "license": { @@ -94,7 +94,8 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": "https://galaxyproject.org/" + "url": "https://galaxyproject.org/", + "version": "1.0" }, { 
"@id": "diagram.png", @@ -112,7 +113,10 @@ }, { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", - "@type": ["CreativeWork", "Profile"], + "@type": [ + "CreativeWork", + "Profile" + ], "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", "description": "Bioschemas profile for computational workflows." } diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json index 7a606cd6c..651d9abb6 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/image_encoding_format/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Image encodingFormat — valid", + "name": "Image encodingFormat \u2014 valid", "description": "RO-Crate with a Workflow whose image ImageObject has the recommended encodingFormat property.", "datePublished": "2024-01-01", "license": { @@ -94,7 +94,8 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": "https://galaxyproject.org/" + "url": "https://galaxyproject.org/", + "version": "1.0" }, { "@id": "diagram.png", @@ -112,7 +113,10 @@ }, { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", - "@type": ["CreativeWork", "Profile"], + "@type": [ + "CreativeWork", + "Profile" + ], "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", "description": "Bioschemas profile for computational workflows." 
} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json index 693bda3fc..1ffe2309e 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/programming_language/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Programming language — valid", + "name": "Programming language \u2014 valid", "description": "RO-Crate with a Script that declares programmingLanguage referencing a ComputerLanguage entity.", "datePublished": "2024-01-01", "license": { @@ -81,7 +81,8 @@ "@id": "#bash", "@type": "ComputerLanguage", "name": "Bash", - "url": "https://www.gnu.org/software/bash/" + "url": "https://www.gnu.org/software/bash/", + "version": "1.0" } ] } diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json index 5435ee5c8..07edea638 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_name/valid/ro-crate-metadata.json @@ -4,17 +4,27 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": { "@id": "./" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Script name — valid", + "name": "Script name \u2014 valid", "description": "RO-Crate containing a Script entity with a required name property.", "datePublished": "2024-01-01", - "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [{ "@id": "script.sh" }] + 
"license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "script.sh" + } + ] }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -23,17 +33,25 @@ }, { "@id": "script.sh", - "@type": ["File", "SoftwareSourceCode"], + "@type": [ + "File", + "SoftwareSourceCode" + ], "name": "My Script", "description": "A sample shell script with a proper name.", "encodingFormat": "application/x-sh", - "programmingLanguage": { "@id": "#bash" } + "programmingLanguage": { + "@id": "#bash" + } }, { "@id": "#bash", "@type": "ComputerLanguage", "name": "Bash", - "url": { "@id": "https://www.gnu.org/software/bash/" } + "url": { + "@id": "https://www.gnu.org/software/bash/" + }, + "version": "1.0" } ] } diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json index 4ebe4a154..536dcb556 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/script_type/valid/ro-crate-metadata.json @@ -4,17 +4,27 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": { "@id": "./" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Script type — valid", + "name": "Script type \u2014 valid", "description": "RO-Crate containing a Script entity with correct @type [File, SoftwareSourceCode].", "datePublished": "2024-01-01", - "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [{ "@id": "script.sh" }] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "script.sh" + } + ] }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -23,17 +33,25 @@ }, { "@id": 
"script.sh", - "@type": ["File", "SoftwareSourceCode"], + "@type": [ + "File", + "SoftwareSourceCode" + ], "name": "My Script", "description": "A sample shell script.", "encodingFormat": "application/x-sh", - "programmingLanguage": { "@id": "#bash" } + "programmingLanguage": { + "@id": "#bash" + } }, { "@id": "#bash", "@type": "ComputerLanguage", "name": "Bash", - "url": { "@id": "https://www.gnu.org/software/bash/" } + "url": { + "@id": "https://www.gnu.org/software/bash/" + }, + "version": "1.0" } ] } diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json index f4a48eaa6..af4cfbf1b 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_conformsTo/valid/ro-crate-metadata.json @@ -14,7 +14,7 @@ { "@id": "./", "@type": "Dataset", - "name": "Workflow conformsTo — valid", + "name": "Workflow conformsTo \u2014 valid", "description": "RO-Crate with a Workflow that declares conformsTo referencing a versioned Bioschemas profile URI.", "datePublished": "2024-01-01", "license": { @@ -88,11 +88,15 @@ "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": "https://galaxyproject.org/" + "url": "https://galaxyproject.org/", + "version": "1.0" }, { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE", - "@type": ["CreativeWork", "Profile"], + "@type": [ + "CreativeWork", + "Profile" + ], "name": "Bioschemas ComputationalWorkflow Profile 1.0-RELEASE", "description": "Bioschemas profile for computational workflows." 
} diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json index a7574bef7..943eedc77 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_name/valid/ro-crate-metadata.json @@ -4,17 +4,27 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": { "@id": "./" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Workflow name — valid", + "name": "Workflow name \u2014 valid", "description": "RO-Crate with a Workflow entity that has the required name property.", "datePublished": "2024-01-01", - "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [{ "@id": "workflow.ga" }] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "workflow.ga" + } + ] }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -23,18 +33,29 @@ }, { "@id": "workflow.ga", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "name": "My Workflow", "description": "A Galaxy workflow with all required types and name.", "encodingFormat": "application/json", - "programmingLanguage": { "@id": "#galaxy" }, - "conformsTo": { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" } + "programmingLanguage": { + "@id": "#galaxy" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + } }, { "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": { "@id": "https://galaxyproject.org/" } + "url": { + "@id": 
"https://galaxyproject.org/" + }, + "version": "1.0" } ] } diff --git a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json index 087d86a33..d9dc02c30 100644 --- a/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/11_workflows_scripts/workflow_type/valid/ro-crate-metadata.json @@ -4,17 +4,27 @@ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", - "about": { "@id": "./" }, - "conformsTo": { "@id": "https://w3id.org/ro/crate/1.2" } + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } }, { "@id": "./", "@type": "Dataset", - "name": "Workflow type — valid", + "name": "Workflow type \u2014 valid", "description": "RO-Crate with a Workflow entity correctly typed as [File, SoftwareSourceCode, ComputationalWorkflow].", "datePublished": "2024-01-01", - "license": { "@id": "https://creativecommons.org/licenses/by/4.0/" }, - "hasPart": [{ "@id": "workflow.ga" }] + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "workflow.ga" + } + ] }, { "@id": "https://creativecommons.org/licenses/by/4.0/", @@ -23,18 +33,29 @@ }, { "@id": "workflow.ga", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "name": "My Workflow", "description": "A sample Galaxy workflow.", "encodingFormat": "application/json", - "programmingLanguage": { "@id": "#galaxy" }, - "conformsTo": { "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" } + "programmingLanguage": { + "@id": "#galaxy" + }, + "conformsTo": { + "@id": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE" + } }, { "@id": "#galaxy", "@type": "ComputerLanguage", "name": "Galaxy", - "url": { "@id": 
"https://galaxyproject.org/" } + "url": { + "@id": "https://galaxyproject.org/" + }, + "version": "1.0" } ] } diff --git a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json index 4a482db27..af8a60638 100644 --- a/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/1_metadata_document/format/compacted/ro-crate-metadata.json @@ -1,213 +1,216 @@ { - "@context": [ - "https://w3id.org/ro/crate/1.1/context", - "https://w3id.org/ro/terms/workflow-run/context" - ], - "@graph": [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.2" - }, - "about": { - "@id": "./" - } - }, - { - "@id": "./", - "@type": "Dataset", - "https://schema.org/name": "My Pictures", - "description": "A collection of my pictures", - "datePublished": "2024-05-17T01:04:52+01:00", - "conformsTo": [ - { - "@id": "https://w3id.org/ro/crate/1.2" - } - ], - "hasPart": [ - { - "@id": "pics/2017-06-11%2012.56.14.jpg" - }, - { - "@id": "pics/2018-06-11%2012.56.14.jpg" - }, - { - "@id": "pics/2019-06-11 12.56.14.jpg" - }, - { - "@id": "data%20set/" - }, - { - "@id": "data%20set2/" - }, - { - "@id": "data set3/" - }, - { - "@id": "pics/sepia_fence.jpg" - }, - { - "@id": "file:///tmp/test.txt" - } - ], - "isBasedOn": { - "@id": "https://doi.org/10.5281/zenodo.1009240" - }, - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - }, - "mentions": { - "@id": "#SepiaConversion_1" - } - }, - { - "@id": "https://w3id.org/ro/wfrun/process/0.5", - "@type": "CreativeWork", - "name": "Process Run Crate", - "version": "0.5" - }, - { - "@id": "https://example.com/otherprofile/0.1", - "@type": "CreativeWork", - "name": "Other Profile", - "version": "0.1" - }, - { - "@id": "https://www.imagemagick.org/", - "@type": "SoftwareApplication", - "url": 
"https://www.imagemagick.org/", - "name": "ImageMagick", - "softwareVersion": "6.9.7-4", - "softwareRequirements": { - "@id": "https://example.com/foobar/1.0.0/" - } - }, - { - "@id": "https://example.com/foobar/1.0.0/", - "@type": "SoftwareApplication", - "name": "foobar", - "softwareVersion": "1.0.0" - }, - { - "@id": "#SepiaConversion_1", - "@type": "CreateAction", - "name": "Convert dog image to sepia", - "description": "convert -sepia-tone 80% pics/2017-06-11\\ 12.56.14.jpg pics/sepia_fence.jpg", - "startTime": "2024-05-17T01:04:50+01:00", - "endTime": "2024-05-17T01:04:52+01:00", - "instrument": { - "@id": "https://www.imagemagick.org/" - }, - "object": { - "@id": "pics/2017-06-11%2012.56.14.jpg" - }, - "result": { - "@id": "pics/sepia_fence.jpg" - }, - "agent": { - "@id": "https://orcid.org/0000-0001-9842-9718" - }, - "actionStatus": "http://schema.org/FailedActionStatus", - "error": "this is just to test the error property", - "environment": [ - { - "@id": "#height-limit-pv" - }, - { - "@id": "#width-limit-pv" - } - ], - "containerImage": "https://example.com/imagemagick.sif" - }, - { - "@id": "#width-limit-pv", - "@type": "PropertyValue", - "name": "MAGICK_WIDTH_LIMIT", - "value": "4096" - }, - { - "@id": "#height-limit-pv", - "@type": "PropertyValue", - "name": "MAGICK_HEIGHT_LIMIT", - "value": "3072" - }, - { - "@id": "file:///tmp/test.txt", - "@type": "File", - "description": "A test file", - "encodingFormat": "text/plain" - }, - { - "@id": "pics/2017-06-11%2012.56.14.jpg", - "@type": "File", - "description": "Original image", - "encodingFormat": "image/jpeg", - "name": "2017-06-11 12.56.14.jpg (input)", - "author": { - "@id": "https://orcid.org/0000-0002-3545-944X" - } - }, + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + "https://w3id.org/ro/terms/workflow-run/context" + ], + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + }, + "about": { + "@id": 
"./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "https://schema.org/name": "My Pictures", + "description": "A collection of my pictures", + "datePublished": "2024-05-17T01:04:52+01:00", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.2" + } + ], + "hasPart": [ { - "@id": "pics/2018-06-11%2012.56.14.jpg", - "@type": "File", - "description": "Original image", - "encodingFormat": "image/jpeg", - "name": "2018-06-11 12.56.14.jpg (input)" + "@id": "pics/2017-06-11%2012.56.14.jpg" }, { - "@id": "pics/2019-06-11 12.56.14.jpg", - "@type": "File", - "description": "Original image", - "encodingFormat": "image/jpeg", - "name": "2018-06-11 12.56.14.jpg (input)" + "@id": "pics/2018-06-11%2012.56.14.jpg" }, { - "@id": "data%20set/", - "@type": "Dataset", - "name": "Data set", - "description": "A dataset", - "datePublished": "2024-05-17T01:04:52+01:00", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - } + "@id": "pics/2019-06-11 12.56.14.jpg" }, { - "@id": "data%20set2/", - "@type": "Dataset", - "name": "Data set 2", - "description": "A dataset", - "datePublished": "2024-05-17T01:04:52+01:00", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - } + "@id": "data%20set/" }, { - "@id": "data set3/", - "@type": "Dataset", - "name": "Data set 3", - "description": "A dataset", - "datePublished": "2024-05-17T01:04:52+01:00", - "license": { - "@id": "http://spdx.org/licenses/CC0-1.0" - } + "@id": "data%20set2/" }, { - "@id": "pics/sepia_fence.jpg", - "@type": "File", - "description": "The converted picture, now sepia-colored", - "encodingFormat": "image/jpeg", - "name": "sepia_fence (output)" + "@id": "data set3/" }, { - "@id": "https://orcid.org/0000-0001-9842-9718", - "@type": "Person", - "name": "Stian Soiland-Reyes" + "@id": "pics/sepia_fence.jpg" }, { - "@id": "https://orcid.org/0000-0002-3545-944X", - "@type": "Person", - "name": "Peter Sefton" + "@id": "file:///tmp/test.txt" + } + ], + "isBasedOn": { + "@id": 
"https://doi.org/10.5281/zenodo.1009240" + }, + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "mentions": { + "@id": "#SepiaConversion_1" + } + }, + { + "@id": "https://w3id.org/ro/wfrun/process/0.5", + "@type": "CreativeWork", + "name": "Process Run Crate", + "version": "0.5" + }, + { + "@id": "https://example.com/otherprofile/0.1", + "@type": "CreativeWork", + "name": "Other Profile", + "version": "0.1" + }, + { + "@id": "https://www.imagemagick.org/", + "@type": "SoftwareApplication", + "url": "https://www.imagemagick.org/", + "name": "ImageMagick", + "softwareVersion": "6.9.7-4", + "softwareRequirements": { + "@id": "https://example.com/foobar/1.0.0/" + }, + "version": "1.0" + }, + { + "@id": "https://example.com/foobar/1.0.0/", + "@type": "SoftwareApplication", + "name": "foobar", + "softwareVersion": "1.0.0", + "version": "1.0", + "url": "https://example.org/" + }, + { + "@id": "#SepiaConversion_1", + "@type": "CreateAction", + "name": "Convert dog image to sepia", + "description": "convert -sepia-tone 80% pics/2017-06-11\\ 12.56.14.jpg pics/sepia_fence.jpg", + "startTime": "2024-05-17T01:04:50+01:00", + "endTime": "2024-05-17T01:04:52+01:00", + "instrument": { + "@id": "https://www.imagemagick.org/" + }, + "object": { + "@id": "pics/2017-06-11%2012.56.14.jpg" + }, + "result": { + "@id": "pics/sepia_fence.jpg" + }, + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "actionStatus": "http://schema.org/FailedActionStatus", + "error": "this is just to test the error property", + "environment": [ + { + "@id": "#height-limit-pv" + }, + { + "@id": "#width-limit-pv" } - ] + ], + "containerImage": "https://example.com/imagemagick.sif" + }, + { + "@id": "#width-limit-pv", + "@type": "PropertyValue", + "name": "MAGICK_WIDTH_LIMIT", + "value": "4096" + }, + { + "@id": "#height-limit-pv", + "@type": "PropertyValue", + "name": "MAGICK_HEIGHT_LIMIT", + "value": "3072" + }, + { + "@id": "file:///tmp/test.txt", + "@type": "File", + 
"description": "A test file", + "encodingFormat": "text/plain" + }, + { + "@id": "pics/2017-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2017-06-11 12.56.14.jpg (input)", + "author": { + "@id": "https://orcid.org/0000-0002-3545-944X" + } + }, + { + "@id": "pics/2018-06-11%2012.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2018-06-11 12.56.14.jpg (input)" + }, + { + "@id": "pics/2019-06-11 12.56.14.jpg", + "@type": "File", + "description": "Original image", + "encodingFormat": "image/jpeg", + "name": "2018-06-11 12.56.14.jpg (input)" + }, + { + "@id": "data%20set/", + "@type": "Dataset", + "name": "Data set", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "data%20set2/", + "@type": "Dataset", + "name": "Data set 2", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "data set3/", + "@type": "Dataset", + "name": "Data set 3", + "description": "A dataset", + "datePublished": "2024-05-17T01:04:52+01:00", + "license": { + "@id": "http://spdx.org/licenses/CC0-1.0" + } + }, + { + "@id": "pics/sepia_fence.jpg", + "@type": "File", + "description": "The converted picture, now sepia-colored", + "encodingFormat": "image/jpeg", + "name": "sepia_fence (output)" + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes" + }, + { + "@id": "https://orcid.org/0000-0002-3545-944X", + "@type": "Person", + "name": "Peter Sefton" + } + ] } diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_missing/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_missing/ro-crate-metadata.json new 
file mode 100644 index 000000000..4b7933176 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_missing/ro-crate-metadata.json @@ -0,0 +1,63 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Root contactPoint missing — invalid", + "description": "RO-Crate whose Root Data Entity omits the `contactPoint` property entirely.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "hasPart": [], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_wrong_type/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_wrong_type/ro-crate-metadata.json new file mode 100644 index 000000000..fdbb809e7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/invalid_wrong_type/ro-crate-metadata.json @@ -0,0 +1,71 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Root contactPoint wrong type — invalid", + "description": "RO-Crate whose Root Data Entity references a contactPoint that is NOT a ContactPoint contextual entity (it's a plain Thing).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "contactPoint": { + "@id": "#root-contact" + }, + "hasPart": [], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#root-contact", + "@type": "Thing", + "name": "Not actually a ContactPoint" + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/valid/ro-crate-metadata.json new file mode 100644 index 000000000..628ffa023 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_contactpoint/valid/ro-crate-metadata.json @@ -0,0 +1,73 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Root contactPoint present — valid", + "description": "RO-Crate whose Root Data Entity declares a contactPoint referencing a ContactPoint contextual entity.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "funder": { + "@id": "https://ror.org/05f9q8d28" + }, + "contactPoint": { + "@id": "#root-contact" + }, + "hasPart": [], + "identifier": { + "@id": "#uuid" + } + }, + { + "@id": "#uuid", + "@type": "PropertyValue", + "name": "UUID", + "propertyID": "UUID", + "value": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." 
+ }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Contact for Example Research Institute", + "email": "mailto:contact@example.org" + }, + { + "@id": "#root-contact", + "@type": "ContactPoint", + "name": "RO-Crate primary contact", + "email": "mailto:crate-contact@example.org", + "contactType": "customer support" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index d34e376cb..45fdf38b6 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -25,14 +25,14 @@ # Generic RECOMMENDED checks that fire on minimal test crates regardless of the # contextual-entity-specific property being tested. _GENERIC_RECOMMENDED_SKIP = [ - "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher ] # Correct IDs for funder/publisher checks (used in person entity tests). 
_PERSON_VALID_SKIP = [ - "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher ] @@ -332,35 +332,88 @@ def test_invalid_bare_propertyvalue_id(): # --------------------------------------------------------------------------- -# SoftwareApplication: SHOULD have `version` (SHOULD) +# SoftwareApplication / ComputerLanguage: MUST have name, url, version (REQUIRED) # --------------------------------------------------------------------------- def test_valid_software_application(): """ - A SoftwareApplication contextual entity that declares a `version` property - SHOULD pass RECOMMENDED validation. + A SoftwareApplication contextual entity that declares `name`, `url`, and + `version` MUST pass REQUIRED validation. """ do_entity_test( __contextual_entities_crates__.valid_software_application, - models.Severity.RECOMMENDED, + models.Severity.REQUIRED, True, profile_identifier="ro-crate-1.2", - skip_checks=_GENERIC_RECOMMENDED_SKIP, ) def test_invalid_software_application_no_version(): """ A SoftwareApplication contextual entity missing the `version` property - SHOULD trigger a RECOMMENDED warning. + MUST trigger a REQUIRED violation. """ do_entity_test( __contextual_entities_crates__.invalid_software_application_no_version, - models.Severity.RECOMMENDED, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "SoftwareApplication or ComputerLanguage: REQUIRED `name`, `url`, `version`" + ], + expected_triggered_issues=[ + "A SoftwareApplication or ComputerLanguage MUST have a `version` property" + ], + ) + + +def test_invalid_software_application_no_name(): + """ + A SoftwareApplication contextual entity missing the `name` property MUST + trigger a REQUIRED violation. 
+ """ + do_entity_test( + __contextual_entities_crates__.invalid_software_application_no_name, + models.Severity.REQUIRED, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[ + "SoftwareApplication or ComputerLanguage: REQUIRED `name`, `url`, `version`" + ], + expected_triggered_issues=[ + "A SoftwareApplication or ComputerLanguage MUST have a `name` property" + ], + ) + + +def test_invalid_software_application_no_url(): + """ + A SoftwareApplication contextual entity missing the `url` property MUST + trigger a REQUIRED violation. + """ + do_entity_test( + __contextual_entities_crates__.invalid_software_application_no_url, + models.Severity.REQUIRED, False, profile_identifier="ro-crate-1.2", - expected_triggered_requirements=["SoftwareApplication: SHOULD have `version`"], + expected_triggered_requirements=[ + "SoftwareApplication or ComputerLanguage: REQUIRED `name`, `url`, `version`" + ], expected_triggered_issues=[ - "A SoftwareApplication SHOULD have a `version` property" + "A SoftwareApplication or ComputerLanguage MUST have a `url` property" ], ) + + +def test_valid_computer_language(): + """ + A ComputerLanguage contextual entity (referenced as `programmingLanguage` + of a Script) that declares `name`, `url`, `version`, and an optional + `alternateName` MUST pass REQUIRED validation. 
+ """ + do_entity_test( + __contextual_entities_crates__.valid_computer_language, + models.Severity.REQUIRED, + True, + profile_identifier="ro-crate-1.2", + ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 66c277e3b..09201bb81 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -34,7 +34,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) @@ -181,7 +181,7 @@ def test_valid_referenced_rocrate_with_identifier(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) @@ -196,7 +196,7 @@ def test_valid_referenced_rocrate_with_relative_path(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index 2eea67908..400b91676 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -676,7 +676,7 @@ def invalid_contextual_entity_bare_contactpoint(self) -> Path: def invalid_contextual_entity_bare_propertyvalue(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "contextual_entity_id_format" / "invalid_bare_propertyvalue" - # --- SoftwareApplication: SHOULD have version (5.7) --- + # --- 
SoftwareApplication / ComputerLanguage: MUST have name, url, version (5.7) --- @property def valid_software_application(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid" @@ -685,6 +685,18 @@ def valid_software_application(self) -> Path: def invalid_software_application_no_version(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "invalid_no_version" + @property + def invalid_software_application_no_name(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "invalid_no_name" + + @property + def invalid_software_application_no_url(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "invalid_no_url" + + @property + def valid_computer_language(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid_computer_language" + class InvalidMultiProfileROC: From 600fce9b377700ba494c1d6449d860aafec51ad6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 10:26:07 +0200 Subject: [PATCH 145/352] fix(ro-crate-1.2): :bug: remove unnecessary check --- .../ro-crate/1.2/should/0_entity_metadata.ttl | 39 ------------------- .../test_metadata_contextualEntities.py | 8 ++-- .../ro-crate-1.2/test_metadata_entities.py | 29 -------------- .../ro-crate-1.2/test_referenced_rocrate.py | 6 +-- .../ro-crate-1.2/test_workflows_scripts.py | 6 +-- tests/ro_crates_v1_2.py | 8 ---- 6 files changed, 10 insertions(+), 86 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl index 1c8e04fff..3bb0d3ce8 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/0_entity_metadata.ttl @@ -73,42 +73,3 @@ ro-crate:ROCrateMetadataEntityRecommendedType a sh:NodeShape ; sh:severity sh:Warning ; sh:message "Entities SHOULD 
have a human-readable name" ; ] . - -ro-crate:RecommendedEntityReachability a sh:NodeShape ; - sh:name "RO-Crate Metadata Entity: RECOMMENDED reachability" ; - sh:description """Check if the RO-Crate Metadata Entity is directly or indirectly reachable from the Root Data Entity.""" ; - sh:targetClass ro-crate:RootDataEntity ; - sh:sparql [ - sh:name "RO-Crate Metadata Entity: RECOMMENDED reachability from the Root Data Entity" ; - sh:description """Check if the RO-Crate Metadata Entity is directly or indirectly reachable from the Root Data Entity.""" ; - sh:message "RO-Crate Metadata Entity SHOULD be directly or indirectly reachable from the Root Data Entity" ; - sh:prefixes ro-crate:sparqlPrefixes ; - sh:select """ - SELECT $this ?unreachable - WHERE { - ?unreachable a ?type . - - # Exclude Root and RO-Crate Metadata File entities - ?root a schema:Dataset . - ?metadatafile schema:about ?root . - FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) - FILTER(?unreachable != ?metadatafile) - FILTER (?unreachable != ?root) - FILTER (isIRI(?unreachable)) - FILTER (?unreachable != $this) - - # Exclude entities with non-IRI identifiers or those from specific namespaces - FILTER ( - !STRSTARTS(STR(?type), "http://www.w3.org/1999/02/22-rdf-syntax-ns#") && - !STRSTARTS(STR(?type), "http://www.w3.org/2000/01/rdf-schema#") && - !STRSTARTS(STR(?type), "http://www.w3.org/2002/07/owl#") && - !STRSTARTS(STR(?type), "http://www.w3.org/2001/XMLSchema#") && - !STRSTARTS(STR(?type), "http://www.w3.org/ns/shacl#") - ) - # Select entities which are not directly or indirectly reachable from the Root Data Entity - FILTER NOT EXISTS { - $this (|!)+ ?unreachable . - } - } - """ ; - ] . 
diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 45fdf38b6..65ee4214b 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -25,14 +25,14 @@ # Generic RECOMMENDED checks that fire on minimal test crates regardless of the # contextual-entity-specific property being tested. _GENERIC_RECOMMENDED_SKIP = [ - "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher ] # Correct IDs for funder/publisher checks (used in person entity tests). _PERSON_VALID_SKIP = [ - "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher ] diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py index 420fc6b83..56424de39 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_entities.py @@ -80,32 +80,3 @@ def test_invalid_recommended_entity_name_warning(): expected_triggered_issues=[ "Entities SHOULD have a human-readable name"] ) - - -def test_valid_recommended_reachability(): - """ - Test that all metadata entities are directly or indirectly reachable from the Root Data Entity. 
- """ - do_entity_test( - __metadata_entities__.valid_entity_reachability, - models.Severity.RECOMMENDED, - True, - profile_identifier="ro-crate-1.2" - - ) - - -def test_invalid_recommended_reachability_warning(): - """ - Test that a warning is triggered when at least one metadata entity is not directly - or indirectly reachable from the Root Data Entity. - """ - do_entity_test( - __metadata_entities__.invalid_entity_reachability, - models.Severity.RECOMMENDED, - False, - profile_identifier="ro-crate-1.2", - expected_triggered_requirements=["RO-Crate Metadata Entity: RECOMMENDED reachability"], - expected_triggered_issues=[ - "RO-Crate Metadata Entity SHOULD be directly or indirectly reachable from the Root Data Entity"] - ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 09201bb81..9baae8095 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -34,7 +34,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], ) @@ -181,7 +181,7 @@ def test_valid_referenced_rocrate_with_identifier(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], ) @@ -196,7 +196,7 @@ def test_valid_referenced_rocrate_with_relative_path(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], + 
skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], ) diff --git a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py index a947b04fe..e8854fa55 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -33,9 +33,9 @@ "ro-crate-1.2_54.1", # Root Data Entity: recommended publisher "ro-crate-1.2_61.1", # File Data Entity: RECOMMENDED contentSize "ro-crate-1.2_62.0", # File: RECOMMENDED conformsTo profile - "ro-crate-1.2_72.1", # Contextual Entity Properties - "ro-crate-1.2_73.1", # Contextual Entity RECOMMENDED description - "ro-crate-1.2_78.2", # License entity: RECOMMENDED properties + "ro-crate-1.2_74.1", # Contextual Entity Properties + "ro-crate-1.2_75.1", # Contextual Entity RECOMMENDED description + "ro-crate-1.2_80.2", # License entity: RECOMMENDED properties ] diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index 400b91676..c144c4ae4 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -188,14 +188,6 @@ def valid_recommended_name(self) -> Path: def invalid_recommended_name(self) -> Path: return self.METADATA_ENTITIES_CRATES_PATH / "recommended_name" / "invalid" - @property - def valid_entity_reachability(self) -> Path: - return self.METADATA_ENTITIES_CRATES_PATH / "entity_reachability" / "valid" - - @property - def invalid_entity_reachability(self) -> Path: - return self.METADATA_ENTITIES_CRATES_PATH / "entity_reachability" / "invalid" - class MetadataDescriptor: From ddbb19c9d6dc595e3e5c9e64daea18b494bdb6e2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 10:51:30 +0200 Subject: [PATCH 146/352] =?UTF-8?q?feat(ro-crate-1.2):=20:sparkles:=20rest?= =?UTF-8?q?ore=20check=20for=20DataEntity=20reachability=20(DE=20=E2=86=92?= =?UTF-8?q?=20Root=20Data=20Entity)?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../1.2/must/4_data_entity_metadata.ttl | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl index 797820693..35ccac633 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.ttl @@ -137,20 +137,20 @@ ro-crate:DirectoryDataEntity a sh:NodeShape ; sh:severity sh:Violation ; ] . -# ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; -# sh:name "Data Entity: REQUIRED properties" ; -# sh:description """A `DataEntity`The file descriptor MUST be a valid JSON-LD file MUST be linked, either directly or indirectly, from the Root Data Entity""" ; -# sh:targetClass ro-crate:DataEntity ; -# sh:property -# [ -# a sh:PropertyShape ; -# sh:path [ sh:oneOrMorePath [ sh:inversePath schema_org:hasPart ] ] ; -# sh:qualifiedValueShape [ sh:class ro-crate:RootDataEntity ] ; -# sh:qualifiedMinCount 1 ; -# sh:name "Data Entity MUST be directly referenced" ; -# sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property" """ ; -# # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; -# ] . 
+ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; + sh:name "Data Entity: REQUIRED properties" ; + sh:description """A `DataEntity`The file descriptor MUST be a valid JSON-LD file MUST be linked, either directly or indirectly, from the Root Data Entity""" ; + sh:targetClass ro-crate:DataEntity ; + sh:property + [ + a sh:PropertyShape ; + sh:path [ sh:oneOrMorePath [ sh:inversePath schema_org:hasPart ] ] ; + sh:qualifiedValueShape [ sh:class ro-crate:RootDataEntity ] ; + sh:qualifiedMinCount 1 ; + sh:name "Data Entity MUST be directly referenced" ; + sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property" """ ; + sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; + ] . ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; sh:name "Generic Data Entity: REQUIRED properties" ; From 37d3527d9c795496dcfd0b0284cc548caf3a9fc0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 11:12:39 +0200 Subject: [PATCH 147/352] test(ro-crate-1.2): :bug: fix test configuration --- .../test_metadata_contextualEntities.py | 8 ++++---- .../ro-crate-1.2/test_referenced_rocrate.py | 6 +++--- .../ro-crate-1.2/test_workflows_scripts.py | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 65ee4214b..45fdf38b6 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -25,14 +25,14 @@ # Generic RECOMMENDED checks that fire on minimal test crates regardless of the # contextual-entity-specific property being tested. 
_GENERIC_RECOMMENDED_SKIP = [ - "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher ] # Correct IDs for funder/publisher checks (used in person entity tests). _PERSON_VALID_SKIP = [ - "ro-crate-1.2_47.1", # Root Data Entity: RECOMMENDED funder - "ro-crate-1.2_54.1", # Root Data Entity: RECOMMENDED publisher + "ro-crate-1.2_48.1", # Root Data Entity: RECOMMENDED funder + "ro-crate-1.2_55.1", # Root Data Entity: RECOMMENDED publisher ] diff --git a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py index 9baae8095..b482b023a 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py +++ b/tests/integration/profiles/ro-crate-1.2/test_referenced_rocrate.py @@ -34,7 +34,7 @@ def test_valid_referenced_rocrate(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_45.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) @@ -181,7 +181,7 @@ def test_valid_referenced_rocrate_with_identifier(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_45.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) @@ -196,7 +196,7 @@ def test_valid_referenced_rocrate_with_relative_path(): models.Severity.RECOMMENDED, True, profile_identifier="ro-crate-1.2", - skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_44.1", "ro-crate-1.2_69.1", "ro-crate-1.2_72.1"], + skip_checks=["ro-crate-1.2_40.0", "ro-crate-1.2_45.1", "ro-crate-1.2_70.1", "ro-crate-1.2_73.1"], ) diff --git 
a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py index e8854fa55..4ab0e5be2 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py +++ b/tests/integration/profiles/ro-crate-1.2/test_workflows_scripts.py @@ -29,13 +29,13 @@ _GENERIC_RECOMMENDED_SKIP = [ "ro-crate-1.2_40.0", # RO-Crate Metadata Entity: RECOMMENDED properties (check 0) "ro-crate-1.2_40.1", # RO-Crate Metadata Entity: RECOMMENDED properties (check 1) - "ro-crate-1.2_47.1", # Root Data Entity: recommended funder - "ro-crate-1.2_54.1", # Root Data Entity: recommended publisher - "ro-crate-1.2_61.1", # File Data Entity: RECOMMENDED contentSize - "ro-crate-1.2_62.0", # File: RECOMMENDED conformsTo profile - "ro-crate-1.2_74.1", # Contextual Entity Properties - "ro-crate-1.2_75.1", # Contextual Entity RECOMMENDED description - "ro-crate-1.2_80.2", # License entity: RECOMMENDED properties + "ro-crate-1.2_48.1", # Root Data Entity: recommended funder + "ro-crate-1.2_55.1", # Root Data Entity: recommended publisher + "ro-crate-1.2_62.1", # File Data Entity: RECOMMENDED contentSize + "ro-crate-1.2_63.0", # File: RECOMMENDED conformsTo profile + "ro-crate-1.2_75.1", # Contextual Entity Properties + "ro-crate-1.2_76.1", # Contextual Entity RECOMMENDED description + "ro-crate-1.2_81.2", # License entity: RECOMMENDED properties ] From 16b51eed3a03a2161630a0c13649f3819b25bf99 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 11:25:56 +0200 Subject: [PATCH 148/352] feat(ro-crate-1.2): :sparkles: refine check for the `funder` property of the Root Data Entity --- .../1.2/should/2_root_data_entity_funding.ttl | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl index 321eb235f..25d708a93 100644 --- 
a/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/should/2_root_data_entity_funding.ttl @@ -16,20 +16,32 @@ @prefix schema_org: . @prefix sh: . -# Root Data Entity SHOULD reference funders directly via `funder` ro-crate:RootDataEntityRecommendedFunder a sh:NodeShape ; sh:name "Root Data Entity: recommended funder" ; - sh:description """The Root Data Entity SHOULD reference funders directly via the `funder` - property, using Organization (or Person) entities (RO-Crate 1.2, Funding and Grants).""" ; + sh:description """If the RO-Crate describes a research project, grant, or + monetary grant (via a `schema:Project`, `schema:Grant`, or + `schema:MonetaryGrant` entity in the @graph), the Root Data Entity + SHOULD reference the funding source via the `funder` property + (RO-Crate 1.2, Funding and Grants).""" ; sh:targetClass ro-crate:RootDataEntity ; - sh:property [ - a sh:PropertyShape ; - sh:name "Root Data Entity: RECOMMENDED `funder` property" ; - sh:description """Check if the Root Data Entity references funders directly via `funder`.""" ; - sh:path schema_org:funder ; - sh:minCount 1 ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "Root Data Entity: RECOMMENDED `funder` property (conditional)" ; + sh:description """Fire a warning only if the crate contains project or + grant entities AND the Root Data Entity has no `funder` property.""" ; + sh:select """ + SELECT $this + WHERE { + # Trigger: at least one project/grant entity is present. + ?project a ?projectType . + FILTER(?projectType IN (schema:Project, schema:Grant, schema:MonetaryGrant)) + # Violation condition: the Root has no `funder` property. 
+ FILTER NOT EXISTS { $this schema:funder ?f } + } + """ ; sh:severity sh:Warning ; - sh:message "The Root Data Entity SHOULD reference funders directly via the `funder` property" ; + sh:message "The Root Data Entity SHOULD reference funders directly via the `funder` property when the crate describes a research project, grant, or monetary grant" ; ] . # funder values SHOULD be Organization entities; From 3f66a5e44c34324661b96fe88c4ab96601988b5c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 11:26:09 +0200 Subject: [PATCH 149/352] test(ro-crate-1.2): :card_file_box: update test data --- .../invalid_no_funder/ro-crate-metadata.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json index 135049fa0..d874065ae 100644 --- a/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json +++ b/tests/data/crates/rocrate-1.2/7_root_data_entity/recommended_funding/invalid_no_funder/ro-crate-metadata.json @@ -11,10 +11,11 @@ "@id": "./", "@type": "Dataset", "name": "Funded RO-Crate — missing funder", - "description": "RO-Crate whose Root Data Entity has no `funder` property, triggering the RECOMMENDED funder check.", + "description": "RO-Crate that describes a research project (Grant entity) but whose Root Data Entity has no `funder` property, triggering the conditional RECOMMENDED funder check.", "datePublished": "2024-01-01", "license": {"@id": "https://creativecommons.org/licenses/by/4.0/"}, "publisher": {"@id": "#publisher-org"}, + "mentions": {"@id": "#grant-eu-2024"}, "hasPart": [] }, { @@ -23,6 +24,12 @@ "name": "Example Research Institute", "url": "https://example.org" }, + { + "@id": "#grant-eu-2024", + "@type": "Grant", + "name": "EU Horizon 2024 Grant", 
+ "description": "A research grant referenced in the crate; the Root SHOULD declare the corresponding funder." + }, { "@id": "https://creativecommons.org/licenses/by/4.0/", "@type": "CreativeWork", From 4f1aad46ead32e56285f0803e1f5068e13a63856 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 11:51:25 +0200 Subject: [PATCH 150/352] feat(ro-crate-1.2): :sparkles: check optional WebPageElement type of encoding format entities --- .../1.2/may/6_contextual_entity_metadata.ttl | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl diff --git a/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl new file mode 100644 index 000000000..bb1007864 --- /dev/null +++ b/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl @@ -0,0 +1,51 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . + +# RO-Crate 1.2, Encoding Format Entity — MAY include `WebPageElement` in +# `@type` if the `@id` links to a section of a webpage (i.e. the URI carries +# a fragment identifier such as `https://example.org/page.html#section-foo`).
+# This is purely optional; the shape emits an `sh:Info` suggestion — never a +# violation — so users can see the permissive recommendation without their +# crate being marked invalid. +ro-crate:EncodingFormatOptionalWebPageElement a sh:NodeShape ; + sh:name "Encoding format: OPTIONAL `WebPageElement` type for section references" ; + sh:description """An encoding format contextual entity whose `@id` links to + a section of a webpage (the URI contains a fragment identifier) MAY + include `WebPageElement` in its `@type` + (RO-Crate 1.2, Encoding Format Entity).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?s schema:encodingFormat ?this . + FILTER(isIRI(?this)) + FILTER(CONTAINS(STR(?this), "#")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Encoding format: OPTIONAL `WebPageElement` @type" ; + sh:description "Check if the encoding format entity includes `WebPageElement` in its `@type`." ; + sh:path rdf:type ; + sh:hasValue schema:WebPageElement ; + sh:severity sh:Info ; + sh:message "An encoding format entity whose `@id` is a section of a webpage MAY include `WebPageElement` in its `@type`" ; + ] . 
From a38c93311970eefbd04c6068e86837ce2885adab Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 11:52:33 +0200 Subject: [PATCH 151/352] test(ro-crate-1.2): :white_check_mark: test optional WebPageElement type of encoding format entities --- .../ro-crate-metadata.json | 55 ++++++++++++++ .../no_fragment/ro-crate-metadata.json | 55 ++++++++++++++ .../valid/ro-crate-metadata.json | 55 ++++++++++++++ .../test_metadata_contextualEntities.py | 72 +++++++++++++++++++ tests/ro_crates_v1_2.py | 13 ++++ 5 files changed, 250 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/info_no_webpageelement/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/no_fragment/ro-crate-metadata.json create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/valid/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/info_no_webpageelement/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/info_no_webpageelement/ro-crate-metadata.json new file mode 100644 index 000000000..3209db181 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/info_no_webpageelement/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Encoding format entity — info (no WebPageElement)", + "description": "RO-Crate whose encoding format entity has an `@id` pointing to a section of a webpage (fragment identifier) but whose `@type` does NOT include `WebPageElement`. 
The MAY check emits an `sh:Info` suggestion.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Tabular data", + "description": "A CSV file.", + "encodingFormat": [ + "text/csv", + { + "@id": "https://example.org/encoding-formats.html#csv" + } + ], + "contentSize": "1024" + }, + { + "@id": "https://example.org/encoding-formats.html#csv", + "@type": "WebPage", + "name": "CSV encoding format section", + "description": "Section of a webpage that describes the CSV encoding format (but not typed as WebPageElement)." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/no_fragment/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/no_fragment/ro-crate-metadata.json new file mode 100644 index 000000000..f7e9fe1c7 --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/no_fragment/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Encoding format entity — no fragment (MAY does not apply)", + "description": "RO-Crate whose encoding format entity has an `@id` that is NOT a fragment identifier (does not point to a section of a webpage). 
The MAY shape should not apply and no `sh:Info` is emitted.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Tabular data", + "description": "A CSV file.", + "encodingFormat": [ + "text/csv", + { + "@id": "https://example.org/csv-format.html" + } + ], + "contentSize": "1024" + }, + { + "@id": "https://example.org/csv-format.html", + "@type": "WebPage", + "name": "CSV encoding format page", + "description": "A full webpage describing the CSV encoding format (no fragment in @id)." + } + ] +} diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/valid/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/valid/ro-crate-metadata.json new file mode 100644 index 000000000..776b496ad --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/encoding_format/valid/ro-crate-metadata.json @@ -0,0 +1,55 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Encoding format entity — valid (WebPageElement)", + "description": "RO-Crate whose File references an encoding format entity whose `@id` points to a section of a webpage and whose `@type` includes `WebPageElement` (MAY condition satisfied).", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": 
"https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Tabular data", + "description": "A CSV file.", + "encodingFormat": [ + "text/csv", + { + "@id": "https://example.org/encoding-formats.html#csv" + } + ], + "contentSize": "1024" + }, + { + "@id": "https://example.org/encoding-formats.html#csv", + "@type": ["WebPage", "WebPageElement"], + "name": "CSV encoding format section", + "description": "Section of a webpage that describes the CSV encoding format." + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 45fdf38b6..11d555980 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -417,3 +417,75 @@ def test_valid_computer_language(): True, profile_identifier="ro-crate-1.2", ) + + +# --------------------------------------------------------------------------- +# Encoding format entity: MAY include `WebPageElement` when @id is a section +# of a webpage (sh:Info — optional suggestion) [5.8] +# --------------------------------------------------------------------------- + +_ENCODING_FORMAT_MAY_REQUIREMENT = ( + "Encoding format: OPTIONAL `WebPageElement` type for section references" +) + + +def test_info_encoding_format_no_webpageelement(): + """ + An encoding format entity whose `@id` contains a fragment identifier + (section of a webpage) but whose `@type` does NOT include `WebPageElement` + triggers an `sh:Info` suggestion at OPTIONAL severity (RO-Crate 1.2, § 5.8). 
+ """ + do_entity_test( + __contextual_entities_crates__.info_encoding_format_no_webpageelement, + models.Severity.OPTIONAL, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[_ENCODING_FORMAT_MAY_REQUIREMENT], + expected_triggered_issues=[ + "MAY include `WebPageElement` in its `@type`" + ], + ) + + +def test_valid_encoding_format_with_webpageelement(): + """ + An encoding format entity whose `@id` contains a fragment identifier AND + whose `@type` already includes `WebPageElement` does NOT trigger the MAY + suggestion (the optional recommendation is satisfied). + """ + from rocrate_validator import services + result = services.validate({ + "rocrate_uri": str( + __contextual_entities_crates__.valid_encoding_format_webpageelement + ), + "profile_identifier": "ro-crate-1.2", + "requirement_severity": models.Severity.OPTIONAL, + }) + failed_requirement_names = { + issue.check.requirement.name for issue in result.get_issues() + } + assert _ENCODING_FORMAT_MAY_REQUIREMENT not in failed_requirement_names, ( + f"The MAY requirement {_ENCODING_FORMAT_MAY_REQUIREMENT!r} should NOT fire " + f"when the encoding format entity already includes `WebPageElement` in its @type" + ) + + +def test_encoding_format_no_fragment_not_triggered(): + """ + An encoding format entity whose `@id` does NOT contain a fragment + identifier is outside the target of the MAY shape; the suggestion must + not fire. 
+ """ + from rocrate_validator import services + result = services.validate({ + "rocrate_uri": str(__contextual_entities_crates__.encoding_format_no_fragment), + "profile_identifier": "ro-crate-1.2", + "requirement_severity": models.Severity.OPTIONAL, + }) + failed_requirement_names = { + issue.check.requirement.name for issue in result.get_issues() + } + assert _ENCODING_FORMAT_MAY_REQUIREMENT not in failed_requirement_names, ( + f"The MAY requirement {_ENCODING_FORMAT_MAY_REQUIREMENT!r} should NOT fire " + f"when the encoding format entity @id does not contain a fragment identifier" + ) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index c144c4ae4..916d3a149 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -689,6 +689,19 @@ def invalid_software_application_no_url(self) -> Path: def valid_computer_language(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid_computer_language" + # --- Encoding Format entity: MAY include `WebPageElement` when @id has a fragment (5.8) --- + @property + def valid_encoding_format_webpageelement(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "encoding_format" / "valid" + + @property + def info_encoding_format_no_webpageelement(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "encoding_format" / "info_no_webpageelement" + + @property + def encoding_format_no_fragment(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "encoding_format" / "no_fragment" + class InvalidMultiProfileROC: From 364ec58c99a320d4ae21be9785cc5a6b81add1ed Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 12:10:50 +0200 Subject: [PATCH 152/352] feat(ro-crate-1.2): :sparkles: add check for the optional `alternateName` of SoftwareApplication and ComputerLanguage entities --- .../1.2/may/6_contextual_entity_metadata.ttl | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git 
a/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl index bb1007864..588e4207b 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/1.2/may/6_contextual_entity_metadata.ttl @@ -49,3 +49,34 @@ ro-crate:EncodingFormatOptionalWebPageElement a sh:NodeShape ; sh:severity sh:Info ; sh:message "An encoding format entity whose `@id` is a section of a webpage MAY include `WebPageElement` in its `@type`" ; ] . + +# RO-Crate 1.2, Software Applications / ComputerLanguage — `alternateName` +# MAY be present if there is a shorter colloquial name for the software or +# language (e.g. `"alternateName": "GNU Bourne-Again Shell"` for Bash). +# Emits an `sh:Info` suggestion when the property is missing. +ro-crate:SoftwareApplicationOrComputerLanguageOptionalAlternateName a sh:NodeShape ; + sh:name "SoftwareApplication or ComputerLanguage: OPTIONAL `alternateName`" ; + sh:description """A contextual entity representing a `SoftwareApplication` + or a `ComputerLanguage` MAY declare an `alternateName` property to + provide a shorter colloquial name for the software or language + (RO-Crate 1.2, Provenance — Software Applications).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a ?t . + FILTER(?t IN (schema:SoftwareApplication, schema:ComputerLanguage)) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "SoftwareApplication/ComputerLanguage: OPTIONAL `alternateName`" ; + sh:description "Check if the entity declares an `alternateName` property." ; + sh:path schema:alternateName ; + sh:minCount 1 ; + sh:severity sh:Info ; + sh:message "A SoftwareApplication or ComputerLanguage MAY declare an `alternateName` property to provide a shorter colloquial name" ; + ] . 
From b9fc5e92257565978cdfe0dcada6bcecee5b88a3 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 20 Apr 2026 12:12:37 +0200 Subject: [PATCH 153/352] test(ro-crate-1.2): :white_check_mark: test optional `alternateName` of SoftwareApplication and ComputerLanguage entities --- .../ro-crate-metadata.json | 60 +++++++++++++++++++ .../test_metadata_contextualEntities.py | 58 ++++++++++++++++++ tests/ro_crates_v1_2.py | 4 ++ 3 files changed, 122 insertions(+) create mode 100644 tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_with_alternatename/ro-crate-metadata.json diff --git a/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_with_alternatename/ro-crate-metadata.json b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_with_alternatename/ro-crate-metadata.json new file mode 100644 index 000000000..b397d0daf --- /dev/null +++ b/tests/data/crates/rocrate-1.2/10_metadata_contextualEntities/software_application/valid_with_alternatename/ro-crate-metadata.json @@ -0,0 +1,60 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "ComputerLanguage with alternateName — fully valid", + "description": "RO-Crate referencing a ComputerLanguage contextual entity that declares all MUST properties (`name`, `url`, `version`) AND the MAY `alternateName`. 
The crate is minimal — it contains no File/Directory data entities — so no OPTIONAL-level INFO checks should fire.", + "datePublished": "2024-01-01", + "license": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "publisher": { + "@id": "https://ror.org/05f9q8d28" + }, + "mentions": { + "@id": "#bash" + } + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "description": "CC BY 4.0 license." + }, + { + "@id": "https://ror.org/05f9q8d28", + "@type": "Organization", + "name": "Example Research Institute", + "url": "https://example.org", + "contactPoint": { + "@id": "#org-contact" + } + }, + { + "@id": "#org-contact", + "@type": "ContactPoint", + "name": "Example Research Institute Contact", + "email": "contact@example.org" + }, + { + "@id": "#bash", + "@type": "ComputerLanguage", + "name": "Bash", + "alternateName": "GNU Bourne-Again Shell", + "url": "https://www.gnu.org/software/bash/", + "version": "5.2" + } + ] +} diff --git a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py index 11d555980..d0cd6511e 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py +++ b/tests/integration/profiles/ro-crate-1.2/test_metadata_contextualEntities.py @@ -489,3 +489,61 @@ def test_encoding_format_no_fragment_not_triggered(): f"The MAY requirement {_ENCODING_FORMAT_MAY_REQUIREMENT!r} should NOT fire " f"when the encoding format entity @id does not contain a fragment identifier" ) + + +# --------------------------------------------------------------------------- +# SoftwareApplication / ComputerLanguage: MAY have `alternateName` [5.7] +# --------------------------------------------------------------------------- + +_SOFTWAREAPP_COMPUTERLANG_ALTERNATENAME_REQ = ( + "SoftwareApplication or ComputerLanguage: OPTIONAL 
`alternateName`" +) + + +def test_info_software_application_no_alternatename(): + """ + A SoftwareApplication contextual entity without an `alternateName` + property triggers an `sh:Info` suggestion at OPTIONAL severity + (RO-Crate 1.2, § 5.7). The existing `valid` SoftwareApplication crate + (used by `test_valid_software_application` at REQUIRED severity) is + reused here: at REQUIRED it passes, at OPTIONAL the MAY suggestion fires. + """ + do_entity_test( + __contextual_entities_crates__.valid_software_application, + models.Severity.OPTIONAL, + False, + profile_identifier="ro-crate-1.2", + expected_triggered_requirements=[_SOFTWAREAPP_COMPUTERLANG_ALTERNATENAME_REQ], + expected_triggered_issues=[ + "MAY declare an `alternateName` property" + ], + ) + + +def test_valid_computer_language_with_alternatename_no_info(): + """ + A ComputerLanguage contextual entity that already declares an + `alternateName` property does NOT trigger the MAY suggestion — the + optional recommendation is satisfied. + """ + do_entity_test( + __contextual_entities_crates__.valid_computer_language_with_alternatename, + models.Severity.OPTIONAL, + True, + profile_identifier="ro-crate-1.2", + ) + + +def test_valid_computer_language_alternatename_passes_at_optional(): + """ + A minimal, fully-specified crate with a + ComputerLanguage declaring `name`, `url`, `version`, and the MAY + `alternateName` passes ALL validation checks including OPTIONAL-level + INFO suggestions. 
+ """ + do_entity_test( + __contextual_entities_crates__.valid_computer_language_with_alternatename, + models.Severity.OPTIONAL, + True, + profile_identifier="ro-crate-1.2", + ) diff --git a/tests/ro_crates_v1_2.py b/tests/ro_crates_v1_2.py index 916d3a149..5baca5d02 100644 --- a/tests/ro_crates_v1_2.py +++ b/tests/ro_crates_v1_2.py @@ -689,6 +689,10 @@ def invalid_software_application_no_url(self) -> Path: def valid_computer_language(self) -> Path: return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid_computer_language" + @property + def valid_computer_language_with_alternatename(self) -> Path: + return self.CONTEXTUAL_ENTITIES_CRATES_PATH / "software_application" / "valid_with_alternatename" + # --- Encoding Format entity: MAY include `WebPageElement` when @id has a fragment (5.8) --- @property def valid_encoding_format_webpageelement(self) -> Path: From 0b9ce2560cd41f5fdc3583fbe4dc4aab9dbaf852 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 20 May 2026 11:08:21 +0200 Subject: [PATCH 154/352] fix(profiles): :sparkles: allow `identifier` to have a plain URL as value --- .../profiles/ro-crate/1.2/must/0_file_descriptor_format.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py index eb5d14165..107a3d09c 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/0_file_descriptor_format.py @@ -422,6 +422,7 @@ def check_entity_references(self, context: ValidationContext) -> bool: "startTime", "endTime", "url", + "identifier", } def check_value(value: Any, entity_id: str, key: Optional[str] = None) -> Optional[str]: From fbed5c82f293f46eb37a91a9e6fae2a27801b268 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 20 May 2026 12:56:44 +0200 Subject: [PATCH 155/352] fix(profiles): :fire: remove 
duplicate Web Data Entity checks in ro-crate 1.2 --- .../1.2/must/4_data_entity_metadata.py | 152 +++++------------- 1 file changed, 36 insertions(+), 116 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py index e4089bc44..fc4750b45 100644 --- a/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/1.2/must/4_data_entity_metadata.py @@ -14,9 +14,8 @@ import re -from rocrate_validator.models import Severity, ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging from rocrate_validator.utils.signposting import check_downloadable @@ -56,7 +55,8 @@ def check_availability(self, context: ValidationContext) -> bool: "are not required to be included in the RO-Crate payload" "(see https://github.com/ResearchObject/ro-crate/issues/400#issuecomment-2779152885 and " "https://github.com/ResearchObject/ro-crate/pull/426 for more details)", - entity.id) + entity.id, + ) continue if not entity.has_relative_path(): logger.debug( @@ -64,15 +64,18 @@ def check_availability(self, context: ValidationContext) -> bool: "According to the RO-Crate specification, local entities with absolute paths " "are not required to be included in the RO-Crate payload. 
" "It is only recommended that they exist at the time of RO-Crate creation.", - entity.id) + entity.id, + ) continue if not entity.is_available(): context.result.add_issue( - f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self) + f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self + ) result = False except Exception as e: context.result.add_issue( - f"Unable to check the the presence of the Data Entity '{entity.id}' within the RO-Crate", self) + f"Unable to check the the presence of the Data Entity '{entity.id}' within the RO-Crate", self + ) if logger.isEnabledFor(logging.DEBUG): logger.debug(e, exc_info=True) result = False @@ -105,7 +108,9 @@ def check_detached_entities(self, context: ValidationContext) -> bool: context.result.add_issue( f"Data Entity '{entity.id}' is not web-based, " f"but in a detached RO-Crate all Data Entities " - f"MUST have an absolute URL as @id", self) + f"MUST have an absolute URL as @id", + self, + ) result = False if context.fail_fast: return False @@ -127,10 +132,10 @@ def check_identifiers(self, context: ValidationContext) -> bool: try: root_data_entity = context.ro_crate.metadata.get_root_data_entity() root_entity_id = root_data_entity.id - root_entity_is_local = root_data_entity.id_as_uri.is_local_resource() \ - if root_data_entity.id_as_uri else False - root_entity_absolute_path = root_data_entity.id_as_path \ - if root_data_entity.has_absolute_path() else None + root_entity_is_local = ( + root_data_entity.id_as_uri.is_local_resource() if root_data_entity.id_as_uri else False + ) + root_entity_absolute_path = root_data_entity.id_as_path if root_data_entity.has_absolute_path() else None except Exception: pass for entity in context.ro_crate.metadata.get_data_entities(): @@ -139,7 +144,9 @@ def check_identifiers(self, context: ValidationContext) -> bool: if not root_entity_is_local and not entity.is_remote(): context.result.add_issue( f"Data Entity 
'{entity.id}' has a local identifier but the Root Data Entity " - "does not have a local identifier", self) + "does not have a local identifier", + self, + ) result = False if context.fail_fast: return False @@ -147,18 +154,20 @@ def check_identifiers(self, context: ValidationContext) -> bool: continue if "\\" in entity.id or " " in entity.id: context.result.add_issue( - f"Data Entity '{entity.id}' has an invalid @id; use URI-compatible paths", self) + f"Data Entity '{entity.id}' has an invalid @id; use URI-compatible paths", self + ) result = False if context.fail_fast: return False - if (root_entity_is_local and - not str(entity.id_as_path).startswith(str(root_entity_absolute_path))): - if (root_entity_is_local and not str(entity.id).startswith("./") and ( - str(entity.id).startswith("/") or - str(entity.id).startswith("file://") - )): + if root_entity_is_local and not str(entity.id_as_path).startswith(str(root_entity_absolute_path)): + if ( + root_entity_is_local + and not str(entity.id).startswith("./") + and (str(entity.id).startswith("/") or str(entity.id).startswith("file://")) + ): context.result.add_issue( - f"Data Entity '{entity.id}' MUST use a relative @id within the RO-Crate root", self) + f"Data Entity '{entity.id}' MUST use a relative @id within the RO-Crate root", self + ) result = False if context.fail_fast: return False @@ -175,7 +184,8 @@ def check_relative_paths(self, context: ValidationContext) -> bool: if entity.has_absolute_path(): if context.ro_crate.has_file(entity.id_as_path) or context.ro_crate.has_directory(entity.id_as_path): context.result.add_issue( - f"Data Entity '{entity.id}' should use a relative @id within the RO-Crate root", self) + f"Data Entity '{entity.id}' should use a relative @id within the RO-Crate root", self + ) result = False if context.fail_fast: return False @@ -203,14 +213,14 @@ def check_citation(self, context: ValidationContext) -> bool: citation_id = citation.id else: context.result.add_issue( - f"Citation for 
Data Entity '{entity.id}' must reference a publication @id", self) + f"Citation for Data Entity '{entity.id}' must reference a publication @id", self + ) result = False if context.fail_fast: return False continue if not re.match(r"^[A-Za-z][A-Za-z0-9+\.-]*:", citation_id): - context.result.add_issue( - f"Citation for Data Entity '{entity.id}' must be an absolute URI", self) + context.result.add_issue(f"Citation for Data Entity '{entity.id}' must be an absolute URI", self) result = False if context.fail_fast: return False @@ -255,98 +265,8 @@ def check_availability(self, context: ValidationContext) -> bool: context.result.add_issue(msg, self) result = False except Exception as e: - context.result.add_issue( - f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) - result = False - if not result and context.fail_fast: - return result - return result - - @check(name="Web-based Data Entity: RECOMMENDED resource availability", severity=Severity.RECOMMENDED) - def check_availability_warning(self, context: ValidationContext) -> bool: - if context.settings.skip_availability_check: - return True - if context.settings.creation_time or context.settings.enforce_availability: - return True - if context.settings.metadata_only: - return True - result = True - for entity in context.ro_crate.metadata.get_web_data_entities(): - assert entity.id is not None, "Entity has no @id" - try: - if not entity.is_available(): - context.result.add_issue( - f"Web-based Data Entity '{entity.id}' is not directly downloadable", self) - result = False - except Exception as e: - context.result.add_issue( - f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) + context.result.add_issue(f"Web-based Data Entity '{entity.id}' availability check failed: {e}", self) result = False if not result and context.fail_fast: return result return result - - @check(name="Web-based Data Entity: `contentSize` property", severity=Severity.RECOMMENDED) - def 
check_content_size(self, context: ValidationContext) -> bool: - if context.settings.skip_availability_check: - return True - result = True - for entity in context.ro_crate.metadata.get_web_data_entities(): - assert entity.id is not None, "Entity has no @id" - if entity.is_available(): - content_size = entity.get_property("contentSize") - if content_size: - if isinstance(content_size, str): - content_value = content_size - elif hasattr(content_size, "id"): - content_value = content_size.id - else: - content_value = str(content_size) - try: - content_int = int(str(content_value)) - except Exception: - content_int = None - external_size = context.ro_crate.get_external_file_size(entity.id) - if external_size is not None and content_int is not None and content_int != external_size: - context.result.add_issue( - f'The property contentSize={content_size} of the Web-based Data Entity ' - f'{entity.id} does not match the actual size of ' - f'the downloadable content, i.e., {external_size} (bytes)', self, - violatingEntity=entity.id, violatingProperty='contentSize', - violatingPropertyValue=str(content_value)) - result = False - if not result and context.fail_fast: - return result - return result - - @check(name="Web-based Data Entity: `contentUrl` availability", severity=Severity.RECOMMENDED) - def check_content_url(self, context: ValidationContext) -> bool: - if context.settings.skip_availability_check: - return True - result = True - for entity in context.ro_crate.metadata.get_web_data_entities(): - content_url = entity.get_property("contentUrl") - if not content_url: - continue - urls = content_url if isinstance(content_url, list) else [content_url] - for url in urls: - url_value = url if isinstance(url, str) else url.id if hasattr(url, "id") else None - if not url_value or not url_value.startswith("http"): - continue - try: - dl = check_downloadable(url_value) - if not dl.is_downloadable: - msg = (f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " - 
"is not directly downloadable") - if dl.reason: - msg += f": {dl.reason}" - context.result.add_issue(msg, self) - result = False - except Exception as e: - context.result.add_issue( - f"contentUrl '{url_value}' for Web-based Data Entity '{entity.id}' " - f"availability check failed: {e}", self) - result = False - if not result and context.fail_fast: - return result - return result From fb4356ef0e54ac39ec9992694ad81a448f5b3553 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 20 May 2026 12:58:36 +0200 Subject: [PATCH 156/352] fix(utils): :loud_sound: fix log message level --- rocrate_validator/utils/signposting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/signposting.py b/rocrate_validator/utils/signposting.py index 4f21a4259..7730366b3 100644 --- a/rocrate_validator/utils/signposting.py +++ b/rocrate_validator/utils/signposting.py @@ -158,7 +158,7 @@ def check_downloadable(url: str) -> DownloadabilityResult: ) except Exception as e: - logger.error("Error checking downloadability of '%s': %s", url, e, exc_info=True) + logger.debug("Error checking downloadability of '%s': %s", url, e) return DownloadabilityResult( is_downloadable=False, reason=str(e), From 5afa76a3c228145bed7606fe7c7a0592e36ce42d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 14:45:57 +0200 Subject: [PATCH 157/352] fix(utils): :adhesive_bandage: fix logger factory --- rocrate_validator/utils/log.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index d7412a435..c025bd9cb 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -76,7 +76,7 @@ def _releaseLock(): # reference to the list of create loggers -__loggers__ = {} +__loggers__: dict[str, Logger] = {} # user settings for the loggers __settings__ = DEFAULT_SETTINGS.copy() @@ -139,12 +139,13 @@ def __setup_logger__(logger: Logger): def 
__create_logger__(name: str) -> Logger: - logger: Logger = None if not isinstance(name, str): raise TypeError('A logger name must be a string') _acquireLock() try: - if name not in __loggers__: + # Return the cached logger if it already exists, otherwise create it. + logger = __loggers__.get(name) + if logger is None: logger = colorlog.getLogger(name) __setup_logger__(logger) __loggers__[name] = logger From b3efaab6dcad98b24d2853e7e8ba9570870e4957 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 14:49:09 +0200 Subject: [PATCH 158/352] =?UTF-8?q?fix:=20=F0=9F=9B=A0=EF=B8=8F=20correct?= =?UTF-8?q?=20index=20access=20and=20handling=20in=20MultiIndexMap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/collections.py b/rocrate_validator/utils/collections.py index 4dfdbac48..bf2a04d19 100644 --- a/rocrate_validator/utils/collections.py +++ b/rocrate_validator/utils/collections.py @@ -52,7 +52,7 @@ def remove_index(self, index_name: str): self._indices.pop(index_name) def get_index(self, index_name: str) -> MapIndex: - return self._indices.get(index_name)["__meta__"] + return self._indices[index_name]["__meta__"] def add(self, key, obj, **indices): self._data[key] = obj From 1e07724d6b1f074a47569ca98f150fda3e8cfa36 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 14:50:37 +0200 Subject: [PATCH 159/352] refactor(utils): :rotating_light: fix missing typings --- rocrate_validator/utils/collections.py | 8 +++++--- rocrate_validator/utils/log.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/utils/collections.py b/rocrate_validator/utils/collections.py index bf2a04d19..a8f2dc98f 100644 --- a/rocrate_validator/utils/collections.py +++ b/rocrate_validator/utils/collections.py @@ -14,6 +14,8 @@ from __future__ import 
annotations +from typing import Optional + class MapIndex: @@ -23,15 +25,15 @@ def __init__(self, name: str, unique: bool = False): class MultiIndexMap: - def __init__(self, key: str = "id", indexes: list[MapIndex] = None): + def __init__(self, key: str = "id", indexes: Optional[list[MapIndex]] = None): self._key = key # initialize an empty dictionary to store the indexes - self._indices: list[MapIndex] = {} + self._indices: dict[str, dict] = {} if indexes: for index in indexes: self.add_index(index) # initialize an empty dictionary to store the data - self._data = {} + self._data: dict = {} @property def key(self) -> str: diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index c025bd9cb..9a55bcf86 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -16,7 +16,7 @@ import sys import threading from io import StringIO -from logging import (CRITICAL, DEBUG, ERROR, INFO, WARNING, Logger, +from logging import (CRITICAL, DEBUG, ERROR, INFO, WARNING, Handler, Logger, StreamHandler, basicConfig as logging_basicConfig) from typing import Optional @@ -82,7 +82,7 @@ def _releaseLock(): __settings__ = DEFAULT_SETTINGS.copy() # store logger handlers (only one handler per logger) -__handlers__ = {} +__handlers__: dict[str, Handler] = {} # Create a StringIO stream to capture the logs @@ -196,7 +196,7 @@ def basicConfig(level: int, modules_config: Optional[dict] = None): _releaseLock() -def getLogger(name: str) -> Logger: +def getLogger(name: str) -> "LoggerProxy": return LoggerProxy(name) From 7247dc64153ab6c8366f5be84865e8bfd7e5ea9c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 15:21:38 +0200 Subject: [PATCH 160/352] refactor(models): :recycle: fix implicit Optionals and missing typing annotations --- rocrate_validator/models.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 
cb69fe77d..394b359a7 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -228,7 +228,7 @@ def __init__( profiles_base_path: Path, profile_path: Path, requirements: Optional[list[Requirement]] = None, - identifier: str = None, + identifier: Optional[str] = None, publicID: Optional[str] = None, severity: Severity = Severity.REQUIRED, ): @@ -565,7 +565,7 @@ def get_requirement_check(self, check_name: str) -> Optional[RequirementCheck]: @classmethod def __get_nested_profiles__(cls, source: str) -> list[str]: - result = [] + result: list[str] = [] visited = [] queue = [source] while len(queue) > 0: @@ -712,8 +712,8 @@ def __load_profile_path__( @classmethod def __load_profiles_paths__( cls, - profiles_path: Union[str, Path] = None, - extra_profiles_path: Union[str, Path] = None, + profiles_path: Optional[Union[str, Path]] = None, + extra_profiles_path: Optional[Union[str, Path]] = None, ) -> list[Tuple[Path, Path]]: """ Load the paths of the profiles from the given profiles path and extra profiles path. 
@@ -756,13 +756,13 @@ def __load_profiles_paths__( def load_profiles( cls, profiles_path: Union[str, Path], - extra_profiles_path: Union[str, Path] = None, + extra_profiles_path: Optional[Union[str, Path]] = None, publicID: Optional[str] = None, severity: Severity = Severity.REQUIRED, allow_requirement_check_override: bool = True, ) -> list[Profile]: # initialize the profiles list - profiles = [] + profiles: list[Profile] = [] # calculate the list of profiles path as the subdirectories of the profiles path # where the profile specification file is present profiles_paths = cls.__load_profiles_paths__(profiles_path, extra_profiles_path) @@ -1329,7 +1329,7 @@ def all_subclasses( return all_subclasses(Requirement) @staticmethod - def load_requirements(profile: Profile, severity: Severity = None) -> list[Requirement]: + def load_requirements(profile: Profile, severity: Optional[Severity] = None) -> list[Requirement]: """ Load the requirements related to the profile """ @@ -1696,7 +1696,7 @@ def __init__( self._context = context self._stats = self.__initialise__(settings) if not skip_initialization else {} self._result: Optional[ValidationResult] = None - self._listeners = [] + self._listeners: list[ValidationStatisticsListener] = [] # self._target_profile: Optional[Profile] = None @property @@ -1896,7 +1896,7 @@ def __initialise__(cls, validation_settings: ValidationSettings): logger.debug("Inherited profiles: %r", profile.inherited_profiles) # Initialize the counters - checks_by_severity = {} + checks_by_severity: dict[Severity, set[RequirementCheck]] = {} checks: set[RequirementCheck] = set() requirements: set[Requirement] = set() @@ -1988,12 +1988,15 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: logger.debug("Validation started") self._stats["started_at"] = datetime.now(timezone.utc) if event.event_type == EventType.PROFILE_VALIDATION_START: + assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation 
start: %s", event.profile.identifier) elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: logger.debug("Requirement validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: + assert isinstance(event, RequirementCheckValidationEvent) + assert ctx is not None target_profile = ctx.target_validation_profile if not event.requirement_check.requirement.hidden and ( not event.requirement_check.overridden @@ -2017,6 +2020,7 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: event.requirement_check.identifier, ) elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: + assert isinstance(event, RequirementValidationEvent) if not event.requirement.hidden: if event.validation_result: self._stats["passed_requirements"].append(event.requirement) @@ -2025,9 +2029,11 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: self._stats["validated_requirements"].append(event.requirement) self.notify_listeners() elif event.event_type == EventType.PROFILE_VALIDATION_END: + assert isinstance(event, ProfileValidationEvent) self._stats["validated_profiles"].append(event.profile) logger.debug("Profile validation ended: %s", event.profile.identifier) elif event.event_type == EventType.VALIDATION_END: + assert isinstance(event, ValidationEvent) self._result = event.validation_result self._stats["finished_at"] = datetime.now(timezone.utc) logger.debug("Validation ended with result: %s", event.validation_result) @@ -2460,7 +2466,7 @@ def get_issues(self, min_severity: Optional[Severity] = None) -> list[CheckIssue min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.severity >= min_severity] - def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity = None) -> list[CheckIssue]: + def 
get_issues_by_check(self, check: RequirementCheck, min_severity: Optional[Severity] = None) -> list[CheckIssue]: """ Get the issues found during the validation for a specific check with a severity greater than or equal to `min_severity` @@ -2955,7 +2961,7 @@ def detect_rocrate_profiles(self) -> list[Profile]: try: # initialize the validation context context = ValidationContext(self, self.validation_settings) - candidate_profiles_uris = set() + candidate_profiles_uris: set[str] = set() try: candidate_profiles_uris.update(context.ro_crate.metadata.get_conforms_to()) except Exception as e: @@ -3154,7 +3160,7 @@ def __init__(self, validator: Validator, settings: ValidationSettings): # reference to the validation result self._result = None # additional properties for the context - self._properties = {} + self._properties: dict = {} # URLs already reported as missing from the HTTP cache during this run self._offline_cache_misses_warned: set[str] = set() From 19ff3b45dcdbbfad8be483ebd20fd2a869381137 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 12:16:47 +0200 Subject: [PATCH 161/352] refactor(models): :recycle: fix implicit Optionals and type annotations Narrow return types (Optional/None), add explicit casts for rdflib nodes and add missing annotations to satisfy mypy. 
--- rocrate_validator/models.py | 108 ++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 394b359a7..0b6fb8a9e 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -26,7 +26,7 @@ from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import Optional, Protocol, Tuple, Type, Union +from typing import Any, Optional, Protocol, Tuple, Type, Union, cast from urllib.error import HTTPError import enum_tools @@ -277,7 +277,7 @@ def __init__( # check if the profile specification file exists spec_file = self.profile_specification_file_path if not spec_file or not spec_file.exists(): - raise ProfileSpecificationNotFound(spec_file) + raise ProfileSpecificationNotFound(str(spec_file)) # load the profile specification expressed using the Profiles Vocabulary profile = Graph() profile.parse(str(spec_file), format="turtle") @@ -290,7 +290,7 @@ def __init__( self._token, self._version = self.__init_token_version__() # Check if the profile is overriding an existing profile - existing_profile = self.__profiles_map.get_by_key(self._profile_node.toPython()) + existing_profile = self.__profiles_map.get_by_key(cast(Any, self._profile_node).toPython()) if existing_profile: # Check if the existing profile is different from the current one if existing_profile.path != profile_path: @@ -306,7 +306,7 @@ def __init__( # add the profile to the profiles map self.__profiles_map.add( - self._profile_node.toPython(), + cast(Any, self._profile_node).toPython(), self, token=self.token, name=self.name, @@ -324,13 +324,12 @@ def __get_specification_property__( namespace: Namespace, pop_first: bool = True, as_Python_object: bool = True, - ) -> Union[str, list[Union[str, URIRef]]]: + ) -> Union[str, list[Union[str, URIRef]], None]: assert self._profile_specification_graph is not None, "Profile specification graph 
not loaded" - values = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) - if values and as_Python_object: - values = [v.toPython() for v in values] + nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) + values: list = [cast(Any, v).toPython() for v in nodes] if (nodes and as_Python_object) else list(nodes) if pop_first: - return values[0] if values and len(values) >= 1 else None + return values[0] if values else None return values def __add_override__(self, profile: Profile): @@ -441,7 +440,7 @@ def is_profile_of(self) -> list[str]: as specified in the profile specification file (i.e., the value of the prof: isProfileOf property in the `profile.ttl` file). """ - return self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False) + return cast(list[str], self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False)) @property def is_transitive_profile_of(self) -> list[str]: @@ -450,7 +449,7 @@ def is_transitive_profile_of(self) -> list[str]: as specified in the profile specification file (i.e., the value of the prof: isTransitiveProfileOf property in the `profile.ttl` file). 
""" - return self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False) + return cast(list[str], self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False)) @property def parents(self) -> list[Profile]: @@ -639,11 +638,11 @@ def __extract_version_from_token__(token: str) -> Optional[str]: return matches[-1][0] return None - def __get_consistent_version__(self, candidate_token: str) -> str: + def __get_consistent_version__(self, candidate_token: str) -> Optional[str]: candidates = { _ for _ in [ - self.__get_specification_property__("version", SCHEMA_ORG_NS), + cast(Optional[str], self.__get_specification_property__("version", SCHEMA_ORG_NS)), self.__extract_version_from_token__(candidate_token), self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), self.__extract_version_from_token__(str(self.uri)), @@ -667,9 +666,9 @@ def __extract_token_from_path__(self) -> str: identifier = identifier.replace("/", "-") return identifier - def __init_token_version__(self) -> Tuple[str, str, str]: + def __init_token_version__(self) -> Tuple[str, Optional[str]]: # try to extract the token from the specs or the path - candidate_token = self.__get_specification_property__("hasToken", PROF_NS) + candidate_token = cast(Optional[str], self.__get_specification_property__("hasToken", PROF_NS)) if not candidate_token: candidate_token = self.__extract_token_from_path__() logger.debug("Candidate token: %s", candidate_token) @@ -688,7 +687,7 @@ def __init_token_version__(self) -> Tuple[str, str, str]: @classmethod def __load_profile_path__( cls, - profiles_base_path: str, + profiles_base_path: Union[str, Path], profile_path: Union[str, Path], publicID: Optional[str] = None, severity: Severity = Severity.REQUIRED, @@ -698,10 +697,10 @@ def __load_profile_path__( profile_path = Path(profile_path) # check if the path is a directory if not profile_path.is_dir(): - raise InvalidProfilePath(profile_path) + 
raise InvalidProfilePath(str(profile_path)) # create a new profile profile = Profile( - profiles_base_path=profiles_base_path, + profiles_base_path=Path(profiles_base_path), profile_path=profile_path, publicID=publicID, severity=severity, @@ -740,7 +739,7 @@ def __load_profiles_paths__( root_profile_directory = Path(root_profile_directory) # check if the path is a directory and raise an error if not if not root_profile_directory.is_dir(): - raise InvalidProfilePath(root_profile_directory) + raise InvalidProfilePath(str(root_profile_directory)) # if the path is a directory, get the profile directories result.extend( [ @@ -863,7 +862,7 @@ def get_by_name(cls, name: str) -> list[Profile]: return cls.__profiles_map.get_by_index("name", name) @classmethod - def get_by_token(cls, token: str) -> Profile: + def get_by_token(cls, token: str) -> list[Profile]: """ Get the profile with the given token @@ -968,9 +967,9 @@ def __init__( self._profile = profile self._description = description self._path = path # path of code implementing the requirement - self._level_from_path = None + self._level_from_path: Optional[RequirementLevel] = None self._checks: list[RequirementCheck] = [] - self._overridden = None + self._overridden: Optional[bool] = None if not name and path: self._name = get_requirement_name_from_file(path) @@ -1025,12 +1024,12 @@ def name(self) -> str: return self._name @property - def severity_from_path(self) -> Severity: + def severity_from_path(self) -> Optional[Severity]: return self.requirement_level_from_path.severity if self.requirement_level_from_path else None @property - def requirement_level_from_path(self) -> RequirementLevel: - if not self._level_from_path: + def requirement_level_from_path(self) -> Optional[RequirementLevel]: + if not self._level_from_path and self._path: try: self._level_from_path = LevelCollection.get(self._path.parent.name) except ValueError: @@ -1326,10 +1325,10 @@ def all_subclasses( result.extend(all_subclasses(subcls)) return 
result - return all_subclasses(Requirement) + return all_subclasses(Requirement) # type: ignore[type-abstract] @staticmethod - def load_requirements(profile: Profile, severity: Optional[Severity] = None) -> list[Requirement]: + def load_requirements(profile: Profile, severity: Severity = Severity.REQUIRED) -> list[Requirement]: """ Load the requirements related to the profile """ @@ -1364,7 +1363,7 @@ def ok_file(p: Path) -> bool: requirement_path, ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) - for requirement in requirement_loader.load( + for requirement in cast(Any, requirement_loader).load( profile, requirement_level, requirement_path, @@ -1642,7 +1641,7 @@ def to_dict( with_requirement: bool = True, with_profile: bool = True, ) -> dict: - result = { + result: dict[str, Any] = { "severity": self.severity.name, "message": self.message, "violatingEntity": self.violatingEntity, @@ -1740,7 +1739,7 @@ def profile(self) -> Profile: """ Get the profile being validated """ - return self._stats.get("profile") + return cast(Profile, self._stats.get("profile")) @property def profiles(self) -> list[Profile]: @@ -1754,7 +1753,7 @@ def severity(self) -> Severity: """ Get the validation severity level """ - return self._stats.get("severity") + return cast(Severity, self._stats.get("severity")) @property def checks_by_severity(self) -> dict: @@ -1882,10 +1881,10 @@ def __initialise__(cls, validation_settings: ValidationSettings): profiles: list[Profile] = Profile.load_profiles( validation_settings.profiles_path, extra_profiles_path=validation_settings.extra_profiles_path, - severity=severity_validation, + severity=cast(Severity, severity_validation), allow_requirement_check_override=validation_settings.allow_requirement_check_override, ) - profile: Profile = Profile.find_in_list(profiles, validation_settings.profile_identifier) + profile: Profile = cast(Profile, Profile.find_in_list(profiles, 
validation_settings.profile_identifier)) target_profile_identifier = profile.identifier # initialize the profiles list profiles = [profile] @@ -2579,10 +2578,10 @@ def to_dict(self) -> dict: validation_settings = { key: value for key, value in self.validation_settings.to_dict().items() if key in allowed_properties } - result = { + result: dict[str, Any] = { "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, "validation_settings": validation_settings, - "passed": self.passed(self.context.settings.requirement_severity), + "passed": self.passed(cast(Severity, self.context.settings.requirement_severity)), "issues": [issue.to_dict() for issue in self.issues], } # add validator version to the settings @@ -2665,11 +2664,11 @@ class ValidationSettings: #: Flag to disable the check for duplicates disable_check_for_duplicates: bool = False #: Checks to skip - skip_checks: list[str] = None + skip_checks: Optional[list[str]] = None #: Flag to validate only the metadata of the RO-Crate metadata_only: bool = False #: RO-Crate metadata as dictionary - metadata_dict: dict = None + metadata_dict: Optional[dict] = None #: Verbose output verbose: bool = False #: Cache max age in seconds (negative values mean "never expire") @@ -2743,7 +2742,7 @@ def to_dict(self): result.pop("requirement_severity_only", None) return result - @property + @property # type: ignore[no-redef] def rocrate_uri(self) -> Optional[URI]: """ Get the RO-Crate URI @@ -2763,7 +2762,7 @@ def rocrate_uri(self, value: URI): """ if not value: raise ValueError("Invalid RO-Crate URI") - self._rocrate_uri: URI = URI(value) + self._rocrate_uri: URI = URI(str(value)) @classmethod def parse(cls, settings: Union[dict, ValidationSettings]) -> ValidationSettings: @@ -2944,11 +2943,11 @@ class Validator(Publisher): Validates the RO-Crate against the specified subset of the profile requirements. 
""" - def __init__(self, settings: Union[str, ValidationSettings]): + def __init__(self, settings: Union[dict, ValidationSettings]): self._validation_settings = ValidationSettings.parse(settings) super().__init__() # initialize the current context - self.__current_context__ = None + self.__current_context__: Optional[ValidationContext] = None @property def validation_settings(self) -> ValidationSettings: @@ -2963,18 +2962,18 @@ def detect_rocrate_profiles(self) -> list[Profile]: context = ValidationContext(self, self.validation_settings) candidate_profiles_uris: set[str] = set() try: - candidate_profiles_uris.update(context.ro_crate.metadata.get_conforms_to()) + candidate_profiles_uris.update(context.ro_crate.metadata.get_conforms_to() or []) except Exception as e: logger.debug("Error while getting candidate profiles URIs: %s", e) try: - candidate_profiles_uris.update(context.ro_crate.metadata.get_root_data_entity_conforms_to()) + candidate_profiles_uris.update(context.ro_crate.metadata.get_root_data_entity_conforms_to() or []) except Exception as e: logger.debug("Error while getting candidate profiles URIs: %s", e) logger.debug("Candidate profiles: %s", candidate_profiles_uris) if not candidate_profiles_uris: logger.debug("Unable to determine the profile to validate against") - return None + return [] # load the profiles profiles = [] candidate_profiles = [] @@ -3010,7 +3009,7 @@ def detect_rocrate_profiles(self) -> list[Profile]: except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) - return None + return [] def validate(self) -> ValidationResult: """ @@ -3131,7 +3130,7 @@ def __invoke_post_validation_hooks__(self, context: ValidationContext): requirement_type.finalize(context) logger.debug("Finalizing requirement types: completed") - def notify(self, event: Union[Event, EventType]): + def notify(self, event: Union[Event, EventType], ctx: Optional[Any] = None): """Override notify to update statistics""" assert self.__current_context__ 
is not None, "No current validation context" result: ValidationResult = self.__current_context__.result @@ -3152,13 +3151,13 @@ def __init__(self, validator: Validator, settings: ValidationSettings): # reference to the settings self._settings = settings # reference to the data graph - self._data_graph = None + self._data_graph: Optional[Graph] = None # reference to the profiles - self._profiles = None + self._profiles: Optional[list[Profile]] = None # reference to the target profile - self._target_validation_profile = None + self._target_validation_profile: Optional[Profile] = None # reference to the validation result - self._result = None + self._result: Optional[ValidationResult] = None # additional properties for the context self._properties: dict = {} # URLs already reported as missing from the HTTP cache during this run @@ -3295,7 +3294,7 @@ def fail_fast(self) -> bool: :return: The flag to abort on first error :rtype: bool """ - return self.settings.abort_on_first + return bool(self.settings.abort_on_first) @property def rel_fd_path(self) -> Path: @@ -3311,7 +3310,7 @@ def __load_data_graph__(self) -> Graph: data_graph = Graph() logger.debug("Loading RO-Crate metadata of: %s", self.ro_crate.uri) _ = data_graph.parse( - data=self.ro_crate.metadata.as_dict(), + data=self.ro_crate.metadata.as_dict(), # type: ignore[arg-type] format="json-ld", publicID=self.publicID, ) @@ -3338,7 +3337,7 @@ def get_data_graph(self, refresh: bool = False) -> Graph: return self._data_graph except (HTTPError, FileNotFoundError) as e: logger.debug("Error loading data graph: %s", e) - raise ROCrateMetadataNotFoundError(self.rocrate_uri) + raise ROCrateMetadataNotFoundError(str(self.rocrate_uri)) @property def data_graph(self) -> Graph: @@ -3458,6 +3457,7 @@ def target_validation_profile(self) -> Profile: :return: The target validation profile :rtype: Profile """ + assert self._target_validation_profile is not None, "Target validation profile not set" return 
self._target_validation_profile @property @@ -3484,7 +3484,7 @@ def get_profile_by_token(self, token: str) -> list[Profile]: """ return [p for p in self.profiles if p.token == token] - def get_profile_by_identifier(self, identifier: str) -> list[Profile]: + def get_profile_by_identifier(self, identifier: str) -> Profile: """ Get the profile by identifier from the profiles to validate against From 667d8b93f08f6878154cace223efd01028390a6e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 12:24:18 +0200 Subject: [PATCH 162/352] fix(models): :bug: call `as_json()` before passing the result to `as_graph()` --- rocrate_validator/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 0d9d4c6bb..00c45858a 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -430,7 +430,7 @@ def as_graph(self, publicID: str = None) -> Graph: if not self._graph: # if the graph is not cached, load it self._graph = Graph(base=publicID or self.ro_crate.uri) - self._graph.parse(data=self.as_json, format="json-ld") + self._graph.parse(data=self.as_json(), format="json-ld") return self._graph def __str__(self) -> str: From ff196b4a2dd7e5a4b459ef9f1587437128c6822a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 12:25:22 +0200 Subject: [PATCH 163/352] fix(models): :bug: add missing `return` statement --- rocrate_validator/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 00c45858a..1cbd7fa93 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -926,7 +926,7 @@ def list_files(self) -> list[Path]: return self._files def list_entries(self) -> list[zipfile.ZipInfo]: - self._zipref.infolist() + return self._zipref.infolist() def get_entry(self, path: Path) -> zipfile.ZipInfo: """ From cce407f55b713af5ca372f3fe9a139f45decf380 Mon 
Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 12:26:38 +0200 Subject: [PATCH 164/352] refactor(rocrate): :recycle: fix implicit Optionals and type annotations Add missing type annotations, fix implicit Optionals on default args and narrow return types --- rocrate_validator/rocrate.py | 116 +++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 52 deletions(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 1cbd7fa93..dbb7da804 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -21,7 +21,7 @@ import zipfile from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional, Union +from typing import Any, Optional, Union, cast from urllib.parse import unquote from rdflib import Graph @@ -38,17 +38,17 @@ class ROCrateEntity: - def __init__(self, metadata: ROCrateMetadata, raw_data: object) -> None: - self._raw_data = raw_data + def __init__(self, metadata: ROCrateMetadata, raw_data: dict) -> None: + self._raw_data: dict = raw_data self._metadata = metadata @property def id(self) -> str: - return self._raw_data.get('@id') + return cast(str, self._raw_data.get('@id')) @property def type(self) -> Union[str, list[str]]: - return self._raw_data.get('@type') + return cast(Union[str, list[str]], self._raw_data.get('@type')) def is_dataset(self) -> bool: return self.has_type('Dataset') @@ -58,7 +58,7 @@ def is_file(self) -> bool: @property def name(self) -> str: - return self._raw_data.get('name') + return cast(str, self._raw_data.get('name')) @property def metadata(self) -> ROCrateMetadata: @@ -85,7 +85,7 @@ def get_id_as_path(cls, entity_id: str, ro_crate: Optional[ROCrate] = None) -> P def get_path_from_identifier( identifier: str, rocrate_path: Optional[Union[str, Path]] = None, - decode: bool = None, + decode: bool = False, ) -> Path: """ Get the path from an identifier. 
@@ -108,25 +108,27 @@ def __define_path__(path: str, decode: bool = False) -> Path: if decode: path = unquote(path) # Convert the path to a Path object - path = Path(path) + path_obj = Path(path) # if the path is absolute, return it - if path.is_absolute(): - return path - try: - # set the base path + if path_obj.is_absolute(): + return path_obj + # set the base path + base_path: Path + if rocrate_path is None: + base_path = Path("./") + elif not isinstance(rocrate_path, Path): + base_path = Path(rocrate_path) + else: base_path = rocrate_path - if base_path is None: - base_path = Path("./") - elif not isinstance(base_path, Path): - base_path = Path(base_path) + try: # Check if the path if the root of the RO-Crate - if path == Path("./"): + if path_obj == Path("./"): return base_path # if the path is relative, try to resolve it - return base_path / path.relative_to(base_path) + return base_path / path_obj.relative_to(base_path) except ValueError: # if the path cannot be resolved, return the absolute path - return base_path / path + return base_path / path_obj # Define the path based on the identifier path = __define_path__(identifier, decode=decode) @@ -198,11 +200,11 @@ def __process_property__(self, name: str, data: object) -> object: if isinstance(data, dict) and "@id" in data: entity = self.metadata.get_entity(data["@id"]) if entity is None: - return ROCrateEntity(self, data) + return ROCrateEntity(self.metadata, data) return entity return data - def get_property(self, name: str, default=None) -> Union[str, ROCrateEntity]: + def get_property(self, name: str, default=None) -> Any: data = self._raw_data.get(name, default) if data is None: return None @@ -253,17 +255,17 @@ def check_availability(self) -> AvailabilityStatus: if isinstance(self.ro_crate, ROCrateLocalZip): if self.id == "./": return AvailabilityStatus.AVAILABLE - found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( - unquote(str(self.id)) + found = 
self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( + Path(unquote(str(self.id))) ) return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE if self.ro_crate.uri.is_remote_resource(): if self.id == "./": - found = self.ro_crate.get_file_size(Path(self.id_as_uri())) > 0 + found = self.ro_crate.get_file_size(self.id_as_path) > 0 else: - found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( - unquote(str(self.id)) + found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( + Path(unquote(str(self.id))) ) return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE except Exception as e: @@ -292,7 +294,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return str(self) - def __eq__(self, other: ROCrateEntity) -> bool: + def __eq__(self, other: object) -> bool: if not isinstance(other, ROCrateEntity): return False return self.id == other.id @@ -304,7 +306,8 @@ class ROCrateMetadata: def __init__(self, ro_crate: ROCrate, metadata_dict: Optional[dict] = None) -> None: self._ro_crate = ro_crate self._dict = metadata_dict - self._json: str = json.dumps(metadata_dict) if metadata_dict else None + self._json: Optional[str] = json.dumps(metadata_dict) if metadata_dict else None + self._graph: Optional[Graph] = None @property def ro_crate(self) -> ROCrate: @@ -353,7 +356,7 @@ def get_main_workflow(self) -> ROCrateEntity: raise ValueError("no main workflow in metadata file descriptor") return main_workflow - def get_entity(self, entity_id: str) -> ROCrateEntity: + def get_entity(self, entity_id: str) -> Optional[ROCrateEntity]: for entity in self.as_dict().get("@graph", []): if entity.get("@id") == entity_id: return ROCrateEntity(self, entity) @@ -368,9 +371,10 @@ def get_entities(self) -> list[ROCrateEntity]: def get_entities_by_type( self, entity_type: Union[str, list[str]] ) -> list[ROCrateEntity]: + entity_types = [entity_type] 
if isinstance(entity_type, str) else entity_type entities = [] for e in self.get_entities(): - if e.has_types(entity_type): + if e.has_types(entity_types): entities.append(e) return entities @@ -415,9 +419,9 @@ def get_conforms_to(self) -> Optional[list[str]]: def as_json(self) -> str: if not self._json: - self._json = self.ro_crate.get_file_content( + self._json = cast(str, self.ro_crate.get_file_content( Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False - ) + )) return self._json def as_dict(self) -> dict: @@ -426,10 +430,10 @@ def as_dict(self) -> dict: self._dict = json.loads(self.as_json()) return self._dict - def as_graph(self, publicID: str = None) -> Graph: + def as_graph(self, publicID: Optional[str] = None) -> Graph: if not self._graph: # if the graph is not cached, load it - self._graph = Graph(base=publicID or self.ro_crate.uri) + self._graph = Graph(base=publicID or str(self.ro_crate.uri)) self._graph.parse(data=self.as_json(), format="json-ld") return self._graph @@ -439,7 +443,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return str(self) - def __eq__(self, other: ROCrateMetadata) -> bool: + def __eq__(self, other: object) -> bool: if not isinstance(other, ROCrateMetadata): return False return self.ro_crate == other.ro_crate @@ -450,7 +454,7 @@ class ROCrate(ABC): Base class for representing and interacting with a Research Object Crate (RO-Crate). """ - def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Path = None): + def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None): """ Factory method to create the appropriate ROCrate subclass instance. 
@@ -476,7 +480,7 @@ def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Path = None): instance.relative_root_path = relative_root_path return instance - def __init__(self, uri: Union[str, Path, URI], relative_root_path: Path = None) -> None: + def __init__(self, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None) -> None: """ Initialize the RO-Crate. @@ -487,19 +491,19 @@ def __init__(self, uri: Union[str, Path, URI], relative_root_path: Path = None) """ # store the path to the crate - self._uri = URI(uri) + self._uri = uri if isinstance(uri, URI) else URI(uri) # the relative root path inside the RO-Crate self.relative_root_path = relative_root_path # cache the list of files - self._files = None + self._files: Optional[list[Path]] = None # initialize variables to cache the data - self._dict: dict = None - self._graph = None + self._dict: Optional[dict] = None + self._graph: Optional[Graph] = None - self._metadata = None + self._metadata: Optional[ROCrateMetadata] = None @property def uri(self) -> URI: @@ -520,6 +524,7 @@ def metadata(self) -> ROCrateMetadata: self._metadata = ROCrateMetadata(self) return self._metadata + @property @abstractmethod def size(self) -> int: """ @@ -530,7 +535,6 @@ def size(self) -> int: """ pass - @property @abstractmethod def list_files(self) -> list[Path]: """ @@ -578,7 +582,7 @@ def __check_search_path__(self, path) -> tuple[Optional[Path], Optional[Path]]: search_path, root_path = self.__get_search_path__(path) # Check if the path has the substring 'relative_root_path/' in it - has_sub_data_path = re.search(self.relative_root_path, str(search_path)) + has_sub_data_path = re.search(str(self.relative_root_path), str(search_path)) if not has_sub_data_path: return search_path, root_path return None, None @@ -742,8 +746,9 @@ def from_metadata_dict( :raises ROCrateInvalidURIError: if the URI is invalid """ - # create a new instance based on the URI - ro_crate = ROCrate(URI("./"), relative_root_path=None) + 
# create a new instance based on the URI (the ROCrate factory __new__ + # dispatches to a concrete subclass, so this is not truly abstract) + ro_crate = ROCrate(URI("./"), relative_root_path=None) # type: ignore[abstract] # override the metadata with the provided dictionary ro_crate._metadata = ROCrateMetadata(ro_crate, metadata_dict=metadata_dict) @@ -802,7 +807,7 @@ class ROCrateLocalFolder(ROCrate): Class representing an RO-Crate stored in a local folder. """ - def __init__(self, path: Union[str, Path, URI], relative_root_path: Path = None): + def __init__(self, path: Union[str, Path, URI], relative_root_path: Optional[Path] = None): super().__init__(path, relative_root_path=relative_root_path) # cache the list of files @@ -844,13 +849,13 @@ class ROCrateLocalZip(ROCrate): def __init__( self, path: Union[str, Path, URI], - relative_root_path: Path = None, + relative_root_path: Optional[Path] = None, init_zip: bool = True, ): super().__init__(path, relative_root_path=relative_root_path) # initialize the zip reference - self._zipref = None + self._zipref: Optional[zipfile.ZipFile] = None if init_zip: self.__init_zip_reference__() @@ -886,7 +891,8 @@ def __init_zip_reference__(self): self._zipref = zipfile.ZipFile(path) logger.debug("Initialized zip reference: %s", self._zipref) - def __get_file_info__(self, path: Path) -> zipfile.ZipInfo: + def __get_file_info__(self, path: Union[str, Path]) -> zipfile.ZipInfo: + assert self._zipref is not None, "Zip reference not initialized" try: return self._zipref.getinfo(str(path)) except KeyError: @@ -911,6 +917,7 @@ def has_file(self, path: Path) -> bool: def has_directory(self, path: Path) -> bool: assert path, "Path cannot be None" + assert self._zipref is not None, "Zip reference not initialized" for px in (path, self.__parse_path__(path)): for p in self._zipref.namelist(): if f"{str(px)}/" == str(p) or str(px) == str(p): @@ -920,12 +927,14 @@ def has_directory(self, path: Path) -> bool: def list_files(self) -> 
list[Path]: if not self._files: + assert self._zipref is not None, "Zip reference not initialized" self._files = [] for file in self._zipref.namelist(): self._files.append(Path(file)) return self._files def list_entries(self) -> list[zipfile.ZipInfo]: + assert self._zipref is not None, "Zip reference not initialized" return self._zipref.infolist() def get_entry(self, path: Path) -> zipfile.ZipInfo: @@ -935,6 +944,7 @@ def get_entry(self, path: Path) -> zipfile.ZipInfo: return self.__get_file_info__(self.__parse_path__(path)) def get_file_size(self, path: Path) -> int: + assert self._zipref is not None, "Zip reference not initialized" return self._zipref.getinfo(str(self.__parse_path__(path))).file_size def get_file_content( @@ -943,13 +953,14 @@ def get_file_content( path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") + assert self._zipref is not None, "Zip reference not initialized" data = self._zipref.read(str(path)) return data if binary_mode else data.decode("utf-8") class ROCrateRemoteZip(ROCrateLocalZip): - def __init__(self, path: Union[str, Path, URI], relative_root_path: Path = None): + def __init__(self, path: Union[str, Path, URI], relative_root_path: Optional[Path] = None): super().__init__(path, relative_root_path=relative_root_path, init_zip=False) # # initialize the zip reference @@ -1111,7 +1122,7 @@ def __check_search_path__(self, path): class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): - def __init__(self, uri: Union[str, Path, URI], relative_root_path: Path = None): + def __init__(self, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None): # initialize the parent classes super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) # check if the path is a BagIt-wrapped crate @@ -1139,6 +1150,7 @@ def __parse_path__(self, path: Path) -> Path: # if search_path is set, adjust the path if search_path: path = Path("data") / search_path 
+ assert self._zipref is not None, "Zip reference not initialized" zip_namelist = self._zipref.namelist() if str(path) not in zip_namelist and f"{path}/" not in zip_namelist: path = Path(unquote(str(path))) From e39e28b1af23e7e3cf2d6ef2cbfaa34e4bd4b1d1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 12:43:48 +0200 Subject: [PATCH 165/352] refactor(types): :recycle: fix missing type annotations and implicit Optionals --- rocrate_validator/cli/commands/cache.py | 4 +- rocrate_validator/cli/commands/profiles.py | 8 +-- rocrate_validator/events.py | 4 +- rocrate_validator/models.py | 6 +- .../ro-crate/must/0_file_descriptor_format.py | 8 +-- .../requirements/python/__init__.py | 2 +- .../requirements/shacl/checks.py | 29 +++++---- .../requirements/shacl/errors.py | 6 +- .../requirements/shacl/models.py | 59 ++++++++++--------- .../requirements/shacl/requirements.py | 10 ++-- rocrate_validator/requirements/shacl/utils.py | 12 ++-- .../requirements/shacl/validator.py | 54 ++++++++--------- rocrate_validator/utils/cache_warmup.py | 4 +- .../utils/io_helpers/output/__init__.py | 31 ++++++---- .../utils/io_helpers/output/console.py | 7 ++- .../io_helpers/output/json/formatters.py | 9 +-- .../utils/io_helpers/output/text/__init__.py | 3 +- .../io_helpers/output/text/layout/progress.py | 11 +++- .../io_helpers/output/text/layout/report.py | 37 ++++++------ 19 files changed, 167 insertions(+), 137 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 83ac8035a..08efd599a 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -355,11 +355,11 @@ def cache_warm( loaded_profiles = list(Profile.all()) if requested_ids: selected = [] - missing = [] + missing: list[str] = [] # (requested, resolved, all candidates) for tokens that matched # more than one versioned profile — we warn so the user knows # which one was picked and how to opt for a different 
version. - ambiguous_fallbacks = [] + ambiguous_fallbacks: list[tuple[str, Profile, list[Profile]]] = [] for ident in requested_ids: profile = Profile.get_by_identifier(ident) if profile is None: diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index 7fc826b83..afbbbcab9 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -15,7 +15,7 @@ import re import sys from pathlib import Path -from typing import Optional +from typing import Any, Optional from rich.align import Align from rich.markdown import Markdown @@ -61,7 +61,7 @@ ) @click.pass_context def profiles(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Path = None): + extra_profiles_path: Optional[Path] = None): """ [magenta]rocrate-validator:[/magenta] Manage profiles """ @@ -125,7 +125,7 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA # Add data to the table for profile in profiles: # Count requirements by severity - checks_info = {} + checks_info: dict[str, dict[str, Any]] = {} for level in levels: checks_info[level.severity.name] = { "count": 0, @@ -182,7 +182,7 @@ def describe_profile(ctx, profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, check_identifier: Optional[str] = None, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Path = None, + extra_profiles_path: Optional[Path] = None, verbose: bool = False, no_paging: bool = False): """ Show a profile, or — when CHECK_IDENTIFIER is given — show a single requirement check. 
diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index 68558e8a8..e47371331 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -153,8 +153,8 @@ def update(self, event: Event, ctx: Optional[Any] = None): class Publisher: def __init__(self, avoid_duplicate_notifications: bool = False): - self.__subscribers = set() - self.__notified_events = set() + self.__subscribers: set["Subscriber"] = set() + self.__notified_events: set["Event"] = set() self.__avoid_duplicate_notifications = avoid_duplicate_notifications @property diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 0b6fb8a9e..19f989362 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1407,7 +1407,7 @@ class RequirementCheck(ABC): def __init__( self, requirement: Requirement, - name: str, + name: Optional[str], level: Optional[RequirementLevel] = LevelCollection.REQUIRED, description: Optional[str] = None, hidden: Optional[bool] = None, @@ -2753,12 +2753,12 @@ def rocrate_uri(self) -> Optional[URI]: return self._rocrate_uri @rocrate_uri.setter - def rocrate_uri(self, value: URI): + def rocrate_uri(self, value: Union[str, Path, URI]): """ Set the RO-Crate URI. :param value: The RO-Crate URI. - :type value: URI + :type value: Union[str, Path, URI] """ if not value: raise ValueError("Invalid RO-Crate URI") diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 316eec448..46742a8e9 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -13,7 +13,7 @@ # limitations under the License. 
import re -from typing import Any +from typing import Any, Optional from urllib.parse import urljoin from rocrate_validator.models import ValidationContext @@ -298,7 +298,7 @@ def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool return result try: - fail_fast = context.settings.abort_on_first + fail_fast = bool(context.settings.abort_on_first) json_dict = context.ro_crate.metadata.as_dict() result = True for entity in json_dict["@graph"]: @@ -377,9 +377,9 @@ def __get_remote_context_keys__(self, context_uri: str) -> set: raise RuntimeError("The context is not a dictionary", self) return set(jsonLD_ctx.keys()) - def __check_entity_keys__(self, entity: dict, + def __check_entity_keys__(self, entity: Any, context_keys: set, - unexpected_keys: dict[str, int] = None) -> dict[str, int]: + unexpected_keys: Optional[dict[str, int]] = None) -> dict[str, int]: """ Check if the entity is in the correct format """ def add_unexpected_key(k: str, u_keys: dict) -> None: diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index b15d3ee2a..930403127 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -38,7 +38,7 @@ def __init__(self, name: str, check_function: Callable[[RequirementCheck, ValidationContext], bool], description: Optional[str] = None, - level: Optional[LevelCollection] = LevelCollection.REQUIRED, + level: Optional[RequirementLevel] = LevelCollection.REQUIRED, deactivated: bool = False): """ check_function: a function that accepts an instance of PyFunctionCheck and a ValidationContext. 
diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 1ce62ab0b..1ffa2dd0b 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -14,7 +14,7 @@ import json from timeit import default_timer as timer -from typing import Optional +from typing import Any, Optional, cast from rdflib import RDF, BNode, Literal, Namespace @@ -27,11 +27,12 @@ RequirementCheck, RequirementCheckValidationEvent, RequirementLevel, + Severity, SkipRequirementCheck, SourceSnippet, ValidationContext, ) -from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry +from rocrate_validator.requirements.shacl.models import SHACLNode, Shape, ShapesRegistry from rocrate_validator.requirements.shacl.utils import build_node_subgraph, make_uris_relative, resolve_parent_shape from rocrate_validator.requirements.shacl.validator import ( SHACLValidationAlreadyProcessed, @@ -55,7 +56,7 @@ class SHACLCheck(RequirementCheck): """ # Map shape to requirement check instances - __instances__ = {} + __instances__: dict[int, "SHACLCheck"] = {} def __init__( self, @@ -64,7 +65,7 @@ def __init__( name: Optional[str] = None, root: bool = False, hidden: Optional[bool] = None, - level: Optional[bool] = None, + level: Optional[RequirementLevel] = None, ) -> None: self._shape = shape self._root = root @@ -167,13 +168,13 @@ def __derive_level_from_properties__(self) -> Optional[RequirementLevel]: return max(declared_levels, key=lambda lvl: lvl.severity.value) @property - def level(self) -> str: + def level(self) -> RequirementLevel: if not self._level: self._level = self.__compute_requirement_level__() return self._level @property - def severity(self) -> str: + def severity(self) -> Severity: return self.level.severity def get_source_snippet(self) -> Optional[SourceSnippet]: @@ -184,9 +185,9 @@ def get_source_snippet(self) -> Optional[SourceSnippet]: # build a subgraph containing all the 
triples related to the shape subgraph = build_node_subgraph(graph, self._shape.node) # identify the owner of the shape - owner = self._shape + owner: SHACLNode = self._shape while getattr(owner, "parent", None) is not None: - owner = owner.parent + owner = cast(SHACLNode, owner.parent) # if the shape is not a root shape, include the triples linking the owner to the shape if owner is not self._shape: shacl = Namespace(SHACL_NS) @@ -340,10 +341,11 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): start_time = timer() # if the validation fails, process the failed checks failed_requirements_checks = set() - failed_requirements_checks_violations: dict[str, SHACLViolation] = {} + failed_requirements_checks_violations: dict[str, list[SHACLViolation]] = {} failed_requirement_checks_notified = [ _.check.identifier - for _ in shacl_context.result.get_issues(min_severity=shacl_context.settings.requirement_severity) + for _ in shacl_context.result.get_issues( + min_severity=cast(Severity, shacl_context.settings.requirement_severity)) ] logger.debug("Parsing Validation with result: %s", shacl_result) @@ -405,9 +407,10 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): ): continue for violation in failed_requirements_checks_violations[requirementCheck.identifier]: - violating_entity = make_uris_relative(violation.focusNode.toPython(), shacl_context.publicID) + violating_entity = make_uris_relative(cast(Any, violation.focusNode).toPython(), + shacl_context.publicID) violating_property = violation.resultPath.toPython() if violation.resultPath else None - violation_message = violation.get_result_message(shacl_context.rocrate_uri) + violation_message = violation.get_result_message(str(shacl_context.rocrate_uri)) registered_check_issues = shacl_context.result.get_issues_by_check(requirementCheck) skip_requirement_check = False # check if the violation is already registered @@ -429,7 +432,7 @@ def __do_execute_check__(self, shacl_context: 
SHACLValidationContext): # if the check is not to be skipped, add the issue to the context if not skip_requirement_check: c = shacl_context.result.add_issue( - message=violation.get_result_message(shacl_context.rocrate_uri), + message=violation.get_result_message(str(shacl_context.rocrate_uri)), check=requirementCheck, violatingProperty=violating_property, violatingEntity=violating_entity, diff --git a/rocrate_validator/requirements/shacl/errors.py b/rocrate_validator/requirements/shacl/errors.py index 58aabbf12..c77bf571d 100644 --- a/rocrate_validator/requirements/shacl/errors.py +++ b/rocrate_validator/requirements/shacl/errors.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + from rocrate_validator.errors import ValidationError from rocrate_validator.requirements.shacl.validator import \ SHACLValidationResult @@ -21,7 +23,7 @@ class SHACLValidationError(ValidationError): def __init__( self, - result: SHACLValidationResult = None, + result: Optional[SHACLValidationResult] = None, message: str = "Document does not conform to SHACL shapes.", path: str = ".", code: int = 500, @@ -30,7 +32,7 @@ def __init__( self._result = result @property - def result(self) -> SHACLValidationResult: + def result(self) -> Optional[SHACLValidationResult]: return self._result def __repr__(self): diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 4da019ad2..2686b7c24 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -15,7 +15,7 @@ from __future__ import annotations from pathlib import Path -from typing import Optional, Union +from typing import Optional, Union, cast from rdflib import Graph, Literal, Namespace, URIRef from rdflib.term import Node @@ -34,9 +34,9 @@ class SHACLNode: # define default values - _name: str = None - _description: str = None - 
severity: str = None + _name: Optional[str] = None + _description: Optional[str] = None + severity: Optional[str] = None def __init__(self, node: Node, graph: Graph, parent: Optional[SHACLNode] = None): @@ -59,14 +59,14 @@ def name(self) -> str: """Return the name of the shape""" if not self._name: self._name = self.node_name - return self._name or self._node.split("/")[-1] + return self._name or str(self._node).split("/")[-1] @name.setter def name(self, value: str): self._name = value @property - def description(self) -> str: + def description(self) -> Optional[str]: """Return the description of the shape""" return self._description @@ -89,7 +89,8 @@ def node(self): @property def node_name(self): """Return the name of the node""" - return self._node.split("#")[-1] if "#" in self.node else self._node.split("/")[-1] + node_str = str(self._node) + return node_str.split("#")[-1] if "#" in node_str else node_str.split("/")[-1] @property def graph(self): @@ -162,7 +163,7 @@ def compute_hash(graph: Graph, node: Node) -> int: class SHACLNodeCollection(SHACLNode): - def __init__(self, node: Node, graph: Graph, properties: list[PropertyShape] = None): + def __init__(self, node: Node, graph: Graph, properties: Optional[list[PropertyShape]] = None): super().__init__(node, graph) # store the properties self._properties = properties if properties else [] @@ -172,7 +173,7 @@ def properties(self) -> list[PropertyShape]: """Return the properties of the shape""" return self._properties.copy() - def get_property(self, name) -> PropertyShape: + def get_property(self, name) -> Optional[PropertyShape]: """Return the property of the shape with the given name""" for prop in self._properties: if prop.name == name: @@ -206,14 +207,14 @@ class PropertyGroup(SHACLNodeCollection): class PropertyShape(Shape): # define default values - _name: str = None - _short_name: str = None - _description: str = None - group: str = None - defaultValue: str = None + _name: Optional[str] = None + 
_short_name: Optional[str] = None + _description: Optional[str] = None + group: Optional[str] = None + defaultValue: Optional[str] = None order: int = 0 # store the reference to the property group - _property_group: PropertyGroup = None + _property_group: Optional[PropertyGroup] = None def __init__(self, node: Node, @@ -232,10 +233,11 @@ def name(self) -> str: shacl_ns = Namespace(SHACL_NS) path = self.graph.value(subject=self.node, predicate=shacl_ns.path) if path: - self._short_name = path.split("#")[-1] if "#" in path else path.split("/")[-1] + path_str = str(path) + self._short_name = path_str.split("#")[-1] if "#" in path_str else path_str.split("/")[-1] if self.parent: self._name = f"{self._short_name} of {self.parent.name}" - return self._name + return self._name or str(self._node).split("/")[-1] @name.setter def name(self, value: str): @@ -272,10 +274,10 @@ def graph(self) -> Graph: @property def parent(self) -> Optional[Shape]: """Return the parent shape of the shape property""" - return self._parent + return cast(Optional[Shape], self._parent) @property - def propertyGroup(self) -> PropertyGroup: + def propertyGroup(self) -> Optional[PropertyGroup]: """Return the group of the shape property""" return self._property_group @@ -305,7 +307,7 @@ def ungrouped_properties(self) -> list[PropertyShape]: class ShapesRegistry: def __init__(self): - self._shapes = {} + self._shapes: dict[str, Shape] = {} self._shapes_graph: Graph = Graph() def add_shape(self, shape: Shape): @@ -360,17 +362,18 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N """ logger.debug(f"Loading shapes from: {shapes_path}") # load shapes (nodes and properties) from the shapes graph - shapes_list: ShapesList = ShapesList.load_from_file(shapes_path, publicID) + shapes_list: ShapesList = ShapesList.load_from_file(str(shapes_path), publicID) logger.debug(f"Shapes List: {shapes_list}") # append the partial shapes graph to the global shapes graph self._shapes_graph 
+= shapes_list.shapes_graph - # list of instantiated shapes - shapes = [] + # list of instantiated shapes (NodeShape/PropertyShape, plus PropertyGroups + # which are shape-like collections registered alongside the shapes) + shapes: list[Shape] = [] # list of property groups - property_groups = {} + property_groups: dict[str, PropertyGroup] = {} # register Node Shapes for node_shape in shapes_list.node_shapes: @@ -393,7 +396,7 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N group = __process_property_group__(property_groups, p_shape) if group and group not in shapes: grouped = True - shapes.append(group) + shapes.append(cast(Shape, group)) if not group: ungrouped_properties.append(p_shape) @@ -436,11 +439,13 @@ def get_instance(cls, ctx: object): return instance -def __process_property_group__(groups: dict[str, PropertyGroup], property_shape: PropertyShape) -> PropertyGroup: +def __process_property_group__( + groups: dict[str, PropertyGroup], property_shape: PropertyShape +) -> Optional[PropertyGroup]: group_name = property_shape.group if group_name: if group_name not in groups: - groups[group_name] = PropertyGroup(URIRef(property_shape.group), property_shape.graph) + groups[group_name] = PropertyGroup(URIRef(group_name), property_shape.graph) groups[group_name].add_property(property_shape) property_shape._property_group = groups[group_name] return groups[group_name] diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 9db999d65..f852f1d79 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -13,7 +13,7 @@ # limitations under the License. 
from pathlib import Path -from typing import Optional +from typing import Any, Optional, cast from rdflib import RDF @@ -56,9 +56,9 @@ def __init_checks__(self) -> list[RequirementCheck]: assert self.shape is not None, "The shape cannot be None" assert self.shape.node is not None, "The shape node cannot be None" # assign a check to each property of the shape - checks = [] + checks: list[RequirementCheck] = [] # check if the shape has nested properties - has_properties = hasattr(self.shape, "properties") and len(self.shape.properties) > 0 + has_properties = hasattr(self.shape, "properties") and len(cast(Any, self.shape).properties) > 0 # create a check for the shape itself, hidden if the shape has nested properties checks.append( SHACLCheck( @@ -71,7 +71,7 @@ def __init_checks__(self) -> list[RequirementCheck]: ) # create a check for each property if the shape has nested properties if has_properties: - for prop in self.shape.properties: + for prop in cast(Any, self.shape).properties: logger.debug("Creating check for property %s %s", prop.name, prop.description) property_check = SHACLCheck(self, prop) logger.debug("Property check %s: %s", property_check.name, property_check.description) @@ -173,7 +173,7 @@ def load( assert file_path is not None, "The file path cannot be None" shapes: list[Shape] = self.shapes_registry.load_shapes(file_path, publicID) logger.debug("Loaded %s shapes: %s", len(shapes), shapes) - requirements = [] + requirements: list[Requirement] = [] for shape in shapes: if shape is not None and shape.level >= requirement_level: requirements.append(SHACLRequirement(shape, profile, file_path)) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 3f2e0cac8..ff13d2736 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -16,7 +16,7 @@ import hashlib from pathlib import Path -from typing import TYPE_CHECKING, Optional, Union +from typing 
import TYPE_CHECKING, Any, Optional, Union, cast from rdflib import RDF, BNode, Graph, Namespace from rdflib.term import Node @@ -78,14 +78,14 @@ def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optio skip_properties = ["node"] if exclude is None else exclude + ["node"] triples = node_graph.triples((node, None, None)) for node, p, o in triples: - predicate_as_string = p.toPython() + predicate_as_string = cast(Any, p).toPython() # logger.debug(f"Processing {predicate_as_string} of property graph {node}") if predicate_as_string.startswith(SHACL_NS): property_name = predicate_as_string.split("#")[-1] if property_name in skip_properties: continue try: - setattr(obj, property_name, o.toPython()) + setattr(obj, property_name, cast(Any, o).toPython()) except AttributeError as e: logger.error(f"Error injecting attribute {property_name}: {e}") # logger.debug("Injected attribute %s: %s", property_name, o.toPython()) @@ -139,7 +139,7 @@ def compute_key(g: Graph, s: Node) -> str: if isinstance(s, BNode): return compute_hash(g, s) else: - return s.toPython() + return cast(Any, s).toPython() class ShapesList: @@ -210,7 +210,7 @@ def get_shape_property_graph(self, shape_node: Node, shape_property: Node) -> Gr return property_graph @classmethod - def load_from_file(cls, file_path: str, publicID: str = None) -> ShapesList: + def load_from_file(cls, file_path: str, publicID: Optional[str] = None) -> ShapesList: """ Load the shapes from the file @@ -261,7 +261,7 @@ def __extract_related_triples__(graph, subject_node, processed_nodes=None): return related_triples -def load_shapes_from_file(file_path: str, publicID: str = None) -> ShapesList: +def load_shapes_from_file(file_path: str, publicID: Optional[str] = None) -> ShapesList: try: # Check the file path is not None assert file_path is not None, "The file path cannot be None" diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 
1ee3163f6..4811cd13f 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -16,7 +16,7 @@ import os from pathlib import Path -from typing import Optional, Union +from typing import Any, Optional, Union, cast import pyshacl from pyshacl.pytypes import GraphLike @@ -46,7 +46,7 @@ class SHACLValidationSkip(Exception): class SHACLValidationAlreadyProcessed(Exception): - def __init__(self, profile_identifier: str, result: SHACLValidationResult) -> None: + def __init__(self, profile_identifier: str, result: Optional[bool]) -> None: super().__init__(f"Profile {profile_identifier} has already been processed") self.result = result @@ -95,7 +95,7 @@ def __init__(self, context: ValidationContext): super().__init__(context.validator, context.settings) self._base_context: ValidationContext = context # reference to the ontology path - self._ontology_path: Path = None + self._ontology_path: Optional[Path] = None # reference to the contextual ShapeRegistry instance self._shapes_registry: ShapesRegistry = ShapesRegistry() @@ -104,10 +104,10 @@ def __init__(self, context: ValidationContext): self._processed_profiles: dict[str, bool] = {} # reference to the current validation profile - self._current_validation_profile: Profile = None + self._current_validation_profile: Optional[Profile] = None - # store the validation result of the current profile - self._validation_result: SHACLValidationResult = None + # store the validation result of the current profile (a pass/fail boolean) + self._validation_result: Optional[bool] = None # reference to the contextual ontology graph self._ontology_graph: Graph = Graph() @@ -134,8 +134,8 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: # logger.debug("Processing check: %s", check) if check.overridden and check.requirement.profile != self.target_profile: # logger.debug("Overridden check: %s", check) - profile_shapes_graph -= check.shape.graph - 
profile_shapes.pop(check.shape.key) + profile_shapes_graph -= cast(Any, check).shape.graph + profile_shapes.pop(cast(Any, check).shape.key) # add the shapes to the registry self._shapes_registry.extend(profile_shapes, profile_shapes_graph) @@ -154,15 +154,15 @@ def base_context(self) -> ValidationContext: return self._base_context @property - def current_validation_profile(self) -> Profile: + def current_validation_profile(self) -> Optional[Profile]: return self._current_validation_profile @property - def current_validation_result(self) -> SHACLValidationResult: + def current_validation_result(self) -> Optional[bool]: return self._validation_result @current_validation_result.setter - def current_validation_result(self, result: ValidationResult): + def current_validation_result(self, result: bool): assert self._current_validation_profile is not None, "Invalid state: current profile not set" # store the validation result self._validation_result = result @@ -207,9 +207,9 @@ def __get_data_graph_base__(self) -> Optional[str]: return None def __load_ontology_graph__(self, profile_path: Path, - ontology_filename: Optional[str] = DEFAULT_ONTOLOGY_FILE) -> Graph: + ontology_filename: str = DEFAULT_ONTOLOGY_FILE) -> Optional[Graph]: # load the graph of ontologies - ontology_graph = None + ontology_graph: Optional[Graph] = None ontology_path = self.__get_ontology_path__(profile_path, ontology_filename) if os.path.exists(ontology_path): logger.debug("Loading ontologies: %s", ontology_path) @@ -246,7 +246,7 @@ def get_instance(cls, context: ValidationContext) -> SHACLValidationContext: class SHACLViolation: - def __init__(self, result: ValidationResult, violation_node: Node, graph: Graph) -> None: + def __init__(self, result: "SHACLValidationResult", violation_node: Node, graph: Graph) -> None: # check the input assert result is not None, "Invalid result" assert isinstance(violation_node, Node), "Invalid violation node" @@ -258,14 +258,14 @@ def __init__(self, result: 
ValidationResult, violation_node: Node, graph: Graph) self._graph = graph # initialize the properties for lazy loading - self._focus_node = None - self._result_message = None - self._result_path = None - self._severity = None - self._source_constraint_component = None - self._source_shape = None - self._source_shape_node = None - self._value = None + self._focus_node: Optional[Node] = None + self._result_message: Optional[str] = None + self._result_path: Optional[Node] = None + self._severity: Optional[Severity] = None + self._source_constraint_component: Optional[Node] = None + self._source_shape: Optional[Node] = None + self._source_shape_node: Optional[Node] = None + self._value: Optional[Node] = None @property def node(self) -> Node: @@ -299,7 +299,7 @@ def get_result_severity(self) -> Severity: severity = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}resultSeverity")) assert severity is not None, f"Unable to get severity from violation node {self._violation_node}" # we need to map the SHACL severity term to our Severity enum values - self._severity = map_severity(severity.toPython()) + self._severity = map_severity(cast(Any, severity).toPython()) return self._severity @property @@ -315,7 +315,7 @@ def get_result_message(self, ro_crate_path: Union[Path, str]) -> str: if not self._result_message: message = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}resultMessage")) assert message is not None, f"Unable to get result message from violation node {self._violation_node}" - self._result_message = make_uris_relative(message.toPython(), ro_crate_path) + self._result_message = make_uris_relative(cast(Any, message).toPython(), ro_crate_path) return self._result_message @property @@ -324,13 +324,13 @@ def sourceShape(self) -> Union[URIRef, BNode]: self._source_shape_node = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}sourceShape")) assert self._source_shape_node is not None, \ f"Unable to get source shape node from violation node 
{self._violation_node}" - return self._source_shape_node + return cast(Union[URIRef, BNode], self._source_shape_node) class SHACLValidationResult: def __init__(self, results_graph: Graph, - results_text: str = None) -> None: + results_text: Optional[str] = None) -> None: # validate the results graph input assert results_graph is not None, "Invalid graph" assert isinstance(results_graph, Graph), "Invalid graph type" @@ -366,7 +366,7 @@ def violations(self) -> list[SHACLViolation]: return self._violations @property - def text(self) -> str: + def text(self) -> Optional[str]: return self._text diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index e22f5d737..2b2a3c011 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -27,7 +27,7 @@ import os from dataclasses import dataclass -from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence +from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Sequence, cast from rocrate_validator import constants from rocrate_validator.utils import log as logging @@ -80,7 +80,7 @@ def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: urls: List[str] = [] try: for row in graph.query(_CACHEABLE_URLS_SPARQL): - artifact = row.artifact + artifact = cast(Any, row).artifact if artifact is None: continue value = str(artifact) diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 61f951a26..483292a5d 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -13,12 +13,20 @@ # limitations under the License. 
-from typing import Any, Optional, Protocol +from __future__ import annotations -from rich.console import Console, ConsoleOptions, RenderResult +from typing import TYPE_CHECKING, Any, Optional, Protocol, cast + +from rich.console import ConsoleOptions, RenderResult from rocrate_validator.utils import log as logging +if TYPE_CHECKING: + # The formatters render to the application's Console subclass (which adds + # ``interactive``/disabling behaviour); typing the protocol against it keeps + # the concrete formatters' ``__rich_console__`` overrides compatible. + from rocrate_validator.utils.io_helpers.output.console import Console + # set up logging logger = logging.getLogger(__name__) @@ -26,28 +34,30 @@ class OutputFormatter(Protocol): """Protocol for output formatters.""" - def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: + def __rich_console__(self, console: "Console", options: ConsoleOptions) -> RenderResult: pass class BaseOutputFormatter(OutputFormatter): def __init__(self, data: Optional[Any] = None): - self._fmap = {} + # Formatters are registered as classes (instantiated with the data to + # render), so the map values are formatter types, not instances. 
+ self._fmap: dict[type, type[OutputFormatter]] = {} self._data = data - def add_type_formatter(self, data_type: type, formatter: OutputFormatter): + def add_type_formatter(self, data_type: type, formatter: type[OutputFormatter]): """Register a formatter for a specific data type.""" self._fmap[data_type] = formatter - def get_type_formatter(self, data_type: type) -> OutputFormatter: + def get_type_formatter(self, data_type: type) -> type[OutputFormatter]: """Retrieve the formatter for a specific data type.""" formatter = self._fmap.get(data_type) if not formatter: raise NotImplementedError(f"No formatter registered for type: {data_type.__name__}") return formatter - def get_data_formatter(self, data: Any) -> OutputFormatter: + def get_data_formatter(self, data: Any) -> type[OutputFormatter]: """Retrieve the formatter for a specific data type.""" data_type = type(data) formatter = self._fmap.get(data_type) @@ -55,15 +65,16 @@ def get_data_formatter(self, data: Any) -> OutputFormatter: raise NotImplementedError(f"No formatter registered for type: {data_type.__name__}") return formatter - def get_type_formatters(self) -> dict[type]: + def get_type_formatters(self) -> dict[type, type[OutputFormatter]]: """Retrieve all registered formatters.""" return dict(self._fmap) - def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: + def __rich_console__(self, console: "Console", options: ConsoleOptions) -> RenderResult: if self._data is None: raise ValueError("No data provided for formatting.") formatter = self.get_data_formatter(self._data) if not formatter: yield self._data else: - yield from formatter(self._data).__rich_console__(console, options) + # ``formatter`` is a formatter class instantiated with the data to render. 
+ yield from cast(Any, formatter)(self._data).__rich_console__(console, options) diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index d5b12c4bc..6ae01bae5 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional +from typing import Any, Optional from rich.console import Console as BaseConsole @@ -27,14 +27,14 @@ class Console(BaseConsole): """Rich console that can be disabled.""" def __init__(self, *args, disabled: bool = False, interactive: bool = True, - formatters: dict[type, OutputFormatter] = None, **kwargs): + formatters: Optional[dict[type, Any]] = None, **kwargs): force_jupyter = kwargs.pop("force_jupyter", None) if force_jupyter is None: force_jupyter = False if self.__jupyter_environment__() else None super().__init__(*args, force_jupyter=force_jupyter, **kwargs) self.disabled = disabled self.interactive = interactive - self._formatters = {} + self._formatters: dict[type, Any] = {} self._formatters_opts: dict[type, BaseOutputFormatter] = {} # Register provided formatters if any if formatters: @@ -52,6 +52,7 @@ def register_formatter(self, formatter: OutputFormatter, type_: Optional[type] = for t, f in formatter.get_type_formatters().items(): self._formatters[t] = f else: + assert type_ is not None # guaranteed by the check above self._formatters[type_] = formatter def __format_data__(self, obj, *args, **kwargs): diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index e07685467..78a34433f 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -13,6 +13,7 @@ # limitations under the License. 
import json +from typing import Any, Optional from rich.console import ConsoleOptions, RenderResult @@ -35,12 +36,12 @@ def format_validation_result(data: ValidationResult, console=console, console_options=console_options) -def format_validation_results(data: ValidationResult, - console: Console = None, - console_options: ConsoleOptions = None) -> str: +def format_validation_results(data: dict[str, ValidationResult], + console: Optional[Console] = None, + console_options: Optional[ConsoleOptions] = None) -> str: # Initialize an empty JSON output - json_output = { + json_output: dict[str, Any] = { "meta": { "generated_by": "rocrate-validator", "version": get_version(), diff --git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index f0f6f08cf..924423721 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -16,12 +16,13 @@ from typing import Any, Optional -from rich.console import Console, ConsoleOptions, RenderResult +from rich.console import ConsoleOptions, RenderResult from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationResult, ValidationStatistics from .. import BaseOutputFormatter +from ..console import Console from .formatters import (ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 72b3991ce..9ab530b8a 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -13,13 +13,17 @@ # limitations under the License. 
-from typing import Optional +from typing import Optional, Union from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn from rocrate_validator.utils import log as logging from rocrate_validator.events import Event, EventType, Subscriber -from rocrate_validator.models import ValidationContext, ValidationStatistics +from rocrate_validator.models import (ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + ValidationContext, ValidationEvent, + ValidationSettings, ValidationStatistics) # set up logging logger = logging.getLogger(__name__) @@ -31,7 +35,8 @@ class ProgressMonitor(Subscriber): REQUIREMENT_VALIDATION = "Requirements" REQUIREMENT_CHECK_VALIDATION = "Requirements Checks" - def __init__(self, settings: dict, stats: Optional[ValidationStatistics] = None): + def __init__(self, settings: Union[dict, ValidationSettings], + stats: Optional[ValidationStatistics] = None): self.__progress = Progress( TextColumn("[progress.description]{task.description}"), BarColumn(), diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 4aa43d5f5..f96970391 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -16,9 +16,8 @@ import threading import time -from typing import Callable +from typing import Any, Callable, Optional -from requests_cache import Optional from rich.align import Align from rich.layout import Layout from rich.live import Live @@ -31,7 +30,10 @@ from rocrate_validator.utils.io_helpers.colors import get_severity_color from rocrate_validator.events import Event, EventType from rocrate_validator.utils.io_helpers.output.console import Console -from rocrate_validator.models import (Severity, ValidationContext, +from rocrate_validator.models import (ProfileValidationEvent, + RequirementCheckValidationEvent, + 
RequirementValidationEvent, Severity, + ValidationContext, ValidationEvent, ValidationResult, ValidationSettings, ValidationStatistics) from rocrate_validator.utils.uri import URI @@ -54,15 +56,15 @@ def __init__(self, console: Console, self.validation_settings = settings self.statistics = statistics self.profile_autodetected = profile_autodetected - self.result = None - self.__layout = None - self._validation_checks_progress = None - self.__progress_monitor = None - self.requirement_checks_container_layout = None - self.passed_checks = None - self.failed_checks = None - self.report_details_container = None - self.overall_result = None + self.result: Optional[ValidationResult] = None + self.__layout: Optional[Padding] = None + self._validation_checks_progress: Optional[Layout] = None + self.__progress_monitor: Optional[ProgressMonitor] = None + self.requirement_checks_container_layout: Optional[Layout] = None + self.passed_checks: Optional[Layout] = None + self.failed_checks: Optional[Layout] = None + self.report_details_container: Optional[Layout] = None + self.overall_result: Optional[Layout] = None @property def layout(self): @@ -80,8 +82,7 @@ def progress_monitor(self) -> ProgressMonitor: self.__progress_monitor = ProgressMonitor(self.validation_settings, self.statistics) return self.__progress_monitor - def live(self, update_callable: callable) -> any: - assert update_callable, "Update callable must be provided" + def live(self, update_callable: Callable) -> Any: # Start live rendering result = None with Live(self.layout, console=self.console, refresh_per_second=10, transient=False): @@ -200,7 +201,7 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): self.__show_overall_result__(event.validation_result) logger.debug("Validation ended with result: %s", event.validation_result) - def update_stats(self, profile_stats: ValidationStatistics = None): + def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): assert 
profile_stats, "Profile stats must be provided" # self.profile_stats = profile_stats self.requirement_checks_by_severity_container_layout["required"].update( @@ -267,7 +268,7 @@ def update_stats(self, profile_stats: ValidationStatistics = None): ) ) - def __show_overall_result__(self, result: ValidationResult): + def __show_overall_result__(self, result: Optional[ValidationResult]): assert result, "Validation result must be provided" self.result = result if result.passed(): @@ -306,10 +307,10 @@ def __init__(self, console: Console, validation_settings: dict, refresh_per_second: Number of refreshes per second transient: Whether the display is transient """ - super().__init__(console, validation_settings, result, profile_autodetected) + super().__init__(console, validation_settings, result, profile_autodetected) # type: ignore[arg-type] self.refresh_per_second = refresh_per_second self.transient = transient - self._live = None + self._live: Optional[Live] = None def __enter__(self): """Enter the context and start live rendering.""" From fdbbc1e24b591d91852e407ff198ea187b229a40 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:44:44 +0200 Subject: [PATCH 166/352] fix(services): :bug: raise ProfileNotFound when profile is not found in get_profile find_in_list() may return None; get_profile() now raises ProfileNotFound instead of returning None, matching its declared Profile return type. 
--- rocrate_validator/services.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 256dea787..835363367 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -25,6 +25,7 @@ from rocrate_validator.utils.uri import URI from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.errors import ProfileNotFound # set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() @@ -274,4 +275,7 @@ def get_profile( severity=severity, allow_requirement_check_override=allow_requirement_check_override, ) - return Profile.find_in_list(profiles, profile_identifier) + profile = Profile.find_in_list(profiles, profile_identifier) + if profile is None: + raise ProfileNotFound(profile_identifier) + return profile From dda588ae526c077a9956a308297313b70591b3a5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:47:05 +0200 Subject: [PATCH 167/352] refactor(services): :recycle: fix implicit Optional and URI typing in validator init --- rocrate_validator/services.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 835363367..0b9381df3 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -96,7 +96,8 @@ def __initialise_validator__( settings = ValidationSettings.parse(settings) # parse the rocrate path - rocrate_path: URI = URI(settings.rocrate_uri) + assert settings.rocrate_uri is not None, "RO-Crate URI is required" + rocrate_path: URI = URI(str(settings.rocrate_uri)) logger.debug("Validating RO-Crate: %s", rocrate_path) # check if the RO-Crate exists @@ -136,7 +137,7 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): zip_ref.extractall(tmp_dir) logger.debug("RO-Crate extracted to temporary directory: %s", tmp_dir) # update the data 
path to point to the temporary directory - settings.rocrate_uri = Path(tmp_dir) + settings.rocrate_uri = URI(str(tmp_dir)) # continue with the validation process return __init_validator__(settings) finally: @@ -187,7 +188,7 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): # if the RO-Crate is not a ZIP file, directly validate the RO-Crate elif rocrate_path.is_local_directory(): logger.debug("RO-Crate is a local directory") - settings.rocrate_uri = rocrate_path.as_path() + settings.rocrate_uri = URI(str(rocrate_path.as_path())) return __init_validator__(settings) else: raise ValueError( From 8ee0557767ef3daed5157545d85ded481b6c1d06 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:51:15 +0200 Subject: [PATCH 168/352] refactor(types): :recycle: fix profile_identifier typing in validate and HTTP/input annotations Type the validate CLI profile_identifier as a tuple (matching click's multiple=True) and use a dedicated profile_identifiers list instead of reassigning the parameter. Also type HttpRequester.session as Any and silence the Windows-only msvcrt access for mypy. 
--- rocrate_validator/cli/commands/validate.py | 31 +++++++++++---------- rocrate_validator/utils/http.py | 5 +++- rocrate_validator/utils/io_helpers/input.py | 2 +- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 0683780d1..9d0b5031c 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -18,7 +18,7 @@ import sys from contextlib import nullcontext from pathlib import Path -from typing import Optional +from typing import Optional, Union import rich_click as click from rich.padding import Padding @@ -244,14 +244,14 @@ def validate_uri(ctx, param, value): def validate(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Optional[Path] = None, - profile_identifier: Optional[str] = None, + profile_identifier: tuple[str, ...] = (), metadata_only: bool = False, no_auto_profile: bool = False, disable_profile_inheritance: bool = False, requirement_severity: str = Severity.REQUIRED.name, requirement_severity_only: bool = False, - skip_checks: list[str] = None, - rocrate_uri: Path = ".", + skip_checks: Optional[list[str]] = None, + rocrate_uri: Union[str, Path] = ".", relative_root_path: Optional[Path] = None, fail_fast: bool = False, no_paging: bool = False, @@ -323,7 +323,7 @@ def validate(ctx, # Parse the skip_checks option logger.debug("skip_checks: %s", skip_checks) # Parse the skip_checks option - skip_checks_list = [] + skip_checks_list: list[str] = [] if skip_checks: try: for s in skip_checks: @@ -372,6 +372,9 @@ def validate(ctx, # Detect the profile to use for validation autodetection = False selected_profile = profile_identifier + # The profile selection collapses to a concrete list of identifiers + # (the CLI passes a possibly-empty tuple because of ``multiple=True``). 
+ profile_identifiers: list[str] = list(profile_identifier) if selected_profile is None or len(selected_profile) == 0: # Auto-detect the profile to use for validation (if not disabled) @@ -402,7 +405,7 @@ def validate(ctx, ) ) selected_options = multiple_choice(console, available_profiles) - profile_identifier = [available_profiles[int( + profile_identifiers = [available_profiles[int( selected_option)].identifier for selected_option in selected_options] logger.debug("Profile selected: %s", selected_options) console.print(Padding(Rule(style="bold yellow"), (1, 2))) @@ -410,22 +413,22 @@ def validate(ctx, elif candidate_profiles and len(candidate_profiles) < len(available_profiles): logger.debug("Profile identifier autodetected: %s", candidate_profiles[0].identifier) autodetection = True - profile_identifier = [_.identifier for _ in candidate_profiles] + profile_identifiers = [_.identifier for _ in candidate_profiles] # Fall back to the selected profile - if not profile_identifier or len(profile_identifier) == 0: + if not profile_identifiers or len(profile_identifiers) == 0: console.print(f"\n{' '*2}[bold yellow]WARNING: [/bold yellow]", end="") if no_auto_profile: console.print("[bold]Auto-detection of the profiles to use for validation is disabled[/bold]") else: console.print("[bold]Unable to automatically detect the profile to use for validation[/bold]") console.print(f"{' '*11}[bold]The base `ro-crate` profile will be used for validation[/bold]") - profile_identifier = ["ro-crate"] + profile_identifiers = ["ro-crate"] # Validate the RO-Crate against the selected profiles is_valid = True results = {} - for profile in profile_identifier: + for profile in profile_identifiers: # Duplicate settings for each profile and set the profile identifier logger.info("\nValidating RO-Crate against profile: [bold cyan]%s[/bold cyan]", profile) validation_settings = validation_settings.copy() @@ -435,7 +438,7 @@ def validate(ctx, logger.debug("Profile autodetected: %s", 
autodetection) # Initialize the validation result variable - result: ValidationResult = None + result: Optional[ValidationResult] = None ######################################################################################### # Perform and display the validation with progress bar to the console @@ -469,7 +472,7 @@ def validate(ctx, command_view.display_validation_result(result) else: # Validate RO-Crate against the profile and get the validation result - result: ValidationResult = services.validate(validation_settings) + result = services.validate(validation_settings) # Init TextOutputFormatter for console output console.register_formatter(TextOutputFormatter()) # Show the final overview of the validation if no interactive mode @@ -497,7 +500,7 @@ def validate(ctx, logger.debug("Validation result obtained") else: # Validate RO-Crate against the profile and get the validation result - result: ValidationResult = services.validate(validation_settings) + result = services.validate(validation_settings) results[profile] = result # Output processing for text format to file @@ -534,7 +537,7 @@ def validate(ctx, console.print( f"\n{' '*2}✅ [bold]Validation [green]PASSED![/green]. 
" f"\n{' '*5}RO-Crate is valid according to the profile(s): " - f"[cyan]{', '.join(profile_identifier)}[/cyan][/bold]" + f"[cyan]{', '.join(profile_identifiers)}[/cyan][/bold]" ) else: console.print(f"\n{' '*2}❌ [bold]Validation [red]FAILED![/red][/bold]") diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index fa825bfa5..714f1a5c9 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -142,7 +142,10 @@ def __init__(self, def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = None): # initialize the session - self.session = None + # The session can be a CachedSession, a plain requests.Session, or the + # duck-typed _OfflineFallbackSession; HTTP methods are delegated dynamically + # via __getattr__, so it is typed as Any. + self.session: Any = None logger.debug(f"Initializing instance of {self.__class__.__name__}") assert not self._initialized, "Session already initialized" # check if requests_cache is installed diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 5900968aa..71d5f4860 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -41,7 +41,7 @@ def __get_single_char_win32__(console: Optional[Console] = None, end: str = "\n" if console and message: console.print(f"\n{message}", end="") try: - char = msvcrt.getch().decode() + char = msvcrt.getch().decode() # type: ignore[attr-defined] # Windows-only finally: if console: console.print(char, end=end if choices and char in choices else "") From 3f456bd2799624754a35b698395a24ff1e1c7fec Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:52:34 +0200 Subject: [PATCH 169/352] refactor(types): :recycle: add type-narrowing asserts in progress monitor and command view --- rocrate_validator/cli/ui/text/validate.py | 1 + .../utils/io_helpers/output/text/layout/progress.py | 5 +++++ 2 files changed, 6 
insertions(+) diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index 94986247b..4ef1ed0e5 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -61,6 +61,7 @@ def report_layout(self) -> ValidationReportLayout: The current report layout """ if self._report_layout is None: + assert self.validation_settings is not None, "Validation settings must be set" self._report_layout = ValidationReportLayout( console=self.console, settings=self.validation_settings diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 9ab530b8a..6c6337c1b 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -87,12 +87,15 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): logger.debug("Validation started") # self.start() if event.event_type == EventType.PROFILE_VALIDATION_START: + assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation start: %s", event.profile.identifier) elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: logger.debug("Requirement validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: + assert isinstance(event, RequirementCheckValidationEvent) + assert ctx is not None, "Validation context must be provided" target_profile = ctx.target_validation_profile if not event.requirement_check.requirement.hidden and \ (not event.requirement_check.overridden @@ -101,9 +104,11 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): else: logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) elif event.event_type == 
EventType.REQUIREMENT_VALIDATION_END: + assert isinstance(event, RequirementValidationEvent) if not event.requirement.hidden: self.progress.update(task_id=self.requirement_validation, advance=1) elif event.event_type == EventType.PROFILE_VALIDATION_END: self.progress.update(task_id=self.profile_validation, advance=1) elif event.event_type == EventType.VALIDATION_END: + assert isinstance(event, ValidationEvent) logger.debug("Validation ended with result: %s", event.validation_result) From 5f2452cc95a3c6935aac63d34aa163a353905b78 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:53:30 +0200 Subject: [PATCH 170/352] refactor(types): :recycle: ensure __get_context_keys__ always returns a set --- .../profiles/ro-crate/must/0_file_descriptor_format.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 46742a8e9..c70884c13 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -367,6 +367,9 @@ def __get_context_keys__(self, context: object) -> set: keys.update(self.__get_context_keys__(ctx)) return keys + # any other context type contributes no keys + return set() + def __get_remote_context_keys__(self, context_uri: str) -> set: """ Get the keys of the context URI """ From 51e7a767487564282cce8004d3e6f2aa1f958ca0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:54:24 +0200 Subject: [PATCH 171/352] fix(profiles): :bug: show actual size in Web Data Entity contentSize mismatch message --- .../profiles/ro-crate/should/5_web_data_entity_metadata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index 
424269361..a446301ef 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -92,11 +92,12 @@ def check_content_size(self, context: ValidationContext) -> bool: continue if entity.is_available(): content_size = entity.get_property("contentSize") - if content_size and int(content_size) != context.ro_crate.get_external_file_size(entity.id): + actual_size = context.ro_crate.get_external_file_size(entity.id) + if content_size and int(content_size) != actual_size: context.result.add_issue( f'The property contentSize={content_size} of the Web-based Data Entity ' f'{entity.id} does not match the actual size of ' - f'the downloadable content, i.e., {entity.content_size} (bytes)', self, + f'the downloadable content, i.e., {actual_size} (bytes)', self, violatingEntity=entity.id, violatingProperty='contentSize', violatingPropertyValue=content_size) result = False if not result and context.fail_fast: From 09f71854280a7ee3d945aa1aa5ca1cda07d1e805 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:55:55 +0200 Subject: [PATCH 172/352] refactor(shacl): :recycle: avoid double get_declared_level() call and narrow its type --- rocrate_validator/requirements/shacl/checks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 1ffa2dd0b..2b13eee97 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -146,8 +146,9 @@ def description(self) -> str: return f"Check for {self._shape.name}" if self._shape.name else "SHACL validation check" def __compute_requirement_level__(self) -> RequirementLevel: - if self._shape and self._shape.get_declared_level(): - return self._shape.get_declared_level() + declared_level = self._shape.get_declared_level() if self._shape else None + if 
declared_level: + return declared_level if self.requirement and self.requirement.requirement_level_from_path: return self.requirement.requirement_level_from_path # When the shape file lives in the profile root and the NodeShape From 6b3266cecb02ae6bec31d5ce7203cc29f7d33336 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:56:50 +0200 Subject: [PATCH 173/352] refactor(shacl): :recycle: rename loop variable to skipped_check for clarity --- .../requirements/shacl/checks.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 2b13eee97..4f2712029 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -472,38 +472,38 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): # As above, but for skipped checks which are not failed logger.debug("Skipped checks: %s", len(shacl_context.result.skipped_checks)) - for requirementCheck in list(shacl_context.result.skipped_checks): - logger.debug("Processing skipped check: %s", requirementCheck.identifier) - if not isinstance(requirementCheck, SHACLCheck): - logger.debug("Skipped check is not a SHACLCheck: %s", requirementCheck.identifier) + for skipped_check in list(shacl_context.result.skipped_checks): + logger.debug("Processing skipped check: %s", skipped_check.identifier) + if not isinstance(skipped_check, SHACLCheck): + logger.debug("Skipped check is not a SHACLCheck: %s", skipped_check.identifier) continue - # if requirementCheck.requirement.profile != shacl_context.current_validation_profile and - if requirementCheck.identifier not in failed_requirement_checks_notified: - failed_requirement_checks_notified.append(requirementCheck.identifier) - shacl_context.result._add_executed_check(requirementCheck, True) + # if skipped_check.requirement.profile != 
shacl_context.current_validation_profile and + if skipped_check.identifier not in failed_requirement_checks_notified: + failed_requirement_checks_notified.append(skipped_check.identifier) + shacl_context.result._add_executed_check(skipped_check, True) if ( - requirementCheck.requirement.profile != shacl_context.target_profile + skipped_check.requirement.profile != shacl_context.target_profile and shacl_context.settings.disable_inherited_profiles_issue_reporting ): continue shacl_context.validator.notify( RequirementCheckValidationEvent( EventType.REQUIREMENT_CHECK_VALIDATION_END, - requirementCheck, + skipped_check, validation_result=True, ) ) logger.debug( "Added skipped check to the context: %s", - requirementCheck.identifier, + skipped_check.identifier, ) logger.debug("Remaining skipped checks: %r", len(shacl_context.result.skipped_checks)) - for requirementCheck in shacl_context.result.skipped_checks: + for skipped_check in shacl_context.result.skipped_checks: logger.debug( "Remaining skipped check: %r - %s", - requirementCheck.identifier, - requirementCheck.name, + skipped_check.identifier, + skipped_check.name, ) end_time = timer() logger.debug(f"Execution time for parsing the validation result: {end_time - start_time} seconds") From aa02c2460ec683e4f92b639ea721d5be4b799c96 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:57:26 +0200 Subject: [PATCH 174/352] refactor(shacl): :recycle: rename property shape loop variable to avoid type clash --- rocrate_validator/requirements/shacl/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 2686b7c24..63418d294 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -414,9 +414,9 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N # register Property Shapes for 
property_shape in shapes_list.property_shapes: - shape = PropertyShape(property_shape, shapes_list.get_shape_graph(property_shape)) - self.add_shape(shape) - shapes.append(shape) + prop_shape = PropertyShape(property_shape, shapes_list.get_shape_graph(property_shape)) + self.add_shape(prop_shape) + shapes.append(prop_shape) return shapes From 1aa77964e267d3af1c419dba0bf6838e2d1841bb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:58:26 +0200 Subject: [PATCH 175/352] fix(versioning): :bug: guard against None results from run_git_command --- rocrate_validator/utils/versioning.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index 5e1997a90..2672aea98 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -47,7 +47,7 @@ def get_git_commit() -> str: :return: The git commit hash """ - return run_git_command(['git', 'rev-parse', '--short', 'HEAD']) + return run_git_command(['git', 'rev-parse', '--short', 'HEAD']) or "" def is_release_tag(git_sha: str) -> bool: @@ -67,7 +67,7 @@ def get_last_tag() -> str: :return: The last tag """ - return run_git_command(['git', 'describe', '--tags', '--abbrev=0']) + return run_git_command(['git', 'describe', '--tags', '--abbrev=0']) or "" def get_commit_distance(tag: Optional[str] = None) -> int: @@ -79,7 +79,8 @@ def get_commit_distance(tag: Optional[str] = None) -> int: if not tag: tag = get_last_tag() try: - return int(run_git_command(['git', 'rev-list', '--count', f"{tag}..HEAD"])) + count = run_git_command(['git', 'rev-list', '--count', f"{tag}..HEAD"]) + return int(count) if count else 0 except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) From 2f141a09140fa6d792140617c764ef9bbb1028c7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 13:59:02 +0200 Subject: [PATCH 176/352] refactor(versioning): :recycle: correct 
get_min_python_version return type --- rocrate_validator/utils/versioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index 2672aea98..a32116907 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -123,7 +123,7 @@ def get_version() -> str: return f"{version}-dirty" if dirty else version -def get_min_python_version() -> tuple[int, int, Optional[int]]: +def get_min_python_version() -> tuple[int, ...]: """ Get the minimum Python version required by the package From 8bac3958aba0efe4d897f96a9c6d0d9ce1db4e6c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:01:25 +0200 Subject: [PATCH 177/352] refactor(formatters): :recycle: guard Optional issue.message before Markdown rendering --- rocrate_validator/utils/io_helpers/output/text/formatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index c0fc7a158..401d6e499 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -74,7 +74,7 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR if issue.violatingEntity: path = f"{path} on [cyan]<{issue.violatingEntity}>[/cyan]" yield Padding(f"- [[red]Violation[/red]{path}]: " - f"{Markdown(issue.message).markup}", (0, 9, 1, 9)) + f"{Markdown(issue.message or '').markup}", (0, 9, 1, 9)) if console.no_color: yield Padding("\n", (0, 0)) yield Padding("\n", (0, 0)) From b33e060b2170b551daa9abe16c49c7606b484e5e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:02:24 +0200 Subject: [PATCH 178/352] refactor(report): :recycle: fix type narrowing and annotations in report layout --- .../utils/io_helpers/output/text/layout/report.py | 14 
+++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index f96970391..4c996f402 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -101,7 +101,7 @@ def __init_layout__(self): severity_color = get_severity_color(settings.requirement_severity) base_info_layout = Layout( Align( - f"\n[bold cyan]RO-Crate:[/bold cyan] [bold]{URI(settings.rocrate_uri).uri}[/bold]" + f"\n[bold cyan]RO-Crate:[/bold cyan] [bold]{URI(str(settings.rocrate_uri)).uri}[/bold]" "\n[bold cyan]Target Profile:[/bold cyan][bold magenta] " f"{settings.profile_identifier}[/bold magenta] " f"{'[italic](autodetected)[/italic]' if self.profile_autodetected else ''}" @@ -175,15 +175,18 @@ def __init_layout__(self): if result: self.__show_overall_result__(result) - def update(self, event: Event, ctx: Optional[ValidationContext] = None): + def update(self, event: Event, ctx: Optional[ValidationContext] = None): # type: ignore[override] logger.debug("Event: %s", event.event_type) if event.event_type == EventType.PROFILE_VALIDATION_START: + assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation start: %s", event.profile.identifier) elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: logger.debug("Requirement validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: + assert isinstance(event, RequirementCheckValidationEvent) + assert ctx is not None, "Validation context must be provided" target_profile = ctx.target_validation_profile if not event.requirement_check.requirement.hidden and \ (not event.requirement_check.overridden @@ -193,16 +196,20 @@ def update(self, event: 
Event, ctx: Optional[ValidationContext] = None): else: logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: + assert isinstance(event, RequirementValidationEvent) + assert ctx is not None, "Validation context must be provided" if not event.requirement.hidden: self.update_stats(ctx.result.statistics) # elif event.event_type == EventType.PROFILE_VALIDATION_END: # pass elif event.event_type == EventType.VALIDATION_END: + assert isinstance(event, ValidationEvent) self.__show_overall_result__(event.validation_result) logger.debug("Validation ended with result: %s", event.validation_result) def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): assert profile_stats, "Profile stats must be provided" + assert self.passed_checks is not None and self.failed_checks is not None, "Layout not initialized" # self.profile_stats = profile_stats self.requirement_checks_by_severity_container_layout["required"].update( Panel( @@ -270,6 +277,7 @@ def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): def __show_overall_result__(self, result: Optional[ValidationResult]): assert result, "Validation result must be provided" + assert self.overall_result is not None, "Layout not initialized" self.result = result if result.passed(): icon = "[OK]" if not self.console.interactive else "✅" @@ -285,7 +293,7 @@ def __show_overall_result__(self, result: Optional[ValidationResult]): style="bold red"), (1, 1))) -def get_app_header_rule() -> Text: +def get_app_header_rule() -> Padding: return Padding(Rule(f"\n[bold][cyan]ROCrate Validator[/cyan] (ver. 
[magenta]{get_version()}[/magenta])[/bold]", style="bold cyan"), (1, 2)) From f4fd04462a1de8e26461e2316a2d72947cf7fa42 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:04:13 +0200 Subject: [PATCH 179/352] fix(isa-ro-crate): :bug: terminate isa-ro-crate:Data triple in ontology.ttl --- rocrate_validator/profiles/isa-ro-crate/ontology.ttl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl index 4c7b5d926..aa9e5c8a8 100644 --- a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl @@ -64,8 +64,8 @@ isa-ro-crate:Sample rdf:type owl:Class ; # Data isa-ro-crate:Data rdf:type owl:Class ; rdfs:subClassOf schema:MediaObject ; - rdfs:label "Data"@en - + rdfs:label "Data"@en . + # Comment isa-ro-crate:Comment rdf:type owl:Class ; rdfs:subClassOf schema:MediaObject ; @@ -125,7 +125,7 @@ isa-ro-crate:Data rdf:type owl:Class ; ] ; rdfs:label "Data Entity"@en . - + ### https://schema.org/PropertyValue schema:PropertyValue rdf:type owl:Class ; rdfs:label "PropertyValue"@en . 
From 54acea0312c94157306005b7d839ac6aeef38efe Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:39:46 +0200 Subject: [PATCH 180/352] fix: :bug: add missing argument in log message --- rocrate_validator/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 19f989362..f06c21019 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1166,6 +1166,7 @@ def _do_validate_(self, context: ValidationContext) -> bool: logger.warning( "Ignoring the check %s as it returned the value %r instead of a boolean", check.name, + check_result, ) raise RuntimeError(f"Ignoring invalid result from check {check.name}") # Aggregate the check result From 19a3a933d6406e9a1134dff88302d4135b39f62f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:42:17 +0200 Subject: [PATCH 181/352] refactor(shacl): :recycle: rename loop var to avoid shadowing node param in inject_attributes --- rocrate_validator/requirements/shacl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index ff13d2736..5e2f205c0 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -77,7 +77,7 @@ def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optio # logger.debug("Injecting attributes of node %s", node) skip_properties = ["node"] if exclude is None else exclude + ["node"] triples = node_graph.triples((node, None, None)) - for node, p, o in triples: + for _node, p, o in triples: predicate_as_string = cast(Any, p).toPython() # logger.debug(f"Processing {predicate_as_string} of property graph {node}") if predicate_as_string.startswith(SHACL_NS): From 545e193e302a9417d33180d39a544a2969b689db Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 14:48:12 +0200 Subject: [PATCH 
182/352] fix: :rotating_light: fix PLC0207 warning --- rocrate_validator/requirements/shacl/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 63418d294..c00e7d41e 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -90,7 +90,7 @@ def node(self): def node_name(self): """Return the name of the node""" node_str = str(self._node) - return node_str.split("#")[-1] if "#" in node_str else node_str.split("/")[-1] + return node_str.rsplit("#", maxsplit=1)[-1] if "#" in node_str else node_str.rsplit("/", maxsplit=1)[-1] @property def graph(self): @@ -234,7 +234,8 @@ def name(self) -> str: path = self.graph.value(subject=self.node, predicate=shacl_ns.path) if path: path_str = str(path) - self._short_name = path_str.split("#")[-1] if "#" in path_str else path_str.split("/")[-1] + sep = "#" if "#" in path_str else "/" + self._short_name = path_str.rsplit(sep, maxsplit=1)[-1] if self.parent: self._name = f"{self._short_name} of {self.parent.name}" return self._name or str(self._node).split("/")[-1] From 9c44ad223242016b2b31cb0d00e63a759e3a54e1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 15:02:14 +0200 Subject: [PATCH 183/352] fix: :rotating_light: fix PLW1641 issues --- rocrate_validator/models.py | 4 ++++ rocrate_validator/rocrate.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index f06c21019..0d739789e 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2566,6 +2566,10 @@ def __eq__(self, other: object) -> bool: raise TypeError(f"Cannot compare ValidationResult with {type(other)}") return self._issues == other._issues + # Equality is based on the mutable list of issues, so instances are + # intentionally unhashable (a content-based hash would be unstable). 
+ __hash__ = None # type: ignore[assignment] + def to_dict(self) -> dict: """ Convert the ValidationResult to a dictionary diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index dbb7da804..a01c9c735 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -299,6 +299,9 @@ def __eq__(self, other: object) -> bool: return False return self.id == other.id + def __hash__(self) -> int: + return hash(self.id) + class ROCrateMetadata: METADATA_FILE_DESCRIPTOR = "ro-crate-metadata.json" @@ -448,6 +451,9 @@ def __eq__(self, other: object) -> bool: return False return self.ro_crate == other.ro_crate + def __hash__(self) -> int: + return hash(self.ro_crate) + class ROCrate(ABC): """ From 5778f9615bc3d6a9a8c2bf1e70f8e22899be7e66 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 15:40:00 +0200 Subject: [PATCH 184/352] fix: :rotating_light: fix UP035 + UP006 warnings --- rocrate_validator/cli/commands/cache.py | 20 +++++++++---------- rocrate_validator/cli/ui/text/validate.py | 3 ++- rocrate_validator/models.py | 14 ++++++------- .../requirements/python/__init__.py | 5 +++-- rocrate_validator/utils/cache_warmup.py | 17 ++++++++-------- rocrate_validator/utils/document_loader.py | 4 ++-- .../io_helpers/output/text/layout/report.py | 3 ++- 7 files changed, 35 insertions(+), 31 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 08efd599a..feef95168 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -23,7 +23,7 @@ import json from datetime import datetime from pathlib import Path -from typing import List, Optional +from typing import Optional from rich.table import Table @@ -315,10 +315,10 @@ def cache_warm( cache_path: Optional[Path] = None, profiles_path: Optional[Path] = None, extra_profiles_path: Optional[Path] = None, - profile_identifier: Optional[List[str]] = None, + profile_identifier: 
Optional[list[str]] = None, all_profiles: bool = False, - crate: Optional[List[str]] = None, - url: Optional[List[str]] = None, + crate: Optional[list[str]] = None, + url: Optional[list[str]] = None, ): """ Pre-populate the HTTP cache with resources declared by profiles and with @@ -341,7 +341,7 @@ def cache_warm( extra_dir = Path(extra_profiles_path) if extra_profiles_path else None requested_ids = list(profile_identifier or []) - urls: List[str] = [] + urls: list[str] = [] profile_scope: Optional[str] = None # Only fall back to "warm all profiles" when the user gave no other @@ -395,7 +395,7 @@ def cache_warm( profile_scope = "all installed profiles" urls = discover_cacheable_urls_from_profiles(loaded_profiles) - results: List[WarmUpResult] = [] + results: list[WarmUpResult] = [] if urls: console.print( f"[bold]Warming cache for {profile_scope}[/bold] " @@ -445,13 +445,13 @@ def cache_warm( ctx.exit(1) -def _warm_remote_crates(urls: List[str]) -> List[WarmUpResult]: +def _warm_remote_crates(urls: list[str]) -> list[WarmUpResult]: """ Download each remote RO-Crate URL via ``HttpRequester.fetch_fresh`` so that its response is stored in the cache. """ requester = HttpRequester() - results: List[WarmUpResult] = [] + results: list[WarmUpResult] = [] for url in urls: try: response = requester.fetch_fresh(url, allow_redirects=True) @@ -503,7 +503,7 @@ def _collect_cache_entries( url_filter: Optional[str] = None, sort_by: str = "size", sort_order: Optional[str] = None, -) -> List[dict]: +) -> list[dict]: """ Read every cached response and return a list of plain dicts. 
Filtering and sorting happen here so the CLI rendering paths (table / JSON) share @@ -517,7 +517,7 @@ def _collect_cache_entries( if cache is None: return [] needle = url_filter.lower() if url_filter else None - entries: List[dict] = [] + entries: list[dict] = [] responses = getattr(cache, "responses", None) or {} for key in list(responses): try: diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index 4ef1ed0e5..c3db89641 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -14,7 +14,8 @@ from __future__ import annotations -from typing import Any, Callable, Optional +from typing import Any, Optional +from collections.abc import Callable from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output.console import Console diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 0d739789e..5804e8fa4 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -26,7 +26,7 @@ from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import Any, Optional, Protocol, Tuple, Type, Union, cast +from typing import Any, Optional, Protocol, Union, cast from urllib.error import HTTPError import enum_tools @@ -666,7 +666,7 @@ def __extract_token_from_path__(self) -> str: identifier = identifier.replace("/", "-") return identifier - def __init_token_version__(self) -> Tuple[str, Optional[str]]: + def __init_token_version__(self) -> tuple[str, Optional[str]]: # try to extract the token from the specs or the path candidate_token = cast(Optional[str], self.__get_specification_property__("hasToken", PROF_NS)) if not candidate_token: @@ -713,7 +713,7 @@ def __load_profiles_paths__( cls, profiles_path: Optional[Union[str, Path]] = None, extra_profiles_path: Optional[Union[str, Path]] = None, - ) -> list[Tuple[Path, Path]]: + ) -> list[tuple[Path, Path]]: """ 
Load the paths of the profiles from the given profiles path and extra profiles path. @@ -1303,7 +1303,7 @@ def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> return loader_instance @staticmethod - def __get_requirement_classes__() -> list[Type[Requirement]]: + def __get_requirement_classes__() -> list[type[Requirement]]: # Ensure known requirement modules are imported so subclasses are registered. for requirement_type in ("python", "shacl"): @@ -1318,9 +1318,9 @@ def __get_requirement_classes__() -> list[Type[Requirement]]: ) def all_subclasses( - base_class: Type[Requirement], - ) -> list[Type[Requirement]]: - result: list[Type[Requirement]] = [] + base_class: type[Requirement], + ) -> list[type[Requirement]]: + result: list[type[Requirement]] = [] for subcls in base_class.__subclasses__(): result.append(subcls) result.extend(all_subclasses(subcls)) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 930403127..ee4435ae4 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -15,7 +15,8 @@ import inspect import re from pathlib import Path -from typing import Callable, Optional, Type +from typing import Optional +from collections.abc import Callable from rocrate_validator.utils import log as logging from rocrate_validator.models import (LevelCollection, Profile, Requirement, @@ -98,7 +99,7 @@ class PyRequirement(Requirement): def __init__(self, profile: Profile, - requirement_check_class: Type[PyFunctionCheck], + requirement_check_class: type[PyFunctionCheck], name: str = "", description: Optional[str] = None, path: Optional[Path] = None): diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 2b2a3c011..8071659c3 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -27,7 +27,8 @@ import os from dataclasses 
import dataclass -from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Sequence, cast +from typing import TYPE_CHECKING, Any, Optional, cast +from collections.abc import Iterable, Sequence from rocrate_validator import constants from rocrate_validator.utils import log as logging @@ -66,7 +67,7 @@ class WarmUpResult: detail: Optional[str] = None -def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: +def discover_profile_cacheable_urls(profile: "Profile") -> list[str]: """ Return the list of HTTP(S) URLs declared by ``profile`` as cacheable artifacts. Returns an empty list when the profile has no declared @@ -77,7 +78,7 @@ def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: logger.debug( "Profile %s has no specification graph loaded", getattr(profile, "identifier", "?")) return [] - urls: List[str] = [] + urls: list[str] = [] try: for row in graph.query(_CACHEABLE_URLS_SPARQL): artifact = cast(Any, row).artifact @@ -92,13 +93,13 @@ def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: return urls -def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> List[str]: +def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> list[str]: """ Aggregate cacheable URLs from the given profiles, preserving order and removing duplicates. """ seen: set[str] = set() - result: List[str] = [] + result: list[str] = [] for profile in profiles: for url in discover_profile_cacheable_urls(profile): if url not in seen: @@ -107,7 +108,7 @@ def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> List return result -def warm_up_urls(urls: Sequence[str]) -> List[WarmUpResult]: +def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: """ Fetch each URL so that its response is stored in the HTTP cache. @@ -115,7 +116,7 @@ def warm_up_urls(urls: Sequence[str]) -> List[WarmUpResult]: offline cache misses) are reported but do not raise. 
""" requester = HttpRequester() - results: List[WarmUpResult] = [] + results: list[WarmUpResult] = [] offline = bool(getattr(requester, "offline", False)) for url in urls: try: @@ -141,7 +142,7 @@ def warm_up_urls(urls: Sequence[str]) -> List[WarmUpResult]: return results -def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[List[WarmUpResult]]: +def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[list[WarmUpResult]]: """ Perform a best-effort synchronous warm-up triggered by ``ValidationSettings.__post_init__``. diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 0242fac67..12e0063a9 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -26,7 +26,7 @@ import json import threading -from typing import Any, Optional, Tuple +from typing import Any, Optional from rdflib.plugins.shared.jsonld import context as jsonld_context from rdflib.plugins.shared.jsonld import util as jsonld_util @@ -126,7 +126,7 @@ def _fetch_json_ld(url: str) -> Any: return json.loads(response.text) -def resolve_remote_document(url: str) -> Tuple[Optional[dict], Optional[str]]: +def resolve_remote_document(url: str) -> tuple[Optional[dict], Optional[str]]: """ Resolve a remote JSON-LD document, returning ``(json, content_type)``. 
diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 4c996f402..1dc72bd86 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -16,7 +16,8 @@ import threading import time -from typing import Any, Callable, Optional +from typing import Any, Optional +from collections.abc import Callable from rich.align import Align from rich.layout import Layout From 06ec76bab411f0615b386e2447783cec98704eaf Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 15:41:52 +0200 Subject: [PATCH 185/352] refactor: :rotating_light: organize imports --- rocrate_validator/cli/commands/errors.py | 3 +-- rocrate_validator/cli/commands/profiles.py | 7 ++---- rocrate_validator/cli/commands/validate.py | 10 ++++----- rocrate_validator/cli/main.py | 2 +- rocrate_validator/cli/ui/text/validate.py | 5 ++--- .../ro-crate/must/0_file_descriptor_format.py | 3 +-- .../ro-crate/must/4_data_entity_metadata.py | 5 ++--- .../should/2_root_data_entity_relative_uri.py | 5 ++--- .../should/4_data_entity_existence.py | 5 ++--- .../should/5_web_data_entity_metadata.py | 5 ++--- .../workflow-ro-crate/may/1_main_workflow.py | 5 ++--- .../workflow-ro-crate/must/0_main_workflow.py | 5 ++--- .../requirements/python/__init__.py | 17 +++++++++----- .../requirements/shacl/__init__.py | 6 ++--- .../requirements/shacl/errors.py | 3 +-- .../requirements/shacl/models.py | 6 ++--- .../requirements/shacl/requirements.py | 2 +- .../requirements/shacl/validator.py | 22 +++++++++---------- rocrate_validator/rocrate.py | 5 ++--- rocrate_validator/services.py | 8 +++---- rocrate_validator/utils/cache_warmup.py | 5 ++--- rocrate_validator/utils/document_loader.py | 3 +-- rocrate_validator/utils/io_helpers/input.py | 2 +- .../utils/io_helpers/output/json/__init__.py | 8 ++++--- .../io_helpers/output/json/formatters.py | 9 
+++++--- .../utils/io_helpers/output/pager.py | 5 ++--- .../utils/io_helpers/output/text/__init__.py | 5 ++--- .../io_helpers/output/text/formatters.py | 5 ++--- .../io_helpers/output/text/layout/progress.py | 16 +++++++++----- .../io_helpers/output/text/layout/report.py | 21 +++++++++++------- rocrate_validator/utils/log.py | 4 ++-- 31 files changed, 104 insertions(+), 108 deletions(-) diff --git a/rocrate_validator/cli/commands/errors.py b/rocrate_validator/cli/commands/errors.py index 08fa6e171..c89a1aedf 100644 --- a/rocrate_validator/cli/commands/errors.py +++ b/rocrate_validator/cli/commands/errors.py @@ -17,9 +17,8 @@ from rich.console import Console +from rocrate_validator.errors import InvalidProfilePath, ProfileNotFound, ProfilesDirectoryNotFound from rocrate_validator.utils import log as logging -from rocrate_validator.errors import (InvalidProfilePath, ProfileNotFound, - ProfilesDirectoryNotFound) # Create a logger for this module logger = logging.getLogger(__name__) diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index afbbbcab9..2be5eeb04 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -28,13 +28,10 @@ from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli, click from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER -from rocrate_validator.models import (LevelCollection, Profile, - RequirementCheck, RequirementLevel, - Severity) +from rocrate_validator.models import LevelCollection, Profile, RequirementCheck, RequirementLevel, Severity from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.utils.io_helpers.output.text.layout.report import \ - get_app_header_rule +from rocrate_validator.utils.io_helpers.output.text.layout.report import get_app_header_rule from 
rocrate_validator.utils.paths import get_profiles_path, shorten_path # set the default profiles path diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 9d0b5031c..e15401b11 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -24,22 +24,20 @@ from rich.padding import Padding from rich.rule import Rule -from rocrate_validator.utils import log as logging from rocrate_validator import constants, services from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli from rocrate_validator.cli.ui.text.validate import ValidationCommandView from rocrate_validator.errors import ROCrateInvalidURIError +from rocrate_validator.models import Severity, ValidationResult, ValidationSettings +from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.input import get_single_char, multiple_choice from rocrate_validator.utils.io_helpers.output.console import Console from rocrate_validator.utils.io_helpers.output.json import JSONOutputFormatter from rocrate_validator.utils.io_helpers.output.text import TextOutputFormatter -from rocrate_validator.utils.io_helpers.output.text.layout.report import ( - LiveTextProgressLayout, get_app_header_rule) -from rocrate_validator.models import (Severity, ValidationResult, - ValidationSettings) -from rocrate_validator.utils.uri import validate_rocrate_uri +from rocrate_validator.utils.io_helpers.output.text.layout.report import LiveTextProgressLayout, get_app_header_rule from rocrate_validator.utils.paths import get_profiles_path +from rocrate_validator.utils.uri import validate_rocrate_uri # from rich.markdown import Markdown # from rich.table import Table diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index e42097f95..0d79ed0a0 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -16,8 +16,8 @@ import 
rich_click as click -from rocrate_validator.utils import log as logging from rocrate_validator.cli.utils import running_in_jupyter +from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output.console import Console from rocrate_validator.utils.io_helpers.output.pager import SystemPager from rocrate_validator.utils.versioning import get_version diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index c3db89641..ffaff3bda 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -14,16 +14,15 @@ from __future__ import annotations -from typing import Any, Optional from collections.abc import Callable +from typing import Any, Optional +from rocrate_validator.models import ValidationResult, ValidationSettings, ValidationStatistics from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output.console import Console from rocrate_validator.utils.io_helpers.output.pager import SystemPager from rocrate_validator.utils.io_helpers.output.text import TextOutputFormatter from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout -from rocrate_validator.models import (ValidationResult, ValidationSettings, - ValidationStatistics) # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index c70884c13..b24324b85 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -17,8 +17,7 @@ from urllib.parse import urljoin from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, 
check, requirement from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py index d96d8ee03..5647304c9 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py index dd7d8fc12..9cbb0751c 100644 --- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py +++ b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py index f5742acd2..4f9004eed 100644 --- a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py +++ b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index a446301ef..24d787799 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging from rocrate_validator.utils.uri import AvailabilityStatus # set up logging diff --git a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py index 68eafc0b4..9bc07b7f5 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py index a64adbf98..6d4f147e9 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index ee4435ae4..63c441bf7 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -14,15 +14,22 @@ import inspect import re +from collections.abc import Callable from pathlib import Path from typing import Optional -from collections.abc import Callable +from rocrate_validator.models import ( + LevelCollection, + Profile, + Requirement, + RequirementCheck, + RequirementLevel, + RequirementLoader, + Severity, + SourceSnippet, + ValidationContext, +) from rocrate_validator.utils import log as logging -from rocrate_validator.models import (LevelCollection, Profile, Requirement, - RequirementCheck, RequirementLevel, - RequirementLoader, Severity, - SourceSnippet, ValidationContext) from rocrate_validator.utils.python_helpers import get_classes_from_file # set up logging diff --git a/rocrate_validator/requirements/shacl/__init__.py b/rocrate_validator/requirements/shacl/__init__.py index 072410d9c..7cb912126 100644 --- a/rocrate_validator/requirements/shacl/__init__.py +++ b/rocrate_validator/requirements/shacl/__init__.py @@ -14,10 +14,8 @@ from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.errors import SHACLValidationError -from rocrate_validator.requirements.shacl.requirements import ( - SHACLRequirement, SHACLRequirementLoader) -from rocrate_validator.requirements.shacl.validator import ( - SHACLValidationResult, SHACLValidator) +from 
rocrate_validator.requirements.shacl.requirements import SHACLRequirement, SHACLRequirementLoader +from rocrate_validator.requirements.shacl.validator import SHACLValidationResult, SHACLValidator __all__ = ["SHACLCheck", "SHACLValidator", "SHACLValidationResult", "SHACLValidationError", "SHACLRequirement", "SHACLRequirementLoader"] diff --git a/rocrate_validator/requirements/shacl/errors.py b/rocrate_validator/requirements/shacl/errors.py index c77bf571d..6465a4054 100644 --- a/rocrate_validator/requirements/shacl/errors.py +++ b/rocrate_validator/requirements/shacl/errors.py @@ -15,8 +15,7 @@ from typing import Optional from rocrate_validator.errors import ValidationError -from rocrate_validator.requirements.shacl.validator import \ - SHACLValidationResult +from rocrate_validator.requirements.shacl.validator import SHACLValidationResult class SHACLValidationError(ValidationError): diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index c00e7d41e..6b4e0a988 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -21,11 +21,9 @@ from rdflib.term import Node from rocrate_validator.constants import SHACL_NS -from rocrate_validator.utils import log as logging from rocrate_validator.models import LevelCollection, RequirementLevel, Severity -from rocrate_validator.requirements.shacl.utils import (ShapesList, - compute_key, - inject_attributes) +from rocrate_validator.requirements.shacl.utils import ShapesList, compute_key, inject_attributes +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index f852f1d79..0ca7d3416 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -17,7 +17,6 @@ from rdflib import RDF 
-from rocrate_validator.utils import log as logging from rocrate_validator.constants import VALIDATOR_NS from rocrate_validator.models import ( Profile, @@ -29,6 +28,7 @@ ) from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 4811cd13f..5325cc83b 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -23,16 +23,17 @@ from rdflib import BNode, Graph from rdflib.term import Node, URIRef -from rocrate_validator.constants import (DEFAULT_ONTOLOGY_FILE, - RDF_SERIALIZATION_FORMATS, - RDF_SERIALIZATION_FORMATS_TYPES, - SHACL_NS, VALID_INFERENCE_OPTIONS, - VALID_INFERENCE_OPTIONS_TYPES) -from rocrate_validator.models import (Profile, RequirementCheck, Severity, - ValidationContext, ValidationResult) +from rocrate_validator.constants import ( + DEFAULT_ONTOLOGY_FILE, + RDF_SERIALIZATION_FORMATS, + RDF_SERIALIZATION_FORMATS_TYPES, + SHACL_NS, + VALID_INFERENCE_OPTIONS, + VALID_INFERENCE_OPTIONS_TYPES, +) +from rocrate_validator.models import Profile, RequirementCheck, Severity, ValidationContext, ValidationResult from rocrate_validator.requirements.shacl.models import ShapesRegistry -from rocrate_validator.requirements.shacl.utils import (make_uris_relative, - map_severity) +from rocrate_validator.requirements.shacl.utils import make_uris_relative, map_severity from rocrate_validator.utils import log as logging from rocrate_validator.utils.rdf import extract_base_from_jsonld @@ -126,8 +127,7 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: # enable overriding of checks if self.settings.allow_requirement_check_override: - from 
rocrate_validator.requirements.shacl.requirements import \ - SHACLRequirement + from rocrate_validator.requirements.shacl.requirements import SHACLRequirement for requirement in [_ for _ in profile.requirements if isinstance(_, SHACLRequirement)]: # logger.debug("Processing requirement: %s", requirement.name) for check in requirement.get_checks(): diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index a01c9c735..fec9ffe39 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -26,11 +26,10 @@ from rdflib import Graph -from rocrate_validator.utils import log as logging from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.utils.uri import validate_rocrate_uri +from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference +from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference, validate_rocrate_uri # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 0b9381df3..bfc4f8cb9 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -19,13 +19,13 @@ from pathlib import Path from typing import Optional, Union -from rocrate_validator.utils import log as logging +from rocrate_validator.errors import ProfileNotFound from rocrate_validator.events import Subscriber from rocrate_validator.models import Profile, Severity, ValidationResult, ValidationSettings, Validator -from rocrate_validator.utils.uri import URI -from rocrate_validator.utils.paths import get_profiles_path +from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.errors import ProfileNotFound +from rocrate_validator.utils.paths import get_profiles_path +from rocrate_validator.utils.uri import URI # 
set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 8071659c3..9e118f2bc 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -26,14 +26,13 @@ from __future__ import annotations import os +from collections.abc import Iterable, Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Optional, cast -from collections.abc import Iterable, Sequence from rocrate_validator import constants from rocrate_validator.utils import log as logging -from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, - HttpRequester) +from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester if TYPE_CHECKING: from rocrate_validator.models import Profile, ValidationSettings diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 12e0063a9..d0dfe9dc2 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -32,8 +32,7 @@ from rdflib.plugins.shared.jsonld import util as jsonld_util from rocrate_validator.utils import log as logging -from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, - HttpRequester, OfflineCacheMissError) +from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester, OfflineCacheMissError logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 71d5f4860..8d2c399b1 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -21,8 +21,8 @@ from InquirerPy.base.control import Choice from rich.console import Console -from rocrate_validator.utils import log as logging from rocrate_validator.models import Profile +from rocrate_validator.utils import log as logging # set up 
logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/json/__init__.py b/rocrate_validator/utils/io_helpers/output/json/__init__.py index 351cca56e..5318fb246 100644 --- a/rocrate_validator/utils/io_helpers/output/json/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/json/__init__.py @@ -14,12 +14,14 @@ from typing import Any, Optional +from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter from rocrate_validator.utils.io_helpers.output.json.formatters import ( - ValidationResultJSONOutputFormatter, ValidationResultsJSONOutputFormatter, - ValidationStatisticsJSONOutputFormatter) -from rocrate_validator.models import ValidationResult, ValidationStatistics + ValidationResultJSONOutputFormatter, + ValidationResultsJSONOutputFormatter, + ValidationStatisticsJSONOutputFormatter, +) # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 78a34433f..d116cfcf3 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -17,10 +17,13 @@ from rich.console import ConsoleOptions, RenderResult +from rocrate_validator.models import ( + AggregatedValidationStatistics, + CustomEncoder, + ValidationResult, + ValidationStatistics, +) from rocrate_validator.utils import log as logging -from rocrate_validator.models import (AggregatedValidationStatistics, - CustomEncoder, ValidationResult, - ValidationStatistics) from rocrate_validator.utils.io_helpers.output import OutputFormatter from rocrate_validator.utils.io_helpers.output.console import Console from rocrate_validator.utils.versioning import get_version diff --git 
a/rocrate_validator/utils/io_helpers/output/pager.py b/rocrate_validator/utils/io_helpers/output/pager.py index a7df8434c..fd72ad3dd 100644 --- a/rocrate_validator/utils/io_helpers/output/pager.py +++ b/rocrate_validator/utils/io_helpers/output/pager.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rich.pager import Pager - - import pydoc from typing import Any +from rich.pager import Pager + class SystemPager(Pager): """Uses the pager installed on the system.""" diff --git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index 924423721..040fe9a02 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -18,13 +18,12 @@ from rich.console import ConsoleOptions, RenderResult -from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationResult, ValidationStatistics +from rocrate_validator.utils import log as logging from .. 
import BaseOutputFormatter from ..console import Console -from .formatters import (ValidationResultTextOutputFormatter, - ValidationStatisticsTextOutputFormatter) +from .formatters import ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index 401d6e499..bf0f139a9 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -19,11 +19,10 @@ from rich.markdown import Markdown from rich.padding import Padding +from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.utils.io_helpers.output.text.layout.report import \ - ValidationReportLayout -from rocrate_validator.models import ValidationResult, ValidationStatistics +from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout from .. 
import OutputFormatter from ..console import Console diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 6c6337c1b..861518732 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -17,13 +17,17 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn -from rocrate_validator.utils import log as logging from rocrate_validator.events import Event, EventType, Subscriber -from rocrate_validator.models import (ProfileValidationEvent, - RequirementCheckValidationEvent, - RequirementValidationEvent, - ValidationContext, ValidationEvent, - ValidationSettings, ValidationStatistics) +from rocrate_validator.models import ( + ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + ValidationContext, + ValidationEvent, + ValidationSettings, + ValidationStatistics, +) +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 1dc72bd86..813b998e9 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -16,8 +16,8 @@ import threading import time -from typing import Any, Optional from collections.abc import Callable +from typing import Any, Optional from rich.align import Align from rich.layout import Layout @@ -27,16 +27,21 @@ from rich.rule import Rule from rich.text import Text +from rocrate_validator.events import Event, EventType +from rocrate_validator.models import ( + ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + Severity, + ValidationContext, + ValidationEvent, + ValidationResult, + 
ValidationSettings, + ValidationStatistics, +) from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.events import Event, EventType from rocrate_validator.utils.io_helpers.output.console import Console -from rocrate_validator.models import (ProfileValidationEvent, - RequirementCheckValidationEvent, - RequirementValidationEvent, Severity, - ValidationContext, ValidationEvent, - ValidationResult, ValidationSettings, - ValidationStatistics) from rocrate_validator.utils.uri import URI from rocrate_validator.utils.versioning import get_version diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 9a55bcf86..37e6ebf21 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -16,8 +16,8 @@ import sys import threading from io import StringIO -from logging import (CRITICAL, DEBUG, ERROR, INFO, WARNING, Handler, Logger, - StreamHandler, basicConfig as logging_basicConfig) +from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING, Handler, Logger, StreamHandler +from logging import basicConfig as logging_basicConfig from typing import Optional import colorlog From fc12381bf19bc87a8768ad2eadcfec36725efd11 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 15:48:44 +0200 Subject: [PATCH 186/352] refactor: :recycle: flatten conditional returns (RET505/RET506) Drop redundant else/elif branches following return/raise statements --- rocrate_validator/errors.py | 6 ++-- rocrate_validator/models.py | 13 +++---- .../requirements/shacl/models.py | 11 +++--- rocrate_validator/requirements/shacl/utils.py | 10 +++--- rocrate_validator/rocrate.py | 34 +++++++++---------- rocrate_validator/utils/io_helpers/colors.py | 18 +++++----- .../utils/io_helpers/output/console.py | 3 +- 7 files changed, 41 insertions(+), 54 deletions(-) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index 
74b1ed794..22570e266 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -303,10 +303,8 @@ def __str__(self) -> str: if self._path: if self._message: return f"RO-Crate metadata not found on '{self._path!r}': {self._message!r}" - else: - return f"RO-Crate metadata not found on '{self._path!r}'" - else: - return "RO-Crate metadata not found" + return f"RO-Crate metadata not found on '{self._path!r}'" + return "RO-Crate metadata not found" def __repr__(self): return f"ROCrateMetadataNotFoundError({self._path!r},{self._message!r})" diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 5804e8fa4..bf45eb6bd 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -96,8 +96,7 @@ class Severity(enum.Enum): def __lt__(self, other: object) -> bool: if isinstance(other, Severity): return self.value < other.value - else: - raise TypeError(f"Comparison not supported between instances of {type(self)} and {type(other)}") + raise TypeError(f"Comparison not supported between instances of {type(self)} and {type(other)}") @staticmethod def get(name: str) -> Severity: @@ -1282,10 +1281,9 @@ def profile(self) -> Profile: def __get_requirement_type__(requirement_path: Path) -> str: if requirement_path.suffix == ".py": return "python" - elif requirement_path.suffix == ".ttl": + if requirement_path.suffix == ".ttl": return "shacl" - else: - raise ValueError(f"Unsupported requirement type: {requirement_path.suffix}") + raise ValueError(f"Unsupported requirement type: {requirement_path.suffix}") @classmethod def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> RequirementLoader: @@ -2784,10 +2782,9 @@ def parse(cls, settings: Union[dict, ValidationSettings]) -> ValidationSettings: """ if isinstance(settings, dict): return cls(**settings) - elif isinstance(settings, ValidationSettings): + if isinstance(settings, ValidationSettings): return settings - else: - raise ValueError(f"Invalid settings 
type: {type(settings)}") + raise ValueError(f"Invalid settings type: {type(settings)}") class ValidationEvent(Event): diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 6b4e0a988..8d2e652b5 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -120,9 +120,9 @@ def get_declared_severity(self) -> Optional[Severity]: severity = getattr(self, "severity", None) if severity == f"{SHACL_NS}Violation": return Severity.REQUIRED - elif severity == f"{SHACL_NS}Warning": + if severity == f"{SHACL_NS}Warning": return Severity.RECOMMENDED - elif severity == f"{SHACL_NS}Info": + if severity == f"{SHACL_NS}Info": return Severity.OPTIONAL return None @@ -130,12 +130,11 @@ def __str__(self): class_name = self.__class__.__name__ if self.name and self.description: return f"{class_name} - {self.name}: {self.description} ({hash(self)})" - elif self.name: + if self.name: return f"{class_name} - {self.name} ({hash(self)})" - elif self.description: + if self.description: return f"{class_name} - {self.description} ({hash(self)})" - else: - return f"{class_name} ({hash(self)})" + return f"{class_name} ({hash(self)})" def __repr__(self): return f"{self.__class__.__name__}({hash(self)})" diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 5e2f205c0..4502d1fec 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -59,12 +59,11 @@ def map_severity(shacl_severity: str) -> Severity: """ if f"{SHACL_NS}Violation" == shacl_severity: return Severity.REQUIRED - elif f"{SHACL_NS}Warning" == shacl_severity: + if f"{SHACL_NS}Warning" == shacl_severity: return Severity.RECOMMENDED - elif f"{SHACL_NS}Info" == shacl_severity: + if f"{SHACL_NS}Info" == shacl_severity: return Severity.OPTIONAL - else: - raise RuntimeError(f"Unrecognized SHACL severity term 
{shacl_severity}") + raise RuntimeError(f"Unrecognized SHACL severity term {shacl_severity}") def make_uris_relative(text: str, ro_crate_path: Union[Path, str]) -> str: @@ -138,8 +137,7 @@ def compute_key(g: Graph, s: Node) -> str: if isinstance(s, BNode): return compute_hash(g, s) - else: - return cast(Any, s).toPython() + return cast(Any, s).toPython() class ShapesList: diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index fec9ffe39..93f3c6157 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -1001,8 +1001,7 @@ def size(self) -> int: file_size = response.headers.get('Content-Length') if file_size is not None: return int(file_size) - else: - raise Exception("Could not determine the file size from the headers") + raise Exception("Could not determine the file size from the headers") @staticmethod def __fetch_range__(uri: str, start, end): @@ -1058,7 +1057,7 @@ def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: (base_path / 'data' / 'ro-crate-metadata.json').is_file() # Check for local zip file - elif uri.is_local_file(): + if uri.is_local_file(): path = uri.as_path() if path.suffix == '.zip': with zipfile.ZipFile(path, 'r') as zf: @@ -1082,21 +1081,20 @@ def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") return metadata_response.status_code == 200 - else: - # If it's a remote zip file, we need to download it partially - # Temporarily create instance to check - temp_crate = ROCrateRemoteZip(uri) - logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) - # ROCrate.__init__(temp_crate, uri) - # temp_crate._ROCrateRemoteZip__init_zip_reference__() - has_bagit_txt = temp_crate.has_file(Path('bagit.txt')) - logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) - has_ro_crate_metadata = temp_crate.has_file(Path('data/ro-crate-metadata.json')) - logger.debug("Presence of 
'data/ro-crate-metadata.json': %s", - has_ro_crate_metadata) - result = has_bagit_txt and has_ro_crate_metadata - del temp_crate - return result + # If it's a remote zip file, we need to download it partially + # Temporarily create instance to check + temp_crate = ROCrateRemoteZip(uri) + logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) + # ROCrate.__init__(temp_crate, uri) + # temp_crate._ROCrateRemoteZip__init_zip_reference__() + has_bagit_txt = temp_crate.has_file(Path('bagit.txt')) + logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) + has_ro_crate_metadata = temp_crate.has_file(Path('data/ro-crate-metadata.json')) + logger.debug("Presence of 'data/ro-crate-metadata.json': %s", + has_ro_crate_metadata) + result = has_bagit_txt and has_ro_crate_metadata + del temp_crate + return result except Exception as e: if logger.isEnabledFor(logging.DEBUG): diff --git a/rocrate_validator/utils/io_helpers/colors.py b/rocrate_validator/utils/io_helpers/colors.py index 919737278..1c86db067 100644 --- a/rocrate_validator/utils/io_helpers/colors.py +++ b/rocrate_validator/utils/io_helpers/colors.py @@ -26,12 +26,11 @@ def get_severity_color(severity: Union[str, Severity]) -> str: """ if severity == Severity.REQUIRED or severity == "REQUIRED": return "red" - elif severity == Severity.RECOMMENDED or severity == "RECOMMENDED": + if severity == Severity.RECOMMENDED or severity == "RECOMMENDED": return "orange1" - elif severity == Severity.OPTIONAL or severity == "OPTIONAL": + if severity == Severity.OPTIONAL or severity == "OPTIONAL": return "yellow" - else: - return "white" + return "white" def get_req_level_color(level: LevelCollection) -> str: @@ -42,13 +41,12 @@ def get_req_level_color(level: LevelCollection) -> str: """ if level in (LevelCollection.MUST, LevelCollection.SHALL, LevelCollection.REQUIRED): return "red" - elif level in (LevelCollection.MUST_NOT, LevelCollection.SHALL_NOT): + if level in (LevelCollection.MUST_NOT, LevelCollection.SHALL_NOT): 
return "purple" - elif level in (LevelCollection.SHOULD, LevelCollection.RECOMMENDED): + if level in (LevelCollection.SHOULD, LevelCollection.RECOMMENDED): return "orange1" - elif level == LevelCollection.SHOULD_NOT: + if level == LevelCollection.SHOULD_NOT: return "lightyellow" - elif level in (LevelCollection.MAY, LevelCollection.OPTIONAL): + if level in (LevelCollection.MAY, LevelCollection.OPTIONAL): return "yellow" - else: - return "white" + return "white" diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index 6ae01bae5..ceaae1449 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -59,8 +59,7 @@ def __format_data__(self, obj, *args, **kwargs): formatter = self._formatters.get(type(obj)) if formatter: return formatter(obj) - else: - return obj + return obj def print(self, obj, *args, **kwargs): if not self.disabled: From f557560d3549c082324e76ae507bc8fadbf21905 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 16:02:05 +0200 Subject: [PATCH 187/352] refactor: :rotating_light: merge nested `if` statements into a single one (SIM102) --- rocrate_validator/events.py | 7 +++--- rocrate_validator/models.py | 22 +++++++++---------- .../ro-crate/must/0_file_descriptor_format.py | 9 ++++---- .../requirements/shacl/checks.py | 19 ++++++++-------- .../requirements/shacl/models.py | 15 ++++++------- .../requirements/shacl/requirements.py | 6 ++--- rocrate_validator/rocrate.py | 10 ++++----- rocrate_validator/services.py | 9 +++++--- rocrate_validator/utils/io_helpers/input.py | 10 ++++----- 9 files changed, 50 insertions(+), 57 deletions(-) diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index e47371331..a1e181d5c 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -172,10 +172,9 @@ def notify(self, event: Union[Event, EventType], ctx: Optional[Any] = 
None): event = Event(event) # Check if the event has already been notified # This is to avoid notifying the same event multiple times - if self.__avoid_duplicate_notifications: - if event in self.__notified_events: - logger.warning(f"Event {event} already notified") - return + if self.__avoid_duplicate_notifications and event in self.__notified_events: + logger.warning(f"Event {event} already notified") + return # Add the event to the notified events self.__notified_events.add(event) logger.debug(f"Notifying event {event}") diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index bf45eb6bd..b810bb313 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -290,18 +290,16 @@ def __init__( # Check if the profile is overriding an existing profile existing_profile = self.__profiles_map.get_by_key(cast(Any, self._profile_node).toPython()) - if existing_profile: - # Check if the existing profile is different from the current one - if existing_profile.path != profile_path: - # if the profile already exists, log a warning - logger.warning( - "Profile with identifier %s at %s is being overridden by the profile loaded from %s.", - existing_profile.identifier, - existing_profile.path, - profile_path, - ) - # add the existing profile as an override - self.__add_override__(existing_profile) + # If an existing profile is being overridden by a different one, log a warning + if existing_profile and existing_profile.path != profile_path: + logger.warning( + "Profile with identifier %s at %s is being overridden by the profile loaded from %s.", + existing_profile.identifier, + existing_profile.path, + profile_path, + ) + # add the existing profile as an override + self.__add_override__(existing_profile) # add the profile to the profiles map self.__profiles_map.add( diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index b24324b85..0b5db8db6 
100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -171,11 +171,10 @@ def __check_contexts__(self, context: ValidationContext, jsonld_context: object) """ Get the keys of the context URI """ is_valid = True # if the context is a string, check if it is a valid URI - if isinstance(jsonld_context, str): - if not self.__check_remote_context__(jsonld_context): - context.result.add_issue( - f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) - is_valid = False + if isinstance(jsonld_context, str) and not self.__check_remote_context__(jsonld_context): + context.result.add_issue( + f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) + is_valid = False # if the context is a dictionary, get the keys of the dictionary if isinstance(jsonld_context, dict): diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 4f2712029..e88038cf8 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -86,16 +86,15 @@ def __init__( requirement_level_from_path = self.requirement.requirement_level_from_path if requirement_level_from_path: declared_level = shape.get_declared_level() - if declared_level: - if shape.level.severity != requirement_level_from_path.severity: - logger.warning( - 'Mismatch in requirement level for check "%s": ' - "shape level %s does not match the level from the containing folder %s. " - "Consider moving the shape property or removing the severity property.", - self.name, - shape.level, - requirement_level_from_path, - ) + if declared_level and shape.level.severity != requirement_level_from_path.severity: + logger.warning( + 'Mismatch in requirement level for check "%s": ' + "shape level %s does not match the level from the containing folder %s. 
" + "Consider moving the shape property or removing the severity property.", + self.name, + shape.level, + requirement_level_from_path, + ) self._level = level @property diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 8d2e652b5..fe68f0149 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -245,14 +245,13 @@ def name(self, value: str): def description(self) -> str: """Return the description of the shape property""" if not self._description: - # get the object of the predicate sh:description - if not self._description: - property_name = self.name - if self._short_name: - property_name = self._short_name - self._description = f"Check the property \"**{property_name}**\"" - if self.parent and self.parent.name not in property_name: - self._description += f" of the entity \"**{self.parent.name}**\"" + # build a default description from the property (and parent) name + property_name = self.name + if self._short_name: + property_name = self._short_name + self._description = f"Check the property \"**{property_name}**\"" + if self.parent and self.parent.name not in property_name: + self._description += f" of the entity \"**{self.parent.name}**\"" return self._description @description.setter diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 0ca7d3416..8bc559503 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -85,9 +85,9 @@ def shape(self) -> Shape: @property def hidden(self) -> bool: - if self.shape.node is not None: - if (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph: - return True + if self.shape.node is not None and \ + (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph: + return True return False @classmethod diff --git 
a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 93f3c6157..ba5ddf55b 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -152,9 +152,8 @@ def get_id_as_uri(cls, entity_id: str, ro_crate: ROCrate) -> URI: # Otherwise the `@id` is a relative path: if the RO-Crate itself is # remote, resolve it against the crate URI so the entity is still # classified as remote/web-based. - if ro_crate.uri.is_remote_resource(): - if entity_id.startswith("./"): - return URI(f"{ro_crate.uri}/{entity_id[2:]}") + if ro_crate.uri.is_remote_resource() and entity_id.startswith("./"): + return URI(f"{ro_crate.uri}/{entity_id[2:]}") return URI(cls.get_id_as_path(entity_id, ro_crate)) @property @@ -400,9 +399,8 @@ def get_data_entities( def get_web_data_entities(self) -> list[ROCrateEntity]: entities = [] for entity in self.get_entities(): - if entity.has_type("File") or entity.has_type("Dataset"): - if entity.is_remote(): - entities.append(entity) + if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote(): + entities.append(entity) return entities def get_conforms_to(self) -> Optional[list[str]]: diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index bfc4f8cb9..809019203 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -101,9 +101,12 @@ def __initialise_validator__( logger.debug("Validating RO-Crate: %s", rocrate_path) # check if the RO-Crate exists - if not getattr(settings, "metadata_only", False) and getattr(settings, "metadata_dict", None) is None: - if not rocrate_path.is_available(): - raise FileNotFoundError(f"RO-Crate not found: {rocrate_path}") + if ( + not getattr(settings, "metadata_only", False) + and getattr(settings, "metadata_dict", None) is None + and not rocrate_path.is_available() + ): + raise FileNotFoundError(f"RO-Crate not found: {rocrate_path}") # check if remote validation is enabled disable_remote_crate_download = 
settings.disable_remote_crate_download diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 8d2c399b1..5dd36087c 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -45,9 +45,8 @@ def __get_single_char_win32__(console: Optional[Console] = None, end: str = "\n" finally: if console: console.print(char, end=end if choices and char in choices else "") - if choices and char not in choices: - if console: - console.print(" [bold red]INVALID CHOICE[/bold red]", end=end) + if choices and char not in choices and console: + console.print(" [bold red]INVALID CHOICE[/bold red]", end=end) return char @@ -75,9 +74,8 @@ def __get_single_char_unix__(console: Optional[Console] = None, end: str = "\n", termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) if console: console.print(char, end=end if choices and char in choices else "") - if choices and char not in choices: - if console: - console.print(" [bold red]INVALID CHOICE[/bold red]", end=end) + if choices and char not in choices and console: + console.print(" [bold red]INVALID CHOICE[/bold red]", end=end) return char From b18d02acf969584da381d28616aeeb73ed7641dc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 16:09:22 +0200 Subject: [PATCH 188/352] fix: :rotating_light: add proper exception chaining in except blocks (B904) --- rocrate_validator/cli/commands/validate.py | 4 ++-- rocrate_validator/models.py | 4 ++-- rocrate_validator/requirements/shacl/checks.py | 4 ++-- rocrate_validator/rocrate.py | 2 +- rocrate_validator/utils/http.py | 2 +- rocrate_validator/utils/uri.py | 4 ++-- tests/unit/test_offline_cache_miss_warning.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index e15401b11..a0e8425f6 100644 --- a/rocrate_validator/cli/commands/validate.py +++ 
b/rocrate_validator/cli/commands/validate.py @@ -59,7 +59,7 @@ def validate_uri(ctx, param, value): except ROCrateInvalidURIError as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) - raise click.BadParameter(e.message, param=param) + raise click.BadParameter(e.message, param=param) from e return value @@ -334,7 +334,7 @@ def validate(ctx, raise ValueError( f"Invalid skip_checks value: {s}. " "It must be a comma-separated list of Fully Qualified Check IDs." - ) + ) from e logger.debug("Skip checks: %s", skip_checks_list) try: diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index b810bb313..9efd89175 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -200,7 +200,7 @@ def get(name: str) -> RequirementLevel: try: return getattr(LevelCollection, name.upper()) except AttributeError: - raise ValueError(f"Invalid RequirementLevel: {name}") + raise ValueError(f"Invalid RequirementLevel: {name}") from None @total_ordering @@ -3337,7 +3337,7 @@ def get_data_graph(self, refresh: bool = False) -> Graph: return self._data_graph except (HTTPError, FileNotFoundError) as e: logger.debug("Error loading data graph: %s", e) - raise ROCrateMetadataNotFoundError(str(self.rocrate_uri)) + raise ROCrateMetadataNotFoundError(str(self.rocrate_uri)) from e @property def data_graph(self) -> Graph: diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index e88038cf8..5771e87d1 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -270,7 +270,7 @@ def execute_check(self, context: ValidationContext): ) # The validation is postponed to the more specific profiles # so the check is not considered as failed. 
- raise SkipRequirementCheck(self, str(e)) + raise SkipRequirementCheck(self, str(e)) from e except ROCrateMetadataNotFoundError as e: logger.debug( "Unable to perform metadata validation due to missing metadata file: %s", @@ -305,7 +305,7 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): ) raise ROCrateMetadataNotFoundError( "Unable to perform metadata validation due to one or more errors in the JSON-LD data file" - ) + ) from e # Begin the timer start_time = timer() diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index ba5ddf55b..425dd96a5 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -900,7 +900,7 @@ def __get_file_info__(self, path: Union[str, Path]) -> zipfile.ZipInfo: return self._zipref.getinfo(str(path)) except KeyError: logger.error("File not found in zip: %s", path) - raise FileNotFoundError(f"File not found in zip: {path}") + raise FileNotFoundError(f"File not found in zip: {path}") from None def has_descriptor(self) -> bool: """ diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 714f1a5c9..351016f2d 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -127,7 +127,7 @@ def __init__(self, logger.debug(f"Setting cache_max_age to {cache_max_age}") self.cache_max_age = int(cache_max_age) except ValueError: - raise TypeError("cache_max_age must be an integer") + raise TypeError("cache_max_age must be an integer") from None self.cache_path_prefix = cache_path self.offline = bool(offline) self.no_cache = bool(no_cache) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 2111f584c..7a07bee60 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -148,7 +148,7 @@ def __init__(self, uri: Union[str, Path]): except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) - raise ValueError("Invalid URI: %s" % uri) + raise ValueError("Invalid URI: 
%s" % uri) from e @property def uri(self) -> str: @@ -316,7 +316,7 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo logger.error(e) if logger.isEnabledFor(logging.DEBUG): logger.exception(e) - raise errors.ROCrateInvalidURIError(uri) + raise errors.ROCrateInvalidURIError(uri) from e except Exception as e: if not silent: raise e diff --git a/tests/unit/test_offline_cache_miss_warning.py b/tests/unit/test_offline_cache_miss_warning.py index eae79b10e..eab453191 100644 --- a/tests/unit/test_offline_cache_miss_warning.py +++ b/tests/unit/test_offline_cache_miss_warning.py @@ -47,7 +47,7 @@ def test_find_offline_cache_miss_walks_context_chain(): try: raise OfflineCacheMissError("https://example.org/y") except OfflineCacheMissError: - raise RuntimeError("wrapped via context") + raise RuntimeError("wrapped via context") # noqa: B904 except Exception as outer: found = find_offline_cache_miss(outer) assert isinstance(found, OfflineCacheMissError) From 9a35c41b69cf61f45770564a7def003442e85153 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:10:20 +0200 Subject: [PATCH 189/352] refactor(models): extract check execution logic into private method (PLR0912) --- rocrate_validator/models.py | 129 ++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 66 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 9efd89175..3cded2a21 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1097,12 +1097,6 @@ def _do_validate_(self, context: ValidationContext) -> bool: self.name, len(self._checks), ) - - logger.debug( - "Running %s checks for Requirement '%s'", - len(self._checks), - self.name, - ) all_passed = True checks_to_perform = [ _ @@ -1111,78 +1105,21 @@ def _do_validate_(self, context: ValidationContext) -> bool: ] for check in checks_to_perform: try: - if check.overridden and not check.requirement.profile.identifier == 
context.profile_identifier: - logger.debug( - "Skipping check '%s' because overridden by '%r'", - check.identifier, - [_.identifier for _ in check.overridden_by], - ) - continue - if check.deactivated: - logger.debug("Skipping check '%s' because deactivated", check.identifier) - context.result._add_skipped_check(check) - continue - # Determine whether to skip event notification for inherited profiles - skip_event_notify = False - if ( - check.requirement.profile.identifier != context.profile_identifier - and context.settings.disable_inherited_profiles_issue_reporting - ): - logger.debug( - "Inherited profiles reporting disabled. " - "Skipping requirement %s as it belongs to an inherited profile %s", - check.requirement.identifier, - check.requirement.profile.identifier, - ) - skip_event_notify = True - # Notify the start of the check execution if not skip_event_notify is set to True - if not skip_event_notify: - context.validator.notify( - RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) - ) - # Execute the check - check_result = check.execute_check(context) - logger.debug("Result of check %s: %s", check.identifier, check_result) - context.result._add_executed_check(check, check_result) - # Notify the end of the check execution if not skip_event_notify is set to True - if not skip_event_notify: - context.validator.notify( - RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, - check, - validation_result=check_result, - ) - ) - logger.debug( - "Ran check '%s'. 
Got result %s", - check.identifier, - check_result, - ) - # Ensure the check result is a boolean - if not isinstance(check_result, bool): - logger.warning( - "Ignoring the check %s as it returned the value %r instead of a boolean", - check.name, - check_result, - ) - raise RuntimeError(f"Ignoring invalid result from check {check.name}") - # Aggregate the check result - all_passed = all_passed and check_result - if not all_passed and context.fail_fast: + all_passed, should_break = self.__execute_check__(check, context, all_passed) + if should_break: break except SkipRequirementCheck as e: logger.debug("Skipping check '%s' because: %s", check.name, e) context.result._add_skipped_check(check) continue except Exception as e: - # Ignore the fact that the check failed as far as the validation result is concerned. if context.maybe_warn_offline_cache_miss(e): logger.debug("Offline cache miss during check %s: %s", check, e) else: logger.warning("Unexpected error during check %s. Exception: %s", check, e) logger.warning("Consider reporting this as a bug.") if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Unhandled exception during check execution", exc_info=e) skipped_checks = set(self._checks) - set(checks_to_perform) context.result.skipped_checks.update(skipped_checks) logger.debug( @@ -1192,6 +1129,66 @@ def _do_validate_(self, context: ValidationContext) -> bool: ) return all_passed + def __execute_check__(self, check, context, all_passed): + if check.overridden and not check.requirement.profile.identifier == context.profile_identifier: + logger.debug( + "Skipping check '%s' because overridden by '%r'", + check.identifier, + [_.identifier for _ in check.overridden_by], + ) + return all_passed, False + if check.deactivated: + logger.debug("Skipping check '%s' because deactivated", check.identifier) + context.result._add_skipped_check(check) + return all_passed, False + # Determine whether to skip event notification for inherited profiles + 
skip_event_notify = False + if ( + check.requirement.profile.identifier != context.profile_identifier + and context.settings.disable_inherited_profiles_issue_reporting + ): + logger.debug( + "Inherited profiles reporting disabled. " + "Skipping requirement %s as it belongs to an inherited profile %s", + check.requirement.identifier, + check.requirement.profile.identifier, + ) + skip_event_notify = True + # Notify the start of the check execution if not skip_event_notify is set to True + if not skip_event_notify: + context.validator.notify( + RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) + ) + # Execute the check and get the result + check_result = check.execute_check(context) + logger.debug("Result of check %s: %s", check.identifier, check_result) + context.result._add_executed_check(check, check_result) + # Notify the end of the check execution if not skip_event_notify is set to True + if not skip_event_notify: + context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, + check, + validation_result=check_result, + ) + ) + logger.debug( + "Ran check '%s'. 
Got result %s", + check.identifier, + check_result, + ) + # Ensure the check result is a boolean value, otherwise log a warning and ignore the check result + if not isinstance(check_result, bool): + logger.warning( + "Ignoring the check %s as it returned the value %r instead of a boolean", + check.name, + check_result, + ) + raise RuntimeError(f"Ignoring invalid result from check {check.name}") + new_all_passed = all_passed and check_result + should_break = not new_all_passed and context.fail_fast + return new_all_passed, should_break + def __eq__(self, other: object) -> bool: if not isinstance(other, Requirement): raise TypeError(f"Cannot compare {type(self)} with {type(other)}") From 276b96436cf5baba19b11ab4462b4690e77816a2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:11:33 +0200 Subject: [PATCH 190/352] refactor(models): replace if-elif chain with __event_handlers__ dispatch property (PLR0912) --- rocrate_validator/models.py | 119 +++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 49 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 3cded2a21..b61934d09 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1976,60 +1976,81 @@ def __initialise__(cls, validation_settings: ValidationSettings): return result def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: - # logger.debug("Event: %s", event.event_type) - if event.event_type == EventType.VALIDATION_START: - logger.debug("Validation started") - self._stats["started_at"] = datetime.now(timezone.utc) - if event.event_type == EventType.PROFILE_VALIDATION_START: - assert isinstance(event, ProfileValidationEvent) - logger.debug("Profile validation start: %s", event.profile.identifier) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: - logger.debug("Requirement validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: - 
logger.debug("Requirement check validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: - assert isinstance(event, RequirementCheckValidationEvent) - assert ctx is not None - target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and ( - not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier - ): - if event.validation_result is not None: - if event.validation_result: - self._stats["passed_checks"].append(event.requirement_check) - else: - self._stats["failed_checks"].append(event.requirement_check) - self._stats["validated_checks"].append(event.requirement_check) - self.notify_listeners() + self.__event_handlers__.get(event.event_type, lambda e, c: None)(event, ctx) + + def __handle_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + logger.debug("Validation started") + self._stats["started_at"] = datetime.now(timezone.utc) + + def __handle_profile_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + assert isinstance(event, ProfileValidationEvent) + logger.debug("Profile validation start: %s", event.profile.identifier) + + def __handle_requirement_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + logger.debug("Requirement validation start") + + def __handle_requirement_check_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + logger.debug("Requirement check validation start") + + def __handle_requirement_check_validation_end__(self, event: Event, ctx: Optional[ValidationContext]) -> None: + assert isinstance(event, RequirementCheckValidationEvent) + assert ctx is not None + target_profile = ctx.target_validation_profile + if not event.requirement_check.requirement.hidden and ( + not event.requirement_check.overridden + or target_profile.identifier == 
event.requirement_check.requirement.profile.identifier + ): + if event.validation_result is not None: + if event.validation_result: + self._stats["passed_checks"].append(event.requirement_check) else: - logger.debug( - "Requirement check validation result is None: %s", - event.requirement_check.identifier, - ) + self._stats["failed_checks"].append(event.requirement_check) + self._stats["validated_checks"].append(event.requirement_check) + self.notify_listeners() else: logger.debug( - "Skipping requirement check validation: %s", + "Requirement check validation result is None: %s", event.requirement_check.identifier, ) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: - assert isinstance(event, RequirementValidationEvent) - if not event.requirement.hidden: - if event.validation_result: - self._stats["passed_requirements"].append(event.requirement) - else: - self._stats["failed_requirements"].append(event.requirement) - self._stats["validated_requirements"].append(event.requirement) - self.notify_listeners() - elif event.event_type == EventType.PROFILE_VALIDATION_END: - assert isinstance(event, ProfileValidationEvent) - self._stats["validated_profiles"].append(event.profile) - logger.debug("Profile validation ended: %s", event.profile.identifier) - elif event.event_type == EventType.VALIDATION_END: - assert isinstance(event, ValidationEvent) - self._result = event.validation_result - self._stats["finished_at"] = datetime.now(timezone.utc) - logger.debug("Validation ended with result: %s", event.validation_result) + else: + logger.debug( + "Skipping requirement check validation: %s", + event.requirement_check.identifier, + ) + + def __handle_requirement_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + assert isinstance(event, RequirementValidationEvent) + if not event.requirement.hidden: + if event.validation_result: + self._stats["passed_requirements"].append(event.requirement) + else: + 
self._stats["failed_requirements"].append(event.requirement) + self._stats["validated_requirements"].append(event.requirement) + self.notify_listeners() + + def __handle_profile_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + assert isinstance(event, ProfileValidationEvent) + self._stats["validated_profiles"].append(event.profile) + logger.debug("Profile validation ended: %s", event.profile.identifier) + + def __handle_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + assert isinstance(event, ValidationEvent) + self._result = event.validation_result + self._stats["finished_at"] = datetime.now(timezone.utc) + logger.debug("Validation ended with result: %s", event.validation_result) + + @property + def __event_handlers__(self): + return { + EventType.VALIDATION_START: self.__handle_validation_start__, + EventType.PROFILE_VALIDATION_START: self.__handle_profile_validation_start__, + EventType.REQUIREMENT_VALIDATION_START: self.__handle_requirement_validation_start__, + EventType.REQUIREMENT_CHECK_VALIDATION_START: self.__handle_requirement_check_validation_start__, + EventType.REQUIREMENT_CHECK_VALIDATION_END: self.__handle_requirement_check_validation_end__, + EventType.REQUIREMENT_VALIDATION_END: self.__handle_requirement_validation_end__, + EventType.PROFILE_VALIDATION_END: self.__handle_profile_validation_end__, + EventType.VALIDATION_END: self.__handle_validation_end__, + } def to_dict(self) -> dict: """ From 4614940d577b111a3819bbb7c8164d5357bf6ad7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:12:33 +0200 Subject: [PATCH 191/352] refactor(cache): extract _resolve_warmup_urls_from_profiles to reduce PLR0912 --- rocrate_validator/cli/commands/cache.py | 91 +++++++++++++------------ 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index feef95168..86ae29399 100644 --- 
a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -348,52 +348,9 @@ def cache_warm( # source (no -p, no --crate, no --url, no --all-profiles). any_explicit_source = bool(crate or explicit_urls or requested_ids or all_profiles) if all_profiles or requested_ids or not any_explicit_source: - Profile.load_profiles( - profiles_path=profiles_dir, - extra_profiles_path=extra_dir, + urls, profile_scope = _resolve_warmup_urls_from_profiles( + console, profiles_dir, extra_dir, requested_ids ) - loaded_profiles = list(Profile.all()) - if requested_ids: - selected = [] - missing: list[str] = [] - # (requested, resolved, all candidates) for tokens that matched - # more than one versioned profile — we warn so the user knows - # which one was picked and how to opt for a different version. - ambiguous_fallbacks: list[tuple[str, Profile, list[Profile]]] = [] - for ident in requested_ids: - profile = Profile.get_by_identifier(ident) - if profile is None: - # Mirror the fallback used by `validate`: if no exact - # identifier match, treat the value as a token and - # pick the highest-version profile sharing it. - candidates = Profile.get_by_token(ident) or [] - if candidates: - profile = max(candidates, key=lambda p: p.version) - if len(candidates) > 1: - ambiguous_fallbacks.append((ident, profile, candidates)) - if profile is None: - missing.append(ident) - else: - selected.append(profile) - for requested, resolved, candidates in ambiguous_fallbacks: - other_versions = sorted( - p.identifier for p in candidates if p.identifier != resolved.identifier - ) - console.print( - f"[yellow]Note:[/yellow] '{requested}' matched multiple profiles; " - f"using [cyan]{resolved.identifier}[/cyan] (highest version). " - f"Pass the full identifier to pick a different one " - f"(available: {', '.join(other_versions)})." 
- ) - if missing: - console.print( - f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" - ) - profile_scope = f"profiles: {', '.join(p.identifier for p in selected)}" - urls = discover_cacheable_urls_from_profiles(selected) - else: - profile_scope = "all installed profiles" - urls = discover_cacheable_urls_from_profiles(loaded_profiles) results: list[WarmUpResult] = [] if urls: @@ -445,6 +402,50 @@ def cache_warm( ctx.exit(1) +def _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, requested_ids): + Profile.load_profiles( + profiles_path=profiles_dir, + extra_profiles_path=extra_dir, + ) + loaded_profiles = list(Profile.all()) + if requested_ids: + selected = [] + missing: list[str] = [] + ambiguous_fallbacks: list[tuple[str, Profile, list[Profile]]] = [] + for ident in requested_ids: + profile = Profile.get_by_identifier(ident) + if profile is None: + candidates = Profile.get_by_token(ident) or [] + if candidates: + profile = max(candidates, key=lambda p: p.version) + if len(candidates) > 1: + ambiguous_fallbacks.append((ident, profile, candidates)) + if profile is None: + missing.append(ident) + else: + selected.append(profile) + for requested, resolved, candidates in ambiguous_fallbacks: + other_versions = sorted( + p.identifier for p in candidates if p.identifier != resolved.identifier + ) + console.print( + f"[yellow]Note:[/yellow] '{requested}' matched multiple profiles; " + f"using [cyan]{resolved.identifier}[/cyan] (highest version). " + f"Pass the full identifier to pick a different one " + f"(available: {', '.join(other_versions)})." 
+ ) + if missing: + console.print( + f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" + ) + profile_scope = f"profiles: {', '.join(p.identifier for p in selected)}" + urls = discover_cacheable_urls_from_profiles(selected) + else: + profile_scope = "all installed profiles" + urls = discover_cacheable_urls_from_profiles(loaded_profiles) + return urls, profile_scope + + def _warm_remote_crates(urls: list[str]) -> list[WarmUpResult]: """ Download each remote RO-Crate URL via ``HttpRequester.fetch_fresh`` From 929a75bd87bb6b55d07297081b19a468484a5a60 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:13:06 +0200 Subject: [PATCH 192/352] chore(validate): suppress PLR0912 on validate command --- rocrate_validator/cli/commands/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index a0e8425f6..526536001 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -239,7 +239,7 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.pass_context -def validate(ctx, +def validate(ctx, # noqa: PLR0912 profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Optional[Path] = None, profile_identifier: tuple[str, ...] 
= (), From b21417610b79fed66d1ce2da978930a7bef2df88 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:15:08 +0200 Subject: [PATCH 193/352] refactor(0_file_descriptor_format): reduce branch nesting in is_entity_flat_recursive (PLR0912) --- .../ro-crate/must/0_file_descriptor_format.py | 127 +++++++++--------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 0b5db8db6..b64ad847b 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -212,6 +212,66 @@ def check_context(self, context: ValidationContext) -> bool: def check_flattened(self, context: ValidationContext) -> bool: """ Check if the file descriptor is flattened """ + def _validate_non_root_entity(entity: Any, fail_fast: bool) -> bool: + result = True + if "@id" in entity and "@value" in entity: + context.result.add_issue( + ( + f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' + 'an object with an @value represents a value object, which is a literal value such as ' + 'a string, number, date, or language-tagged string. This object is not an identifiable ' + 'resource, but a simple literal value.' 
+ ), + self + ) + result = False + if fail_fast: + return False + + if "@value" in entity: + if not isinstance(entity, dict): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'it MUST be a dictionary.', + self + ) + result = False + if fail_fast: + return False + + has_language = "@language" in entity + has_type = "@type" in entity + + if has_language and has_type: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + '@language and @type cannot coexist.', + self + ) + result = False + if fail_fast: + return False + + if has_language and not isinstance(entity["@value"], str): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'if @language is present, @value must be a string.', + self + ) + result = False + if fail_fast: + return False + elif "@id" not in entity or len(entity) > 1: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' + 'it MUST have only @id, but no other properties.', + self + ) + result = False + if fail_fast: + return False + return result + def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool: """ Recursively check if the given data corresponds to a flattened JSON-LD object and returns False if it does not and is not a root element @@ -224,69 +284,10 @@ def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool result = False if fail_fast: return False - # if this is not the root element, it must not contain more properties than @id - else: - if "@id" in entity and "@value" in entity: - # add issue if both @id and @value are present - context.result.add_issue( - ( - f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' - 'an object with an @value represents a value object, which is a literal value such as ' - 'a string, number, date, or language-tagged 
string. This object is not an identifiable ' - 'resource, but a simple literal value.' - ), - self - ) - result = False - if fail_fast: - return False - - # Handle value objects - if "@value" in entity: - # Inline the checks from is_value_object and add issues for each violation - if not isinstance(entity, dict): - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'it MUST be a dictionary.', - self - ) - result = False - if fail_fast: - return False - - has_language = "@language" in entity - has_type = "@type" in entity - - if has_language and has_type: - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - '@language and @type cannot coexist.', - self - ) - result = False - if fail_fast: - return False - - if has_language and not isinstance(entity["@value"], str): - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'if @language is present, @value must be a string.', - self - ) - result = False - if fail_fast: - return False - # Handle node objects: - # every remaining entity with len(entity) > 1 must be a node object - elif "@id" not in entity or len(entity) > 1: - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' - 'it MUST have only @id, but no other properties.', - self - ) - result = False - if fail_fast: - return False + elif not _validate_non_root_entity(entity, fail_fast): + result = False + if fail_fast: + return False if isinstance(entity, list): for element in entity: if not is_entity_flat_recursive(element, is_first=False, fail_fast=fail_fast): From d9952040644b87cfc693ffd0a5dc66511f23c169 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:19:52 +0200 Subject: [PATCH 194/352] refactor(checks): break up _execute_impl to reduce PLR0912 and PLR0915 --- .../requirements/shacl/checks.py | 61 ++++++++++--------- 1 file changed, 32 
insertions(+), 29 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 5771e87d1..3a19b0990 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -339,24 +339,34 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): # store the validation result in the context start_time = timer() - # if the validation fails, process the failed checks + failed_requirements_checks, failed_requirements_checks_violations = self.__collect_failed_checks__( + shacl_context, shacl_result, shapes_registry, shapes_graph + ) + failed_requirement_checks_notified = self.__process_failed_checks__( + shacl_context, failed_requirements_checks, failed_requirements_checks_violations + ) + self.__notify_skipped_checks__(shacl_context, failed_requirement_checks_notified) + + logger.debug("Remaining skipped checks: %r", len(shacl_context.result.skipped_checks)) + for skipped_check in shacl_context.result.skipped_checks: + logger.debug( + "Remaining skipped check: %r - %s", + skipped_check.identifier, + skipped_check.name, + ) + end_time = timer() + logger.debug(f"Execution time for parsing the validation result: {end_time - start_time} seconds") + + return failed_requirements_checks + + def __collect_failed_checks__(self, shacl_context, shacl_result, shapes_registry, shapes_graph): failed_requirements_checks = set() failed_requirements_checks_violations: dict[str, list[SHACLViolation]] = {} - failed_requirement_checks_notified = [ - _.check.identifier - for _ in shacl_context.result.get_issues( - min_severity=cast(Severity, shacl_context.settings.requirement_severity)) - ] - - logger.debug("Parsing Validation with result: %s", shacl_result) - # process the failed checks to extract the requirement checks involved for violation in shacl_result.violations: shape = None try: shape = shapes_registry.get_shape(Shape.compute_key(shapes_graph, 
violation.sourceShape)) except (ValueError, KeyError): - # sourceShape may be a BNode (e.g. an inline sh:sparql constraint). - # Attempt to resolve the owning NodeShape/PropertyShape via the graph. shape = resolve_parent_shape(shapes_graph, violation.sourceShape, shapes_registry) if shape is None: logger.warning( @@ -395,9 +405,15 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): if violations is None: failed_requirements_checks_violations[requirementCheck.identifier] = (violations := []) violations.append(violation) - # sort the failed checks by identifier and severity - # to ensure a consistent order of the issues - # and to make the fail fast mode deterministic + return failed_requirements_checks, failed_requirements_checks_violations + + def __process_failed_checks__(self, shacl_context, failed_requirements_checks, + failed_requirements_checks_violations): + failed_requirement_checks_notified = [ + _.check.identifier + for _ in shacl_context.result.get_issues( + min_severity=cast(Severity, shacl_context.settings.requirement_severity)) + ] for requirementCheck in sorted(failed_requirements_checks, key=lambda x: (x.identifier, x.severity)): # if the check is not in the current profile # and the disable_inherited_profiles_reporting is enabled, skip it @@ -468,15 +484,14 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): # if the fail fast mode is enabled, stop the validation after the first failed check if shacl_context.fail_fast: break + return failed_requirement_checks_notified - # As above, but for skipped checks which are not failed - logger.debug("Skipped checks: %s", len(shacl_context.result.skipped_checks)) + def __notify_skipped_checks__(self, shacl_context, failed_requirement_checks_notified): for skipped_check in list(shacl_context.result.skipped_checks): logger.debug("Processing skipped check: %s", skipped_check.identifier) if not isinstance(skipped_check, SHACLCheck): logger.debug("Skipped check is not a 
SHACLCheck: %s", skipped_check.identifier) continue - # if skipped_check.requirement.profile != shacl_context.current_validation_profile and if skipped_check.identifier not in failed_requirement_checks_notified: failed_requirement_checks_notified.append(skipped_check.identifier) shacl_context.result._add_executed_check(skipped_check, True) @@ -497,18 +512,6 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): skipped_check.identifier, ) - logger.debug("Remaining skipped checks: %r", len(shacl_context.result.skipped_checks)) - for skipped_check in shacl_context.result.skipped_checks: - logger.debug( - "Remaining skipped check: %r - %s", - skipped_check.identifier, - skipped_check.name, - ) - end_time = timer() - logger.debug(f"Execution time for parsing the validation result: {end_time - start_time} seconds") - - return failed_requirements_checks - @classmethod def get_instance(cls, shape: Shape) -> Optional["SHACLCheck"]: return cls.__instances__.get(hash(shape), None) From e3ed5048761617a99180769787f89cfdc864b50d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:22:31 +0200 Subject: [PATCH 195/352] refactor(tests): extract _prepare_temp_rocrate from do_entity_test (PLR0912) --- tests/shared.py | 66 +++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/tests/shared.py b/tests/shared.py index 162ba5def..c794433f8 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -91,6 +91,37 @@ def replace_uri_in_graph(graph, old_uri_str, new_uri_str): return g +def _prepare_temp_rocrate( + rocrate_path: Path, + rocrate_entity_patch: Optional[dict], + rocrate_entity_mod_sparql: Optional[str], +) -> Path: + temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) + shutil.copytree(rocrate_path, temp_rocrate_path) + with open(temp_rocrate_path / "ro-crate-metadata.json", "r") as f: + rocrate = json.load(f) + if rocrate_entity_patch is not None: + for key, value in 
rocrate_entity_patch.items(): + for entity in rocrate["@graph"]: + if entity["@id"] == key: + entity.update(value) + break + with open(temp_rocrate_path / "ro-crate-metadata.json", "w") as f: + json.dump(rocrate, f) + if rocrate_entity_mod_sparql is not None: + rocrate_graph = load_graph_and_preserve_relative_ids(rocrate) + rocrate_graph.update(rocrate_entity_mod_sparql) + context_uri = "https://w3id.org/ro/crate/1.1/context" + rocrate_graph.serialize( + Path(temp_rocrate_path, "ro-crate-metadata.json"), + format="json-ld", + context=context_uri, + indent=2, + use_native_types=True, + ) + return temp_rocrate_path + + def do_entity_test( rocrate_path: Union[Path, str], requirement_severity: models.Severity, @@ -125,38 +156,9 @@ def do_entity_test( temp_rocrate_path = None if any([rocrate_entity_patch, rocrate_entity_mod_sparql]) and rocrate_path.is_dir(): - # create a temporary copy of the RO-Crate - temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) - # copy the RO-Crate to the temporary path using shutil - shutil.copytree(rocrate_path, temp_rocrate_path) - # load the RO-Crate metadata as RO-Crate JSON-LD - with open(temp_rocrate_path / "ro-crate-metadata.json", "r") as f: - rocrate = json.load(f) - # update the RO-Crate metadata with the patch - if rocrate_entity_patch is not None: - for key, value in rocrate_entity_patch.items(): - for entity in rocrate["@graph"]: - if entity["@id"] == key: - entity.update(value) - break - # save the updated RO-Crate metadata - with open(temp_rocrate_path / "ro-crate-metadata.json", "w") as f: - json.dump(rocrate, f) - # update the RO-Crate metadata using SPARQL, if required - if rocrate_entity_mod_sparql is not None: - rocrate_graph = load_graph_and_preserve_relative_ids(rocrate) - - rocrate_graph.update(rocrate_entity_mod_sparql) - - # save the updated RO-Crate metadata - context = "https://w3id.org/ro/crate/1.1/context" - rocrate_graph.serialize( - Path(temp_rocrate_path, "ro-crate-metadata.json"), - 
format="json-ld", - context=context, - indent=2, - use_native_types=True, - ) + temp_rocrate_path = _prepare_temp_rocrate( + rocrate_path, rocrate_entity_patch, rocrate_entity_mod_sparql + ) rocrate_path = temp_rocrate_path if expected_triggered_requirements is None: From e573686db556a413ed55b4fbeb5a68742051d776 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:22:55 +0200 Subject: [PATCH 196/352] refactor: reformat code --- tests/shared.py | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/tests/shared.py b/tests/shared.py index c794433f8..a055087f4 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -34,8 +34,7 @@ T = TypeVar("T") -SPARQL_PREFIXES = """PREFIX schema: -""" +SPARQL_PREFIXES = "PREFIX schema: " def first(c: Collection[T]) -> T: @@ -50,9 +49,7 @@ def collect_ids(obj): if isinstance(obj, dict): if "@id" in obj: idv = obj["@id"] - if isinstance(idv, str) and ( - idv.startswith("./") or idv.startswith("../") or idv.startswith("#") - ): + if isinstance(idv, str) and (idv.startswith("./") or idv.startswith("../") or idv.startswith("#")): rel_ids.add(idv) for v in obj.values(): collect_ids(v) @@ -143,9 +140,9 @@ def do_entity_test( Additional keyword arguments (kwargs) are passed along to initialise ValidationSettings. 
""" - assert not ( - rocrate_entity_patch and rocrate_entity_mod_sparql - ), "Cannot use rocrate_entity_patch and rocrate_entity_mod_sparql together" + assert not (rocrate_entity_patch and rocrate_entity_mod_sparql), ( + "Cannot use rocrate_entity_patch and rocrate_entity_mod_sparql together" + ) # declare variables failed_requirements = None @@ -156,9 +153,7 @@ def do_entity_test( temp_rocrate_path = None if any([rocrate_entity_patch, rocrate_entity_mod_sparql]) and rocrate_path.is_dir(): - temp_rocrate_path = _prepare_temp_rocrate( - rocrate_path, rocrate_entity_patch, rocrate_entity_mod_sparql - ) + temp_rocrate_path = _prepare_temp_rocrate(rocrate_path, rocrate_entity_patch, rocrate_entity_mod_sparql) rocrate_path = temp_rocrate_path if expected_triggered_requirements is None: @@ -194,9 +189,9 @@ def do_entity_test( assert result.context is not None, "Validation context should not be None" f"Expected requirement severity to be {requirement_severity}, but got {result.context.requirement_severity}" - assert ( - result.passed() == expected_validation_result - ), f"RO-Crate should be {'valid' if expected_validation_result else 'invalid'}" + assert result.passed() == expected_validation_result, ( + f"RO-Crate should be {'valid' if expected_validation_result else 'invalid'}" + ) # check requirement failed_requirements = [_.name for _ in result.failed_requirements] @@ -214,28 +209,20 @@ def do_entity_test( # check requirement issues detected_issues = [ - issue.message - for issue in result.get_issues(requirement_severity) - if issue.message is not None + issue.message for issue in result.get_issues(requirement_severity) if issue.message is not None ] logger.debug("Detected issues: %s", detected_issues) logger.debug("Expected issues: %s", expected_triggered_issues) for expected_issue in expected_triggered_issues: - if not any( - expected_issue in issue for issue in detected_issues - ): # support partial match - assert ( - False - ), f'The expected issue 
"{expected_issue}" was not found in the detected issues' + if not any(expected_issue in issue for issue in detected_issues): # support partial match + assert False, f'The expected issue "{expected_issue}" was not found in the detected issues' except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) logger.debug("Failed to validate RO-Crate @ path: %s", rocrate_path) logger.debug("Requirement severity: %s", requirement_severity) logger.debug("Expected validation result: %s", expected_validation_result) - logger.debug( - "Expected triggered requirements: %s", expected_triggered_requirements - ) + logger.debug("Expected triggered requirements: %s", expected_triggered_requirements) logger.debug("Expected triggered issues: %s", expected_triggered_issues) logger.debug("Failed requirements: %s", failed_requirements) logger.debug("Detected issues: %s", detected_issues) From 071f27947609f1f3f95b62e96b46153463b24f65 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:24:40 +0200 Subject: [PATCH 197/352] refactor: fix S112 warning --- rocrate_validator/requirements/shacl/checks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 3a19b0990..95432f713 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -130,7 +130,8 @@ def deactivated(self) -> bool: for profile in Profile.get_descendants(owning_profile): try: registry = ShapesRegistry.get_instance(profile) - except Exception: + except Exception as e: + logger.debug("Unable to get shapes registry for profile %s: %s", profile.identifier, e) continue if registry.is_node_deactivated(shape.node): return True From c73e6b2edf8b98bf4be5670bf80c1d07631772cd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:36:13 +0200 Subject: [PATCH 198/352] 
=?UTF-8?q?chore:=20=F0=9F=A7=B9=20add=20explicit?= =?UTF-8?q?=20encoding=3D"utf-8"=20to=20open()=20calls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 4 ++-- tests/integration/profiles/test_metadata_only.py | 2 +- tests/integration/test_sparql_constraints.py | 2 +- tests/shared.py | 4 ++-- tests/unit/test_services.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index b61934d09..77052e5a9 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -511,7 +511,7 @@ def description(self) -> str: """ if not self._description: if self.path and self.readme_file_path.exists(): - with open(self.readme_file_path, "r") as f: + with open(self.readme_file_path, encoding="utf-8") as f: self._description = f.read() else: self._description = self.comment @@ -2613,7 +2613,7 @@ def to_json(self, path: Optional[Path] = None) -> str: """ result = json.dumps(self.to_dict(), indent=4, cls=CustomEncoder) if path: - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: f.write(result) return result diff --git a/tests/integration/profiles/test_metadata_only.py b/tests/integration/profiles/test_metadata_only.py index 9b998a545..53ec7c43e 100644 --- a/tests/integration/profiles/test_metadata_only.py +++ b/tests/integration/profiles/test_metadata_only.py @@ -67,7 +67,7 @@ def test_valid_ro_crates_from_metadata_dict(valid_roc_path): metadata_dict = None # Load the metadata dict from the RO-Crate if not isinstance(valid_roc_path, str): - with open(valid_roc_path / "ro-crate-metadata.json", "r") as f: + with open(valid_roc_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: metadata_dict = json.load(f) assert metadata_dict is not None, "Failed to load metadata dict" assert isinstance(metadata_dict, dict), "Metadata dict is not a dictionary" diff --git 
a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index a59bdfa8b..c6e1f6c20 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -81,7 +81,7 @@ def sparql_test_rocrate(): ], } - with open(rocrate_dir / "ro-crate-metadata.json", "w") as f: + with open(rocrate_dir / "ro-crate-metadata.json", "w", encoding="utf-8") as f: json.dump(metadata, f, indent=2) yield rocrate_dir diff --git a/tests/shared.py b/tests/shared.py index a055087f4..9e441570e 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -95,7 +95,7 @@ def _prepare_temp_rocrate( ) -> Path: temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) shutil.copytree(rocrate_path, temp_rocrate_path) - with open(temp_rocrate_path / "ro-crate-metadata.json", "r") as f: + with open(temp_rocrate_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: rocrate = json.load(f) if rocrate_entity_patch is not None: for key, value in rocrate_entity_patch.items(): @@ -103,7 +103,7 @@ def _prepare_temp_rocrate( if entity["@id"] == key: entity.update(value) break - with open(temp_rocrate_path / "ro-crate-metadata.json", "w") as f: + with open(temp_rocrate_path / "ro-crate-metadata.json", "w", encoding="utf-8") as f: json.dump(rocrate, f) if rocrate_entity_mod_sparql is not None: rocrate_graph = load_graph_and_preserve_relative_ids(rocrate) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index 1aef73709..a663bd435 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -215,7 +215,7 @@ def test_valid_crate_metadata_dict_with_metadata_only(): logger.debug("Validating a local RO-Crate in metadata-only mode: %s", crate_path) # Load the metadata dict from the RO-Crate - with open(crate_path / "ro-crate-metadata.json", "r") as f: + with open(crate_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: metadata_dict = json.loads(f.read()) # Define shared settings object 
From df7ac8c9d17edf1736b7149af8a810be21afa2ae Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:43:29 +0200 Subject: [PATCH 199/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20replace=20no?= =?UTF-8?q?t=20x=20=3D=3D=20y=20with=20x=20!=3D=20y=20for=20readability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 7 +++---- .../ro-crate/should/2_root_data_entity_relative_uri.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 77052e5a9..85c457b2e 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1130,7 +1130,7 @@ def _do_validate_(self, context: ValidationContext) -> bool: return all_passed def __execute_check__(self, check, context, all_passed): - if check.overridden and not check.requirement.profile.identifier == context.profile_identifier: + if check.overridden and check.requirement.profile.identifier != context.profile_identifier: logger.debug( "Skipping check '%s' because overridden by '%r'", check.identifier, @@ -1331,15 +1331,14 @@ def ok_file(p: Path) -> bool: return ( p.is_file() and p.suffix in PROFILE_FILE_EXTENSIONS - and not p.name == DEFAULT_ONTOLOGY_FILE - and not p.name == PROFILE_SPECIFICATION_FILE + and p.name not in {DEFAULT_ONTOLOGY_FILE, PROFILE_SPECIFICATION_FILE} and not p.name.startswith(".") and not p.name.startswith("_") ) files = sorted( (p for p in profile.path.rglob("*.*") if ok_file(p)), - key=lambda x: (not x.suffix == ".py", x), + key=lambda x: (x.suffix != ".py", x), ) # set the requirement level corresponding to the severity diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py index 9cbb0751c..259c54b40 100644 --- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py +++ 
b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -30,7 +30,7 @@ class RootDataEntityRelativeURI(PyFunctionCheck): def check_relative_uris(self, context: ValidationContext) -> bool: """Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD""" try: - if not context.ro_crate.metadata.get_root_data_entity().id == './': + if context.ro_crate.metadata.get_root_data_entity().id != './': context.result.add_issue( 'Root Data Entity URI is not denoted by the string `./`', self) return False From 31b6765b1fc555018ffcad35e229ec48a2257327 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:48:22 +0200 Subject: [PATCH 200/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20remove=20red?= =?UTF-8?q?undant=20intermediate=20variable=20assignments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 3 +-- rocrate_validator/requirements/shacl/utils.py | 6 ++---- rocrate_validator/rocrate.py | 6 ++---- .../utils/io_helpers/output/text/layout/report.py | 4 +--- rocrate_validator/utils/python_helpers.py | 13 +++++++------ rocrate_validator/utils/versioning.py | 3 +-- 6 files changed, 14 insertions(+), 21 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 85c457b2e..0c43b74a6 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -660,8 +660,7 @@ def __extract_token_from_path__(self) -> str: # Remove the base path from the identifier identifier = identifier.replace(f"{base_path}/", "") # Replace slashes with hyphens - identifier = identifier.replace("/", "-") - return identifier + return identifier.replace("/", "-") def __init_token_version__(self) -> tuple[str, Optional[str]]: # try to extract the token from the specs or the path diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 4502d1fec..ae23db580 100644 --- 
a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -122,10 +122,8 @@ def compute_hash(g: Graph, s: Node): triples_values = sorted(__compute_values__(g, s)) # Convert the list of triples values to a string representation triples_string = str(triples_values) - # Calculate the hash of the triples string - hash_value = hashlib.sha256(triples_string.encode()).hexdigest() - # Return the hash value - return hash_value + # Calculate and return the hash of the triples string + return hashlib.sha256(triples_string.encode()).hexdigest() def compute_key(g: Graph, s: Node) -> str: diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 425dd96a5..9e8fad932 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -72,13 +72,12 @@ def is_remote(self) -> bool: @classmethod def get_id_as_path(cls, entity_id: str, ro_crate: Optional[ROCrate] = None) -> Path: - result = cls.get_path_from_identifier( + return cls.get_path_from_identifier( entity_id, ro_crate.uri.as_path() if ro_crate and ro_crate.uri.is_local_resource() else None, ) - return result @staticmethod def get_path_from_identifier( @@ -474,8 +473,7 @@ def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Optional[Path] """ if cls is not ROCrate: # If called on a subclass, use normal instantiation - instance = super(ROCrate, cls).__new__(cls) - return instance + return super(ROCrate, cls).__new__(cls) # If called on ROCrate directly, use factory logic instance = cls.new_instance(uri) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 813b998e9..f80dbb882 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -90,10 +90,8 @@ def progress_monitor(self) -> ProgressMonitor: def live(self, update_callable: Callable) -> Any: # Start 
live rendering - result = None with Live(self.layout, console=self.console, refresh_per_second=10, transient=False): - result = update_callable() - return result + return update_callable() def __init_layout__(self): diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index 02bf525fe..9f5f653c9 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -55,12 +55,13 @@ def get_classes_from_file(file_path: Path, logger.debug("Module: %r", module) # Get all classes in the module that are subclasses of filter_class - classes = {name: cls for name, cls in inspect.getmembers(module, inspect.isclass) - if cls.__module__ == module_name - and (not class_name_suffix or cls.__name__.endswith(class_name_suffix)) - and (not filter_class or (issubclass(cls, filter_class) and cls != filter_class))} - - return classes + return { + name: cls + for name, cls in inspect.getmembers(module, inspect.isclass) + if cls.__module__ == module_name + and (not class_name_suffix or cls.__name__.endswith(class_name_suffix)) + and (not filter_class or (issubclass(cls, filter_class) and cls != filter_class)) + } def to_camel_case(snake_str: str) -> str: diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index a32116907..50c01cf5e 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -33,8 +33,7 @@ def run_git_command(command: list[str]) -> Optional[str]: import subprocess try: - output = subprocess.check_output(command, stderr=subprocess.DEVNULL).decode().strip() - return output + return subprocess.check_output(command, stderr=subprocess.DEVNULL).decode().strip() except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) From 4f6e6126b67b38f0a5671b7cf2ca553e3034289b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:49:19 +0200 Subject: [PATCH 201/352] 
=?UTF-8?q?style:=20=F0=9F=8E=A8=20normalize=20str?= =?UTF-8?q?ing=20quotes=20and=20reformat=20function=20signature?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/python_helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index 9f5f653c9..a600f708e 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -26,10 +26,10 @@ logger = logging.getLogger(__name__) -def get_classes_from_file(file_path: Path, - filter_class: Optional[type] = None, - class_name_suffix: Optional[str] = None) -> dict[str, type]: - """Get all classes in a Python file """ +def get_classes_from_file( + file_path: Path, filter_class: Optional[type] = None, class_name_suffix: Optional[str] = None +) -> dict[str, type]: + """Get all classes in a Python file""" # ensure the file path is a Path object assert file_path, "The file path is required" if not isinstance(file_path, Path): @@ -71,8 +71,8 @@ def to_camel_case(snake_str: str) -> str: :param snake_str: The snake case string :return: The camel case string """ - components = re.split('_|-', snake_str) - return components[0].capitalize() + ''.join(x.title() for x in components[1:]) + components = re.split(r"_|-", snake_str) + return components[0].capitalize() + "".join(x.title() for x in components[1:]) def get_requirement_name_from_file(file: Path, check_name: Optional[str] = None) -> str: From d92200dd6937b8efb5f0eb1a35e47ba25cc206ef Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 17:55:53 +0200 Subject: [PATCH 202/352] =?UTF-8?q?refactor:=20=F0=9F=8F=B7=EF=B8=8F=20use?= =?UTF-8?q?=20TypeError=20instead=20of=20ValueError=20for=20type=20errors?= =?UTF-8?q?=20(TRY004)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
rocrate_validator/models.py | 10 +++++----- .../profiles/ro-crate/must/0_file_descriptor_format.py | 2 +- rocrate_validator/requirements/shacl/validator.py | 2 +- rocrate_validator/utils/paths.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 0c43b74a6..49e934dac 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1183,7 +1183,7 @@ def __execute_check__(self, check, context, all_passed): check.name, check_result, ) - raise RuntimeError(f"Ignoring invalid result from check {check.name}") + raise TypeError(f"Ignoring invalid result from check {check.name}") new_all_passed = all_passed and check_result should_break = not new_all_passed and context.fail_fast return new_all_passed, should_break @@ -1201,7 +1201,7 @@ def __hash__(self): def __lt__(self, other: object) -> bool: if not isinstance(other, Requirement): - raise ValueError(f"Cannot compare Requirement with {type(other)}") + raise TypeError(f"Cannot compare Requirement with {type(other)}") return (self._order_number, self.name) < ( other._order_number, other.name, @@ -1514,12 +1514,12 @@ def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> d def __eq__(self, other: object) -> bool: if not isinstance(other, RequirementCheck): - raise ValueError(f"Cannot compare RequirementCheck with {type(other)}") + raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") return self.requirement == other.requirement and self.name == other.name def __lt__(self, other: object) -> bool: if not isinstance(other, RequirementCheck): - raise ValueError(f"Cannot compare RequirementCheck with {type(other)}") + raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") return (self.requirement, self.identifier) < ( other.requirement, other.identifier, @@ -3279,7 +3279,7 @@ def requirement_severity(self) -> Severity: if isinstance(severity, str): severity = Severity[severity] 
elif not isinstance(severity, Severity): - raise ValueError(f"Invalid severity type: {type(severity)}") + raise TypeError(f"Invalid severity type: {type(severity)}") return severity @property diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index b64ad847b..32a53125d 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -376,7 +376,7 @@ def __get_remote_context_keys__(self, context_uri: str) -> set: # Get the keys of the context jsonLD_ctx = self.__get_remote_context__(context_uri) if not isinstance(jsonLD_ctx, dict): - raise RuntimeError("The context is not a dictionary", self) + raise TypeError("The context is not a dictionary") return set(jsonLD_ctx.keys()) def __check_entity_keys__(self, entity: Any, diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 5325cc83b..567f2c5a6 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -449,7 +449,7 @@ def validate( # Validate data_graph if not isinstance(data_graph, (Graph, str, bytes)): - raise ValueError( + raise TypeError( "data_graph must be an instance of Graph, str, or bytes") # Validate inference diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 44fddc907..3fbd2224b 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -148,7 +148,7 @@ def shorten_path(p: Path) -> str: :raises ValueError: If the path is not a valid Path object """ if not isinstance(p, Path): - raise ValueError("The path must be a Path or ParseResult object") + raise TypeError("The path must be a Path or ParseResult object") try: cwd = Path.cwd() From 2fb3f97b0cbfb0179249e7dcc5fecb30d23d79d3 Mon Sep 17 00:00:00 2001 From: Marco Enrico 
Piras Date: Wed, 3 Jun 2026 18:07:15 +0200 Subject: [PATCH 203/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20ruff=20?= =?UTF-8?q?automatic=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RUF022: ordinate liste `__all__` in 3 file - SIM210: rimosso `True if ... else False` ridondante in 3 file - SIM211: `False if offline else True` → `not offline` - SIM108: if-else → ternario in cache.py, profiles.py, versioning.py, cache_warmup.py - SIM103: condizione inline invece di `if ... return True; return False` (requirements.py, uri.py) - SIM101: `isinstance(x, A) or isinstance(x, B)` → `isinstance(x, (A, B))` - SIM105: try-except-pass → `contextlib.suppress(OSError)` - SIM118: `key in dict.keys()` → `key in dict` - SIM113: loop manuale → `enumerate()` (requirements.py, utils.py) - SIM910: `dict.get(x, None)` → `dict.get(x)` (2 occorrenze) - UP008: `super(__class__, self)` → `super()` - UP031: `%s` format → f-string - UP037: rimosse quote inutili da annotazioni di tipo (9 occorrenze) - C401: `set(gen)` → set comprehension (3 occorrenze) - C411: rimosso `list()` superfluo attorno a list comprehension - C416: list comprehension → `list()` diretto - C419: rimossa list comprehension inutile in `all()`/`any()` - PIE790: rimossi `pass` inutili (5 occorrenze) - PIE808: `range(0, n)` → `range(n)` - PERF102: `.items()` → `.values()` - RUF005: `x + [y]` → `[*x, y]` (2 occorrenze) - RUF010: `.format()` / `%s` → f-string conversion flag `!s` (3 occorrenze) - PLR1714: `x == A or x == B` → `x in (A, B)` (3 occorrenze) - PLR2044: rimosso commento vuoto - B010: `setattr(obj, "attr", val)` → `obj.attr = val` (2 occorrenze) - PLR1722: `exit()` → `sys.exit()` --- rocrate_validator/cli/__init__.py | 2 +- rocrate_validator/cli/commands/cache.py | 5 +---- rocrate_validator/cli/commands/profiles.py | 9 +++------ rocrate_validator/cli/commands/validate.py | 4 ++-- rocrate_validator/cli/main.py | 2 +- rocrate_validator/errors.py | 2 +- 
rocrate_validator/events.py | 5 ++--- rocrate_validator/models.py | 12 ++++++------ .../ro-crate/must/0_file_descriptor_format.py | 2 +- .../should/2_root_data_entity_relative_uri.py | 2 +- rocrate_validator/requirements/shacl/__init__.py | 10 ++++++++-- rocrate_validator/requirements/shacl/checks.py | 2 +- rocrate_validator/requirements/shacl/models.py | 2 +- .../requirements/shacl/requirements.py | 5 +---- rocrate_validator/requirements/shacl/utils.py | 4 ++-- .../requirements/shacl/validator.py | 6 +++--- rocrate_validator/rocrate.py | 16 ++++++---------- rocrate_validator/utils/cache_warmup.py | 13 +++++-------- rocrate_validator/utils/http.py | 9 ++++----- rocrate_validator/utils/io_helpers/colors.py | 6 +++--- rocrate_validator/utils/io_helpers/input.py | 2 +- .../utils/io_helpers/output/__init__.py | 4 ++-- .../utils/io_helpers/output/json/formatters.py | 4 +--- .../io_helpers/output/text/layout/__init__.py | 2 +- .../io_helpers/output/text/layout/report.py | 1 - rocrate_validator/utils/log.py | 2 +- rocrate_validator/utils/uri.py | 9 +++------ rocrate_validator/utils/versioning.py | 5 +---- 28 files changed, 63 insertions(+), 84 deletions(-) diff --git a/rocrate_validator/cli/__init__.py b/rocrate_validator/cli/__init__.py index ec84cfba4..98b5f645f 100644 --- a/rocrate_validator/cli/__init__.py +++ b/rocrate_validator/cli/__init__.py @@ -15,4 +15,4 @@ from rocrate_validator.cli.commands import cache, profiles, validate from rocrate_validator.cli.main import cli -__all__ = ["cli", "cache", "profiles", "validate"] +__all__ = ["cache", "cli", "profiles", "validate"] diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 86ae29399..aa0fd9c84 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -40,10 +40,7 @@ def _resolve_cache_path(cache_path: Optional[Path]) -> Path: """Return the effective cache path, creating the parent directory.""" - if cache_path is None: - 
path = get_default_http_cache_path() - else: - path = Path(cache_path) + path = get_default_http_cache_path() if cache_path is None else Path(cache_path) path.parent.mkdir(parents=True, exist_ok=True) return path diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index 2be5eeb04..fb337320c 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -74,7 +74,7 @@ def profiles(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, help="Disable paging", default=False, show_default=True, - hidden=True if sys.platform == "win32" else False + hidden=sys.platform == "win32" ) @click.pass_context def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFAULT_PROFILES_PATH): @@ -172,7 +172,7 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA help="Disable paging", default=False, show_default=True, - hidden=True if sys.platform == "win32" else False + hidden=sys.platform == "win32" ) @click.pass_context def describe_profile(ctx, @@ -245,10 +245,7 @@ def describe_profile(ctx, ) # Build the profile table - if not verbose: - table = __compacted_describe_profile__(profile) - else: - table = __verbose_describe_profile__(profile) + table = __compacted_describe_profile__(profile) if not verbose else __verbose_describe_profile__(profile) with console.pager(pager=pager, styles=not console.no_color) if enable_pager else console: console.print(get_app_header_rule()) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 526536001..cf62c26ba 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -177,7 +177,7 @@ def validate_uri(ctx, param, value): help="Disable pagination of the validation details", default=False, show_default=True, - hidden=True if sys.platform == "win32" else False + hidden=sys.platform == "win32" ) @click.option( '-f', 
@@ -357,7 +357,7 @@ def validate(ctx, # noqa: PLR0912 "no_cache": no_cache, # When offline is requested, remote crate fetching must use the cache # instead of the "disable download" short-circuit. - "disable_remote_crate_download": False if offline else True, + "disable_remote_crate_download": not offline, } # Print the application header diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index 0d79ed0a0..8e29cd984 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -109,4 +109,4 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(f"An unexpected error occurred: {e}") - exit(2) + sys.exit(2) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index 22570e266..12f081a2a 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -278,7 +278,7 @@ def __repr__(self) -> str: @classmethod def default_error_message(cls, uri: Union[str, Path, URI]) -> str: - return f"\"{str(uri)}\" is not a valid RO-Crate URI. "\ + return f"\"{uri!s}\" is not a valid RO-Crate URI. "\ "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." 
diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index a1e181d5c..6d83b57d0 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -148,13 +148,12 @@ def update(self, event: Event, ctx: Optional[Any] = None): :param ctx: optional context :type ctx: Optional[Any] """ - pass class Publisher: def __init__(self, avoid_duplicate_notifications: bool = False): - self.__subscribers: set["Subscriber"] = set() - self.__notified_events: set["Event"] = set() + self.__subscribers: set[Subscriber] = set() + self.__notified_events: set[Event] = set() self.__avoid_duplicate_notifications = avoid_duplicate_notifications @property diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 49e934dac..3fc03edaa 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2538,8 +2538,8 @@ def failed_requirements(self) -> Collection[Requirement]: Get the requirements that failed at or above the configured `requirement_severity`. """ min_severity = self.context.requirement_severity - return set(issue.check.requirement for issue in self._issues - if issue.severity >= min_severity) + return {issue.check.requirement for issue in self._issues + if issue.severity >= min_severity} # --- Checks --- @property @@ -2548,8 +2548,8 @@ def failed_checks(self) -> Collection[RequirementCheck]: Get the checks that failed at or above the configured `requirement_severity`. 
""" min_severity = self.context.requirement_severity - return set(issue.check for issue in self._issues - if issue.severity >= min_severity) + return {issue.check for issue in self._issues + if issue.severity >= min_severity} def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: """ @@ -3013,7 +3013,7 @@ def detect_rocrate_profiles(self) -> list[Profile]: candidate_profiles, ) # unmatched candidate profiles - unmatched_profiles = candidate_profiles_uris.difference(set(p.uri for p in profiles)) + unmatched_profiles = candidate_profiles_uris.difference({p.uri for p in profiles}) logger.debug("Unmatched Candidate Profiles URIs: %s", unmatched_profiles) if len(unmatched_profiles) > 0: logger.warning( @@ -3444,7 +3444,7 @@ def __load_profiles__(self) -> list[Profile]: return [profile] # Set the profiles to validate against as the target profile and its inherited profiles - profiles = profile.inherited_profiles + [profile] + profiles = [*profile.inherited_profiles, profile] # if the check for duplicates is disabled, return the profiles if self.disable_check_for_duplicates: diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 32a53125d..3b864c84c 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -279,7 +279,7 @@ def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool result = True if isinstance(entity, dict): if is_first: - for _, elem in entity.items(): + for elem in entity.values(): if not is_entity_flat_recursive(elem, is_first=False, fail_fast=fail_fast): result = False if fail_fast: diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py index 259c54b40..800383cf3 100644 
--- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py +++ b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -37,5 +37,5 @@ def check_relative_uris(self, context: ValidationContext) -> bool: return True except Exception as e: context.result.add_issue( - f'Error checking Root Data Entity URI: {str(e)}', self) + f'Error checking Root Data Entity URI: {e!s}', self) return False diff --git a/rocrate_validator/requirements/shacl/__init__.py b/rocrate_validator/requirements/shacl/__init__.py index 7cb912126..699a0f8d8 100644 --- a/rocrate_validator/requirements/shacl/__init__.py +++ b/rocrate_validator/requirements/shacl/__init__.py @@ -17,5 +17,11 @@ from rocrate_validator.requirements.shacl.requirements import SHACLRequirement, SHACLRequirementLoader from rocrate_validator.requirements.shacl.validator import SHACLValidationResult, SHACLValidator -__all__ = ["SHACLCheck", "SHACLValidator", "SHACLValidationResult", - "SHACLValidationError", "SHACLRequirement", "SHACLRequirementLoader"] +__all__ = [ + "SHACLCheck", + "SHACLRequirement", + "SHACLRequirementLoader", + "SHACLValidationError", + "SHACLValidationResult", + "SHACLValidator", +] diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 95432f713..364dd0257 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -402,7 +402,7 @@ def __collect_failed_checks__(self, shacl_context, shacl_result, shapes_registry or requirementCheck.identifier not in shacl_context.settings.skip_checks ): failed_requirements_checks.add(requirementCheck) - violations = failed_requirements_checks_violations.get(requirementCheck.identifier, None) + violations = failed_requirements_checks_violations.get(requirementCheck.identifier) if violations is None: failed_requirements_checks_violations[requirementCheck.identifier] = (violations := []) 
violations.append(violation) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index fe68f0149..7c35ef07c 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -432,7 +432,7 @@ def get_instance(cls, ctx: object): instance = getattr(ctx, "_shapes_registry_instance", None) if not instance: instance = cls() - setattr(ctx, "_shapes_registry_instance", instance) + ctx._shapes_registry_instance = instance return instance diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 8bc559503..121936eaf 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -85,10 +85,7 @@ def shape(self) -> Shape: @property def hidden(self) -> bool: - if self.shape.node is not None and \ - (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph: - return True - return False + return bool(self.shape.node is not None and (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph) @classmethod def finalize(cls, context: ValidationContext) -> None: diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index ae23db580..9f0802470 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -74,7 +74,7 @@ def make_uris_relative(text: str, ro_crate_path: Union[Path, str]) -> str: def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optional[list] = None) -> object: # inject attributes of the shape property # logger.debug("Injecting attributes of node %s", node) - skip_properties = ["node"] if exclude is None else exclude + ["node"] + skip_properties = ["node"] if exclude is None else [*exclude, "node"] triples = node_graph.triples((node, None, None)) for _node, p, o in triples: 
predicate_as_string = cast(Any, p).toPython() @@ -102,7 +102,7 @@ def __compute_values__(g: Graph, s: Node) -> list[tuple]: # Collect the values of the triples in the graph (excluding BNodes) values = [] # Assuming the list of triples values is stored in a variable called 'triples_values' - triples_values = list([(_, x, _) for (_, x, _) in g.triples((s, None, None)) if x != RDF.type]) + triples_values = [(_, x, _) for (_, x, _) in g.triples((s, None, None)) if x != RDF.type] for (s, p, o) in triples_values: if isinstance(o, BNode): diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 567f2c5a6..7339830bd 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -240,13 +240,13 @@ def get_instance(cls, context: ValidationContext) -> SHACLValidationContext: instance = getattr(context, "_shacl_validation_context", None) if not instance: instance = SHACLValidationContext(context) - setattr(context, "_shacl_validation_context", instance) + context._shacl_validation_context = instance return instance class SHACLViolation: - def __init__(self, result: "SHACLValidationResult", violation_node: Node, graph: Graph) -> None: + def __init__(self, result: SHACLValidationResult, violation_node: Node, graph: Graph) -> None: # check the input assert result is not None, "Invalid result" assert isinstance(violation_node, Node), "Invalid violation node" @@ -505,4 +505,4 @@ def validate( return SHACLValidationResult(results_graph, results_text) -__all__ = ["SHACLValidator", "SHACLValidationResult", "SHACLViolation"] +__all__ = ["SHACLValidationResult", "SHACLValidator", "SHACLViolation"] diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 9e8fad932..23142bf75 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -190,8 +190,8 @@ def has_types(self, entity_types: list[str], all_types: bool = False) -> 
bool: assert isinstance(entity_types, list), "Entity types must be a list" e_types = self.type if isinstance(self.type, list) else [self.type] if all_types: - return all([t in e_types for t in entity_types]) - return any([t in e_types for t in entity_types]) + return all(t in e_types for t in entity_types) + return any(t in e_types for t in entity_types) def __process_property__(self, name: str, data: object) -> object: if isinstance(data, dict) and "@id" in data: @@ -473,7 +473,7 @@ def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Optional[Path] """ if cls is not ROCrate: # If called on a subclass, use normal instantiation - return super(ROCrate, cls).__new__(cls) + return super().__new__(cls) # If called on ROCrate directly, use factory logic instance = cls.new_instance(uri) @@ -534,7 +534,6 @@ def size(self) -> int: :return: the size of the RO-Crate :rtype: int """ - pass @abstractmethod def list_files(self) -> list[Path]: @@ -544,7 +543,6 @@ def list_files(self) -> list[Path]: :return: a list of file paths :rtype: list[Path] """ - pass def __get_search_path__(self, path: Path) -> tuple[Path, Path]: """ @@ -678,7 +676,6 @@ def get_file_size(self, path: Path) -> int: :return: the size of the file :rtype: int """ - pass @abstractmethod def get_file_content( @@ -696,7 +693,6 @@ def get_file_content( :return: the content of the file :rtype: Union[str, bytes] """ - pass @staticmethod def get_external_file_content( @@ -738,7 +734,7 @@ def get_external_file_size(uri: str) -> int: @staticmethod def from_metadata_dict( metadata_dict: dict - ) -> "ROCrate": + ) -> ROCrate: """ Create a new instance of the RO-Crate based on the metadata dictionary. @@ -758,7 +754,7 @@ def from_metadata_dict( @staticmethod def new_instance( uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None - ) -> "ROCrate": + ) -> ROCrate: """ Create a new instance of the RO-Crate based on the URI. 
@@ -921,7 +917,7 @@ def has_directory(self, path: Path) -> bool: assert self._zipref is not None, "Zip reference not initialized" for px in (path, self.__parse_path__(path)): for p in self._zipref.namelist(): - if f"{str(px)}/" == str(p) or str(px) == str(p): + if f"{px!s}/" == str(p) or str(px) == str(p): info = self.__get_file_info__(p) return info.is_dir() return False diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 9e118f2bc..9b31518f6 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -66,7 +66,7 @@ class WarmUpResult: detail: Optional[str] = None -def discover_profile_cacheable_urls(profile: "Profile") -> list[str]: +def discover_profile_cacheable_urls(profile: Profile) -> list[str]: """ Return the list of HTTP(S) URLs declared by ``profile`` as cacheable artifacts. Returns an empty list when the profile has no declared @@ -92,7 +92,7 @@ def discover_profile_cacheable_urls(profile: "Profile") -> list[str]: return urls -def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> list[str]: +def discover_cacheable_urls_from_profiles(profiles: Iterable[Profile]) -> list[str]: """ Aggregate cacheable URLs from the given profiles, preserving order and removing duplicates. 
@@ -122,10 +122,7 @@ def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: if requester.has_cached(url): results.append(WarmUpResult(url=url, status="skipped", detail="already cached")) continue - if offline: - response = requester.get(url) - else: - response = requester.fetch_fresh(url) + response = requester.get(url) if offline else requester.fetch_fresh(url) status_code = getattr(response, "status_code", None) if status_code is None: results.append(WarmUpResult(url=url, status="failed", detail="no status code")) @@ -141,7 +138,7 @@ def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: return results -def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[list[WarmUpResult]]: +def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[WarmUpResult]]: """ Perform a best-effort synchronous warm-up triggered by ``ValidationSettings.__post_init__``. @@ -186,7 +183,7 @@ def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[list[W return results -def _find_profile(identifier, settings) -> Optional["Profile"]: +def _find_profile(identifier, settings) -> Optional[Profile]: """ Look up a loaded profile by identifier. Accepts either a string or a list (the settings sometimes store a list of identifiers). 
diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 351016f2d..72d7e33a4 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -15,6 +15,7 @@ from __future__ import annotations import atexit +import contextlib import os import random import string @@ -104,7 +105,7 @@ def __new__(cls, *args, **kwargs) -> HttpRequester: with cls._lock: if cls._instance is None: logger.debug(f"Creating instance of {cls.__name__}") - cls._instance = super(HttpRequester, cls).__new__(cls) + cls._instance = super().__new__(cls) atexit.register(cls._instance.__del__) logger.debug(f"Instance created: {cls._instance.__class__.__name__}") return cls._instance @@ -182,7 +183,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = self.session.settings.only_if_cached = True except AttributeError: # Older requests_cache versions expose the flag on the session directly. - setattr(self.session, "only_if_cached", True) + self.session.only_if_cached = True except ImportError: logger.warning("requests_cache is not installed. 
Using requests instead.") except Exception as e: @@ -342,10 +343,8 @@ def cache_info(self) -> dict[str, Any]: except Exception as e: logger.debug("Unable to count cache entries: %s", e) if info["path"] and os.path.exists(info["path"]): - try: + with contextlib.suppress(OSError): info["size_bytes"] = os.path.getsize(info["path"]) - except OSError: - pass return info @classmethod diff --git a/rocrate_validator/utils/io_helpers/colors.py b/rocrate_validator/utils/io_helpers/colors.py index 1c86db067..fa6e28424 100644 --- a/rocrate_validator/utils/io_helpers/colors.py +++ b/rocrate_validator/utils/io_helpers/colors.py @@ -24,11 +24,11 @@ def get_severity_color(severity: Union[str, Severity]) -> str: :param severity: The severity :return: The color """ - if severity == Severity.REQUIRED or severity == "REQUIRED": + if severity in (Severity.REQUIRED, "REQUIRED"): return "red" - if severity == Severity.RECOMMENDED or severity == "RECOMMENDED": + if severity in (Severity.RECOMMENDED, "RECOMMENDED"): return "orange1" - if severity == Severity.OPTIONAL or severity == "OPTIONAL": + if severity in (Severity.OPTIONAL, "OPTIONAL"): return "yellow" return "white" diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 5dd36087c..6c3576262 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -104,7 +104,7 @@ def multiple_choice(console: Console, "type": "checkbox", "name": "profiles", "message": prompt_text, - "choices": [Choice(i, f"{choices[i].identifier}: {choices[i].name}") for i in range(0, len(choices))] + "choices": [Choice(i, f"{choices[i].identifier}: {choices[i].name}") for i in range(len(choices))] } ] console.print("\n") diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 483292a5d..53b4143a8 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ 
b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -34,7 +34,7 @@ class OutputFormatter(Protocol): """Protocol for output formatters.""" - def __rich_console__(self, console: "Console", options: ConsoleOptions) -> RenderResult: + def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: pass @@ -69,7 +69,7 @@ def get_type_formatters(self) -> dict[type, type[OutputFormatter]]: """Retrieve all registered formatters.""" return dict(self._fmap) - def __rich_console__(self, console: "Console", options: ConsoleOptions) -> RenderResult: + def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: if self._data is None: raise ValueError("No data provided for formatting.") formatter = self.get_data_formatter(self._data) diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index d116cfcf3..91d65d9ec 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -66,9 +66,7 @@ def format_validation_results(data: dict[str, ValidationResult], verbose = settings.verbose if settings else False # Set the list of validation profiles - json_output["validation_settings"]["profile_identifiers"] = [ - profile_identifier for profile_identifier in data.keys() - ] + json_output["validation_settings"]["profile_identifiers"] = list(data) # Initialize the overall passed status json_output["passed"] = True diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py b/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py index 3d27c46bf..ff3b52f09 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py @@ -17,4 +17,4 @@ from .progress import Progress from .report import ValidationReportLayout -__all__ = ["ValidationReportLayout", "Progress"] 
+__all__ = ["Progress", "ValidationReportLayout"] diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index f80dbb882..ffde15e26 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -113,7 +113,6 @@ def __init_layout__(self): f"[bold {severity_color}]{settings.requirement_severity}[/bold {severity_color}]", style="white", align="left"), name="Base Info", size=5) - # self.passed_checks = Layout(name="PASSED") self.failed_checks = Layout(name="FAILED") # Create the layout of the requirement checks section diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 37e6ebf21..ee89aa544 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -124,7 +124,7 @@ def __setup_logger__(logger: Logger): logger.setLevel(level) # configure the logger handler - ch = __handlers__.get(logger.name, None) + ch = __handlers__.get(logger.name) if not ch: ch = StreamHandler(__log_stream__) ch.setLevel(level) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 7a07bee60..07efb77c8 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -70,10 +70,7 @@ def is_external_reference(value: object) -> bool: # Reject scheme-only input (``urn:``, ``doi:``): syntactically valid # per the grammar but semantically unusable as an identifier. 
- if not (parts.netloc or parts.path or parts.query or parts.fragment): - return False - - return True + return parts.netloc or parts.path or parts.query or parts.fragment class URI: @@ -148,7 +145,7 @@ def __init__(self, uri: Union[str, Path]): except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) - raise ValueError("Invalid URI: %s" % uri) from e + raise ValueError(f"Invalid URI: {uri}") from e @property def uri(self) -> str: @@ -298,7 +295,7 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo assert isinstance(uri, (str, Path, URI)), "The RO-Crate URI must be a string, Path, or URI object" try: # parse the value to extract the scheme - uri = URI(str(uri)) if isinstance(uri, str) or isinstance(uri, Path) else uri + uri = URI(str(uri)) if isinstance(uri, (str, Path)) else uri # restrict RO-Crate roots to schemes the loader can actually handle if not uri.has_supported_rocrate_scheme(): raise errors.ROCrateInvalidURIError(uri) diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index 50c01cf5e..c72c19a59 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -114,10 +114,7 @@ def get_version() -> str: version = latest_tag else: commit_distance = get_commit_distance(latest_tag) - if commit_sha: - version = f"{declared_version}_{commit_sha}+{commit_distance}" - else: - version = declared_version + version = f"{declared_version}_{commit_sha}+{commit_distance}" if commit_sha else declared_version dirty = has_uncommitted_changes() return f"{version}-dirty" if dirty else version From 69a4633fb81daa187867ebcd8e0c8e668b01d361 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:18:41 +0200 Subject: [PATCH 204/352] =?UTF-8?q?chore:=20=F0=9F=A7=B9=20remove=20non-br?= =?UTF-8?q?eaking=20space=20from=20comment=20(RUF003)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
rocrate_validator/requirements/shacl/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 7c35ef07c..d65b440dc 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -402,7 +402,7 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N # store the node shape in the registry self.add_shape(shape) - #  store the node in the list of shapes + # store the node in the list of shapes if not grouped: shapes.append(shape) else: From 95318ab115c9fcbf2002c1d33fa6b36a4cbcc507 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:21:26 +0200 Subject: [PATCH 205/352] =?UTF-8?q?chore:=20=F0=9F=A7=B9=20replace=20curly?= =?UTF-8?q?=20apostrophes=20with=20straight=20ones=20(RUF002)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 3fc03edaa..98ff24e86 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1584,7 +1584,7 @@ def violatingEntity(self) -> Optional[str]: to meet the defined rules or constraints within a validation process. Also referred to as `focusNode` in SHACL terminology in the context of an RDF graph, it is the subject of a triple - that violates a given constraint on the subject’s property/predicate, + that violates a given constraint on the subject's property/predicate, represented by the violatingProperty. """ return self._violatingEntity @@ -1597,7 +1597,7 @@ def violatingProperty(self) -> Optional[str]: It identifies the part of the data structure that is causing the issue. 
Also referred to as `resultPath` in SHACL terminology, in the context of an RDF graph, it is the predicate of a triple - that violates a given constraint on the subject’s property/predicate, + that violates a given constraint on the subject's property/predicate, represented by the violatingProperty. """ return self._violatingProperty From 1c5d7d79764234c933c3127ce99f3160073f255c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:25:51 +0200 Subject: [PATCH 206/352] =?UTF-8?q?refactor:=20=F0=9F=8F=B7=EF=B8=8F=20add?= =?UTF-8?q?=20ClassVar=20annotation=20to=20SHACLCheck.=5F=5Finstances=5F?= =?UTF-8?q?=5F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 364dd0257..4fef19466 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -14,7 +14,7 @@ import json from timeit import default_timer as timer -from typing import Any, Optional, cast +from typing import Any, ClassVar, Optional, cast from rdflib import RDF, BNode, Literal, Namespace @@ -56,7 +56,7 @@ class SHACLCheck(RequirementCheck): """ # Map shape to requirement check instances - __instances__: dict[int, "SHACLCheck"] = {} + __instances__: ClassVar[dict[int, "SHACLCheck"]] = {} def __init__( self, From f01393d747e6c6d66fdbbc07024b49a19325789d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:27:18 +0200 Subject: [PATCH 207/352] =?UTF-8?q?chore:=20=F0=9F=A7=B9=20remove=20unnece?= =?UTF-8?q?ssary=20f-string=20prefix=20from=20docstring=20(RUF010)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 7339830bd..3f0227ada 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -419,7 +419,7 @@ def validate( Optional[RDF_SERIALIZATION_FORMATS_TYPES] = "turtle", **kwargs, ) -> SHACLValidationResult: - f""" + """ Validate a data graph using SHACL shapes as constraints :param data_graph: rdflib.Graph or file path or web url From 50de8a2e4c96297a03d0e8bd17963fb949c83b65 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:38:39 +0200 Subject: [PATCH 208/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20replace=20ma?= =?UTF-8?q?nual=20counter=20with=20enumerate=20/=20remove=20unused=20varia?= =?UTF-8?q?ble=20(SIM113)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/requirements.py | 4 +--- rocrate_validator/requirements/shacl/utils.py | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 121936eaf..42582fbc8 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -46,10 +46,8 @@ def __init__(self, shape: Shape, profile: Profile, path: Path): self.__reorder_checks__() def __reorder_checks__(self) -> None: - i = 0 - for check in self._checks: + for i, check in enumerate(self._checks): check.order_number = i - i += 1 def __init_checks__(self) -> list[RequirementCheck]: # check if the shape is not None before creating checks diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 9f0802470..de86b6f59 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -286,9 +286,7 @@ def 
load_shapes_from_graph(g: Graph) -> ShapesList: # Split the graph into subgraphs for each shape subgraphs = {} - count = 0 for shape in shapes: - count += 1 subgraph = Graph() # Extract all related triples for the current shape related_triples = __extract_related_triples__(g, shape) From 0bd9213f934c1966f2b026d68948b318b6cc2156 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:41:49 +0200 Subject: [PATCH 209/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20rename=20loo?= =?UTF-8?q?p=20variables=20to=20avoid=20shadowing=20outer=20`s`=20paramete?= =?UTF-8?q?r=20(PLR1704)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index de86b6f59..0d4c04906 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -104,11 +104,11 @@ def __compute_values__(g: Graph, s: Node) -> list[tuple]: # Assuming the list of triples values is stored in a variable called 'triples_values' triples_values = [(_, x, _) for (_, x, _) in g.triples((s, None, None)) if x != RDF.type] - for (s, p, o) in triples_values: - if isinstance(o, BNode): - values.extend(__compute_values__(g, o)) + for (subj, pred, obj) in triples_values: + if isinstance(obj, BNode): + values.extend(__compute_values__(g, obj)) else: - values.append((s, p, o) if not isinstance(s, BNode) else (p, o)) + values.append((subj, pred, obj) if not isinstance(subj, BNode) else (pred, obj)) return values From 1c4bec98a0e01b50b225c615da8526d17653c8fe Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:45:29 +0200 Subject: [PATCH 210/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20avoid=20reas?= =?UTF-8?q?signing=20loop=20variable=20in=20Profile.load=5Fprofiles=20(PLW?= =?UTF-8?q?2901)?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 98ff24e86..46de90dfc 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -731,16 +731,19 @@ def __load_profiles_paths__( # collect profiles nested in the root profile directories for root_profile_directory in root_profile_directories: # if the path is a string, convert it to a Path - if isinstance(root_profile_directory, str): - root_profile_directory = Path(root_profile_directory) + profile_root_directory = ( + Path(root_profile_directory) + if isinstance(root_profile_directory, str) + else root_profile_directory + ) # check if the path is a directory and raise an error if not - if not root_profile_directory.is_dir(): - raise InvalidProfilePath(str(root_profile_directory)) + if not profile_root_directory.is_dir(): + raise InvalidProfilePath(str(profile_root_directory)) # if the path is a directory, get the profile directories result.extend( [ - (root_profile_directory, p.parent) - for p in root_profile_directory.rglob("*.*") + (profile_root_directory, p.parent) + for p in profile_root_directory.rglob("*.*") if p.name == PROFILE_SPECIFICATION_FILE ] ) From 4385777c0a65deaf86bdd9aed8da61ed4eff333f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:50:23 +0200 Subject: [PATCH 211/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20replace=20ma?= =?UTF-8?q?nual=20loops=20with=20comprehensions=20(PERF401)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../requirements/shacl/requirements.py | 9 +++++---- rocrate_validator/rocrate.py | 20 ++++++------------- rocrate_validator/utils/paths.py | 8 +------- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/rocrate_validator/requirements/shacl/requirements.py 
b/rocrate_validator/requirements/shacl/requirements.py index 42582fbc8..d9f23b742 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -168,8 +168,9 @@ def load( assert file_path is not None, "The file path cannot be None" shapes: list[Shape] = self.shapes_registry.load_shapes(file_path, publicID) logger.debug("Loaded %s shapes: %s", len(shapes), shapes) - requirements: list[Requirement] = [] - for shape in shapes: - if shape is not None and shape.level >= requirement_level: - requirements.append(SHACLRequirement(shape, profile, file_path)) + requirements: list[Requirement] = [ + SHACLRequirement(shape, profile, file_path) + for shape in shapes + if shape is not None and shape.level >= requirement_level + ] return requirements diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 23142bf75..efc97af32 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -363,20 +363,13 @@ def get_entity(self, entity_id: str) -> Optional[ROCrateEntity]: return None def get_entities(self) -> list[ROCrateEntity]: - entities = [] - for entity in self.as_dict().get("@graph", []): - entities.append(ROCrateEntity(self, entity)) - return entities + return [ROCrateEntity(self, entity) for entity in self.as_dict().get("@graph", [])] def get_entities_by_type( self, entity_type: Union[str, list[str]] ) -> list[ROCrateEntity]: entity_types = [entity_type] if isinstance(entity_type, str) else entity_type - entities = [] - for e in self.get_entities(): - if e.has_types(entity_types): - entities.append(e) - return entities + return [e for e in self.get_entities() if e.has_types(entity_types)] def get_dataset_entities(self) -> list[ROCrateEntity]: return self.get_entities_by_type("Dataset") @@ -396,11 +389,10 @@ def get_data_entities( ] def get_web_data_entities(self) -> list[ROCrateEntity]: - entities = [] - for entity in self.get_entities(): - if (entity.has_type("File") 
or entity.has_type("Dataset")) and entity.is_remote(): - entities.append(entity) - return entities + return [ + entity for entity in self.get_entities() + if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote() + ] def get_conforms_to(self) -> Optional[list[str]]: try: diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 3fbd2224b..4e4aad2f8 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -117,13 +117,7 @@ def list_matching_file_paths( # iterate through the directory and subdirectories for root, _, files in os.walk(directory): - # iterate through the files - for file in files: - # check if the file has a .ttl extension - if file.endswith(extension): - # append the file path to the list - file_paths.append(os.path.join(root, file)) - # return the list of file paths + file_paths.extend(os.path.join(root, f) for f in files if f.endswith(extension)) return file_paths From 0c730230bc2af34b85ab9cadd497bd3e12010083 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:53:11 +0200 Subject: [PATCH 212/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20replace=20ma?= =?UTF-8?q?nual=20append=20loops=20with=20extend()=20[PERF402]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 5 ++--- rocrate_validator/requirements/shacl/models.py | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 46de90dfc..0da37d9e8 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1358,13 +1358,12 @@ def ok_file(p: Path) -> bool: requirement_path, ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) - for requirement in cast(Any, requirement_loader).load( + requirements.extend(cast(Any, requirement_loader).load( profile, requirement_level, requirement_path, 
publicID=profile.publicID, - ): - requirements.append(requirement) + )) # sort the requirements by severity requirements = sorted( requirements, diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index d65b440dc..1fdf02c56 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -406,8 +406,7 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N if not grouped: shapes.append(shape) else: - for prop in ungrouped_properties: - shapes.append(prop) + shapes.extend(ungrouped_properties) # register Property Shapes for property_shape in shapes_list.property_shapes: From 8ef605e5e79321fe2483817e58817fe6e868367d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 21:58:12 +0200 Subject: [PATCH 213/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20fix=20unnece?= =?UTF-8?q?ssary=20try/except=20inside=20loop=20(PERF203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../requirements/python/__init__.py | 9 +++---- rocrate_validator/requirements/shacl/utils.py | 27 ++++++++++--------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 63c441bf7..08f55595f 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -252,12 +252,9 @@ def load(self, profile: Profile, rq["description"] = check_class.__doc__.strip() if check_class.__doc__ else "" # handle default overrides via decorators for pn in ("name", "description"): - try: - pv = getattr(check_class, f"__rq_{pn}__", None) - if pv and isinstance(pv, str): - rq[pn] = pv - except AttributeError: - pass + pv = getattr(check_class, f"__rq_{pn}__", None) + if pv and isinstance(pv, str): + rq[pn] = pv logger.debug("Processing requirement: %r", 
requirement_name) r = PyRequirement( profile, diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 0d4c04906..f251ef471 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -313,21 +313,22 @@ def resolve_parent_shape( if not isinstance(source_shape_node, BNode): return None SHACL = Namespace(SHACL_NS) - # Predicates via which a NodeShape/PropertyShape can own a constraint BNode parent_predicates = [SHACL.sparql, SHACL.property] + + def _safe_get_shape(graph, node): + try: + return shapes_registry.get_shape(Shape.compute_key(graph, node)) + except (ValueError, KeyError): + return None + for predicate in parent_predicates: for parent_node in shapes_graph.subjects(predicate, source_shape_node): - try: - parent_shape = shapes_registry.get_shape( - Shape.compute_key(shapes_graph, parent_node) + parent_shape = _safe_get_shape(shapes_graph, parent_node) + if parent_shape is not None: + logger.debug( + "Resolved parent shape %s for SPARQL/inline constraint BNode %s", + parent_shape.key, + source_shape_node, ) - if parent_shape is not None: - logger.debug( - "Resolved parent shape %s for SPARQL/inline constraint BNode %s", - parent_shape.key, - source_shape_node, - ) - return parent_shape - except (ValueError, KeyError): - continue + return parent_shape return None From 81773404322da17826ffe6c34030bcad250488db Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 22:06:06 +0200 Subject: [PATCH 214/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20extract=20HT?= =?UTF-8?q?TP=20status=20codes,=20byte=20size,=20and=20param=20count=20int?= =?UTF-8?q?o=20constants=20(PLR2004)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/cache.py | 7 ++++--- rocrate_validator/constants.py | 11 +++++++++++ .../ro-crate/must/0_file_descriptor_format.py | 5 +++-- 
rocrate_validator/requirements/python/__init__.py | 5 +++-- rocrate_validator/rocrate.py | 5 +++-- rocrate_validator/services.py | 5 +++-- rocrate_validator/utils/cache_warmup.py | 2 +- rocrate_validator/utils/document_loader.py | 3 ++- 8 files changed, 30 insertions(+), 13 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index aa0fd9c84..4797ee2ee 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -34,6 +34,7 @@ from rocrate_validator.utils.cache_warmup import WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_default_http_cache_path, get_profiles_path +from rocrate_validator.constants import BYTES_PER_KIB, HTTP_STATUS_BAD_REQUEST logger = logging.getLogger(__name__) @@ -457,7 +458,7 @@ def _warm_remote_crates(urls: list[str]) -> list[WarmUpResult]: if status is None: results.append(WarmUpResult(url=url, status="failed", detail="no status code")) continue - if status >= 400: + if status >= HTTP_STATUS_BAD_REQUEST: results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status}")) continue # Touch the body so the cache backend stores the full response. 
@@ -475,8 +476,8 @@ def _format_bytes(size: int) -> str: units = ["B", "KiB", "MiB", "GiB", "TiB"] idx = 0 value = float(size) - while value >= 1024 and idx < len(units) - 1: - value /= 1024 + while value >= BYTES_PER_KIB and idx < len(units) - 1: + value /= BYTES_PER_KIB idx += 1 return f"{value:.2f} {units[idx]}" diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index c717219f7..9e8311b91 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -86,6 +86,17 @@ # Current JSON output format JSON_OUTPUT_FORMAT_VERSION = "0.2" +# HTTP Status Codes +HTTP_STATUS_OK = 200 +HTTP_STATUS_BAD_REQUEST = 400 +HTTP_STATUS_GATEWAY_TIMEOUT = 504 + +# Number of bytes per kibibyte +BYTES_PER_KIB = 1024 + +# Expected number of parameters for check function signatures +EXPECTED_CHECK_PARAM_COUNT = 2 + # Http Cache Settings DEFAULT_HTTP_CACHE_MAX_AGE = -1 # in seconds; negative means "never expire" DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 3b864c84c..baf1b995f 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -20,6 +20,7 @@ from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.constants import HTTP_STATUS_OK # set up logging logger = logging.getLogger(__name__) @@ -89,7 +90,7 @@ class FileDescriptorJsonLdFormat(PyFunctionCheck): def __get_remote_context__(self, context_uri: str) -> object: raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json, application/json"}) - if raw_data.status_code != 200: + if raw_data.status_code != HTTP_STATUS_OK: raise 
RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self) logger.debug(f"Retrieved context from {context_uri}") @@ -125,7 +126,7 @@ def __get_remote_context__(self, context_uri: str) -> object: logger.debug(f"Trying to retrieve JSON-LD context from alternate URL: {alternate_url}") raw_data = HttpRequester().get(alternate_url, headers={ "Accept": "application/ld+json, application/json"}) - if raw_data.status_code != 200: + if raw_data.status_code != HTTP_STATUS_OK: raise RuntimeError( f"Unable to retrieve the JSON-LD context from alternate URL '{alternate_url}'", self) logger.debug(f"Retrieved context from alternate URL {alternate_url}") diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 08f55595f..243c77d0e 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -31,6 +31,7 @@ ) from rocrate_validator.utils import log as logging from rocrate_validator.utils.python_helpers import get_classes_from_file +from rocrate_validator.constants import EXPECTED_CHECK_PARAM_COUNT # set up logging logger = logging.getLogger(__name__) @@ -54,7 +55,7 @@ def __init__(self, super().__init__(requirement, name, description=description, level=level, deactivated=deactivated) sig = inspect.signature(check_function) - if len(sig.parameters) != 2: + if len(sig.parameters) != EXPECTED_CHECK_PARAM_COUNT: raise RuntimeError("Invalid PyFunctionCheck function. Checks are expected to accept " f"arguments [RequirementCheck, ValidationContext] but this has signature {sig}") if sig.return_annotation not in (bool, inspect.Signature.empty): @@ -216,7 +217,7 @@ def check(name: Optional[str] = None, def decorator(func): check_name = name if name else func.__name__ sig = inspect.signature(func) - if len(sig.parameters) != 2: + if len(sig.parameters) != EXPECTED_CHECK_PARAM_COUNT: raise RuntimeError(f"Invalid check {check_name}. 
Checks are expected to " f"accept two arguments but this only takes {len(sig.parameters)}") if sig.return_annotation not in (bool, inspect.Signature.empty): diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index efc97af32..91482893e 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -30,6 +30,7 @@ from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference, validate_rocrate_uri +from rocrate_validator.constants import HTTP_STATUS_OK # set up logging logger = logging.getLogger(__name__) @@ -1058,12 +1059,12 @@ def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: if not base_url.endswith('.zip'): # Check for bagit.txt bagit_response = HttpRequester().head(f"{base_url}/bagit.txt") - if bagit_response.status_code != 200: + if bagit_response.status_code != HTTP_STATUS_OK: return False # Check for data/ro-crate-metadata.json metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") - return metadata_response.status_code == 200 + return metadata_response.status_code == HTTP_STATUS_OK # If it's a remote zip file, we need to download it partially # Temporarily create instance to check diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 809019203..aa7bd2d1e 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -26,6 +26,7 @@ from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.uri import URI +from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST, HTTP_STATUS_GATEWAY_TIMEOUT # set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() @@ -166,8 +167,8 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): else: response = requester.fetch_fresh(rocrate_path.uri, stream=True, 
allow_redirects=True) with response as r: - if r.status_code >= 400: - if offline and r.status_code == 504: + if r.status_code >= HTTP_STATUS_BAD_REQUEST: + if offline and r.status_code == HTTP_STATUS_GATEWAY_TIMEOUT: raise FileNotFoundError( f"Remote RO-Crate '{rocrate_path.uri}' is not available in the HTTP cache. " f"Validate it online first, or run " diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 9b31518f6..7ab1b3359 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -128,7 +128,7 @@ def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: results.append(WarmUpResult(url=url, status="failed", detail="no status code")) elif status_code == OFFLINE_CACHE_MISS_STATUS and offline: results.append(WarmUpResult(url=url, status="failed", detail="offline cache miss")) - elif status_code >= 400: + elif status_code >= constants.HTTP_STATUS_BAD_REQUEST: results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status_code}")) else: results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status_code}")) diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index d0dfe9dc2..997bd03f0 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -33,6 +33,7 @@ from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester, OfflineCacheMissError +from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST logger = logging.getLogger(__name__) @@ -117,7 +118,7 @@ def _fetch_json_ld(url: str) -> Any: status = getattr(response, "status_code", None) if status == OFFLINE_CACHE_MISS_STATUS and getattr(requester, "offline", False): raise OfflineCacheMissError(url) - if status is None or status >= 400: + if status is None or status >= HTTP_STATUS_BAD_REQUEST: raise RuntimeError(f"Unable to 
retrieve JSON-LD document from {url} (status {status})") try: return response.json() From b323d5a4701126ba83518a7d46a3efd5a2967abc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 22:09:51 +0200 Subject: [PATCH 215/352] =?UTF-8?q?chore:=20=F0=9F=A7=B9=20suppress=20PLC0?= =?UTF-8?q?415=20for=20delayed=20imports=20to=20avoid=20circular=20depende?= =?UTF-8?q?ncies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/__init__.py | 2 +- rocrate_validator/cli/main.py | 2 +- rocrate_validator/requirements/shacl/checks.py | 2 +- rocrate_validator/requirements/shacl/requirements.py | 4 ++-- rocrate_validator/requirements/shacl/utils.py | 2 +- rocrate_validator/requirements/shacl/validator.py | 2 +- rocrate_validator/utils/cache_warmup.py | 4 ++-- rocrate_validator/utils/config.py | 2 +- rocrate_validator/utils/http.py | 2 +- rocrate_validator/utils/io_helpers/input.py | 6 +++--- rocrate_validator/utils/io_helpers/output/console.py | 2 +- rocrate_validator/utils/versioning.py | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rocrate_validator/__init__.py b/rocrate_validator/__init__.py index 50efb6aba..d1d271ab6 100644 --- a/rocrate_validator/__init__.py +++ b/rocrate_validator/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. 
def get_version(): - from rocrate_validator.utils.versioning import get_version + from rocrate_validator.utils.versioning import get_version # noqa: PLC0415 return get_version() diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index 8e29cd984..f55b6ed37 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -88,7 +88,7 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ # If no subcommand is provided, invoke the default command if ctx.invoked_subcommand is None: # If no subcommand is provided, invoke the default command - from rocrate_validator.cli.commands.validate import validate + from rocrate_validator.cli.commands.validate import validate # noqa: PLC0415 ctx.invoke(validate) else: logger.debug("Command invoked: %s", ctx.invoked_subcommand) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 4fef19466..d1931b75f 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -124,7 +124,7 @@ def deactivated(self) -> bool: # (transitively) from the shape's owning profile, so unrelated # profiles loaded in the same process can't influence the result. # Validator.__do_validate__ pre-loads the shape graphs. 
- from rocrate_validator.models import Profile + from rocrate_validator.models import Profile # noqa: PLC0415 owning_profile = self.requirement.profile for profile in Profile.get_descendants(owning_profile): diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index d9f23b742..cb535f21e 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -104,8 +104,8 @@ def finalize(cls, context: ValidationContext) -> None: # extract profiles and target profile from context profiles = context.profiles - from rocrate_validator.requirements.shacl.checks import SHACLCheck - from rocrate_validator.requirements.shacl.validator import SHACLValidationContext + from rocrate_validator.requirements.shacl.checks import SHACLCheck # noqa: PLC0415 + from rocrate_validator.requirements.shacl.validator import SHACLValidationContext # noqa: PLC0415 target = next((p for p in profiles if p.identifier == context.settings.profile_identifier), None) if target is None: diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index f251ef471..18f5784c0 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -308,7 +308,7 @@ def resolve_parent_shape( in the ShapesRegistry directly; instead the containing NodeShape is. This helper walks up via sh:sparql and sh:property predicates to find it. 
""" - from rocrate_validator.requirements.shacl.models import Shape + from rocrate_validator.requirements.shacl.models import Shape # noqa: PLC0415 if not isinstance(source_shape_node, BNode): return None diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 3f0227ada..9e57dcdf1 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -127,7 +127,7 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: # enable overriding of checks if self.settings.allow_requirement_check_override: - from rocrate_validator.requirements.shacl.requirements import SHACLRequirement + from rocrate_validator.requirements.shacl.requirements import SHACLRequirement # noqa: PLC0415 for requirement in [_ for _ in profile.requirements if isinstance(_, SHACLRequirement)]: # logger.debug("Processing requirement: %s", requirement.name) for check in requirement.get_checks(): diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 7ab1b3359..84981fc45 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -189,8 +189,8 @@ def _find_profile(identifier, settings) -> Optional[Profile]: (the settings sometimes store a list of identifiers). """ # Import here to avoid a circular import with models.py. - from rocrate_validator.models import Profile - from rocrate_validator.utils.paths import get_profiles_path + from rocrate_validator.models import Profile # noqa: PLC0415 + from rocrate_validator.utils.paths import get_profiles_path # noqa: PLC0415 # Load profiles to ensure the requested one is available and its graph is parsed. 
global __profiles_loaded diff --git a/rocrate_validator/utils/config.py b/rocrate_validator/utils/config.py index d2b292bcb..b42988ad8 100644 --- a/rocrate_validator/utils/config.py +++ b/rocrate_validator/utils/config.py @@ -31,7 +31,7 @@ def get_config() -> dict: """ global _config if _config is None: - from .paths import get_config_path + from .paths import get_config_path # noqa: PLC0415 # Read the pyproject.toml file _config = toml.load(get_config_path()) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 72d7e33a4..38c0b87a0 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -153,7 +153,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = # and set up the cached session try: if not self.no_cache: - from requests_cache import CachedSession + from requests_cache import CachedSession # noqa: PLC0415 # If cache_path is not provided, use the default path prefix if not cache_path: diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 6c3576262..e1f0001eb 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -34,7 +34,7 @@ def __get_single_char_win32__(console: Optional[Console] = None, end: str = "\n" """ Get a single character from the console """ - import msvcrt + import msvcrt # noqa: PLC0415 char = None while char is None or (choices and char not in choices): @@ -56,8 +56,8 @@ def __get_single_char_unix__(console: Optional[Console] = None, end: str = "\n", """ Get a single character from the console """ - import termios - import tty + import termios # noqa: PLC0415 + import tty # noqa: PLC0415 fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index ceaae1449..f443c5d18 100644 --- 
a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -42,7 +42,7 @@ def __init__(self, *args, disabled: bool = False, interactive: bool = True, self.register_formatter(formatter, type_) def __jupyter_environment__(self) -> bool: - from rocrate_validator.cli.utils import running_in_jupyter + from rocrate_validator.cli.utils import running_in_jupyter # noqa: PLC0415 return running_in_jupyter() def register_formatter(self, formatter: OutputFormatter, type_: Optional[type] = None): diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index c72c19a59..0221a750d 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -30,7 +30,7 @@ def run_git_command(command: list[str]) -> Optional[str]: :param command: The git command :return: The output of the command """ - import subprocess + import subprocess # noqa: PLC0415 try: return subprocess.check_output(command, stderr=subprocess.DEVNULL).decode().strip() From 61327b49f34a52b0b67e838033a82ea273e973bf Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 22:19:49 +0200 Subject: [PATCH 216/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20reduce=20ret?= =?UTF-8?q?urn=20statements=20(PLR0911)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/cache.py | 2 +- rocrate_validator/models.py | 9 +- .../ro-crate/must/0_file_descriptor_format.py | 2 +- .../requirements/python/__init__.py | 2 +- rocrate_validator/rocrate.py | 105 ++++++++---------- rocrate_validator/services.py | 2 +- rocrate_validator/utils/cache_warmup.py | 34 +++--- rocrate_validator/utils/document_loader.py | 2 +- 8 files changed, 71 insertions(+), 87 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 4797ee2ee..04aa71035 100644 --- a/rocrate_validator/cli/commands/cache.py +++ 
b/rocrate_validator/cli/commands/cache.py @@ -29,12 +29,12 @@ from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli, click +from rocrate_validator.constants import BYTES_PER_KIB, HTTP_STATUS_BAD_REQUEST from rocrate_validator.models import Profile from rocrate_validator.utils import log as logging from rocrate_validator.utils.cache_warmup import WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_default_http_cache_path, get_profiles_path -from rocrate_validator.constants import BYTES_PER_KIB, HTTP_STATUS_BAD_REQUEST logger = logging.getLogger(__name__) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 0da37d9e8..c6d83582c 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2624,15 +2624,10 @@ def default(self, obj): return obj.__dict__ if isinstance(obj, Path): return str(obj) - if isinstance(obj, Severity): - return obj.name - if isinstance(obj, RequirementCheck): - return obj.identifier - if isinstance(obj, Requirement): + if isinstance(obj, (RequirementCheck, Requirement)): return obj.identifier - if isinstance(obj, RequirementLevel): + if isinstance(obj, (Severity, RequirementLevel)): return obj.name - return super().default(obj) diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index baf1b995f..543074b90 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -16,11 +16,11 @@ from typing import Any, Optional from urllib.parse import urljoin +from rocrate_validator.constants import HTTP_STATUS_OK from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from 
rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.constants import HTTP_STATUS_OK # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 243c77d0e..30dc46657 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -18,6 +18,7 @@ from pathlib import Path from typing import Optional +from rocrate_validator.constants import EXPECTED_CHECK_PARAM_COUNT from rocrate_validator.models import ( LevelCollection, Profile, @@ -31,7 +32,6 @@ ) from rocrate_validator.utils import log as logging from rocrate_validator.utils.python_helpers import get_classes_from_file -from rocrate_validator.constants import EXPECTED_CHECK_PARAM_COUNT # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 91482893e..dbca249a7 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -26,11 +26,11 @@ from rdflib import Graph +from rocrate_validator.constants import HTTP_STATUS_OK from rocrate_validator.errors import ROCrateInvalidURIError from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference, validate_rocrate_uri -from rocrate_validator.constants import HTTP_STATUS_OK # set up logging logger = logging.getLogger(__name__) @@ -217,6 +217,30 @@ def raw_data(self) -> object: def is_local(self) -> bool: return not self.is_remote() + def _check_local_availability(self) -> AvailabilityStatus: + if self.ro_crate.uri.is_local_resource(): + if isinstance(self.ro_crate, ROCrateLocalFolder): + found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path) + return 
AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE + if isinstance(self.ro_crate, ROCrateLocalZip): + if self.id == "./": + return AvailabilityStatus.AVAILABLE + found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( + Path(unquote(str(self.id))) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE + + if self.ro_crate.uri.is_remote_resource(): + if self.id == "./": + found = self.ro_crate.get_file_size(self.id_as_path) > 0 + else: + found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( + Path(unquote(str(self.id))) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE + + raise ROCrateInvalidURIError(uri=self.id, message="Could not determine the availability of the entity") + def check_availability(self) -> AvailabilityStatus: """ Return a fine-grained availability status for this entity. @@ -228,15 +252,10 @@ def check_availability(self) -> AvailabilityStatus: """ try: entity_uri = self.id_as_uri - # Remote entities with a scheme we can natively reach are checked - # by inspecting the remote response status. if entity_uri.is_natively_checkable(): logger.debug("Checking the availability of a remote entity") return entity_uri.check_availability() - # Remote entities with a non-natively-checkable scheme cannot be - # verified (scp://, sftp://, s3://, ...): report UNCHECKABLE so - # callers can warn without invalidating the validation. if entity_uri.is_remote_resource(): logger.debug( "Cannot natively verify availability for entity '%s' (scheme '%s')", @@ -245,36 +264,12 @@ def check_availability(self) -> AvailabilityStatus: ) return AvailabilityStatus.UNCHECKABLE - # Local entity: locate it inside the (local or remote) RO-Crate. 
- if self.ro_crate.uri.is_local_resource(): - if isinstance(self.ro_crate, ROCrateLocalFolder): - found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - if isinstance(self.ro_crate, ROCrateLocalZip): - if self.id == "./": - return AvailabilityStatus.AVAILABLE - found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( - Path(unquote(str(self.id))) - ) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - - if self.ro_crate.uri.is_remote_resource(): - if self.id == "./": - found = self.ro_crate.get_file_size(self.id_as_path) > 0 - else: - found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( - Path(unquote(str(self.id))) - ) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE + return self._check_local_availability() except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) return AvailabilityStatus.UNAVAILABLE - # Fallthrough: the crate URI is neither a recognized local nor a - # remote resource — the entity location cannot be determined. 
- raise ROCrateInvalidURIError(uri=self.id, message="Could not determine the availability of the entity") - def is_available(self) -> bool: return self.check_availability() == AvailabilityStatus.AVAILABLE @@ -1034,20 +1029,21 @@ def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: if not isinstance(uri, URI): uri = URI(uri) + result = False try: # Check for local directory if uri.is_local_directory(): base_path = uri.as_path() - return (base_path / 'bagit.txt').is_file() and \ + result = (base_path / 'bagit.txt').is_file() and \ (base_path / 'data' / 'ro-crate-metadata.json').is_file() # Check for local zip file - if uri.is_local_file(): + elif uri.is_local_file(): path = uri.as_path() if path.suffix == '.zip': with zipfile.ZipFile(path, 'r') as zf: namelist = zf.namelist() - return 'bagit.txt' in namelist and \ + result = 'bagit.txt' in namelist and \ 'data/ro-crate-metadata.json' in namelist # Check for remote zip file @@ -1059,34 +1055,25 @@ def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: if not base_url.endswith('.zip'): # Check for bagit.txt bagit_response = HttpRequester().head(f"{base_url}/bagit.txt") - if bagit_response.status_code != HTTP_STATUS_OK: - return False - - # Check for data/ro-crate-metadata.json - metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") - return metadata_response.status_code == HTTP_STATUS_OK - - # If it's a remote zip file, we need to download it partially - # Temporarily create instance to check - temp_crate = ROCrateRemoteZip(uri) - logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) - # ROCrate.__init__(temp_crate, uri) - # temp_crate._ROCrateRemoteZip__init_zip_reference__() - has_bagit_txt = temp_crate.has_file(Path('bagit.txt')) - logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) - has_ro_crate_metadata = temp_crate.has_file(Path('data/ro-crate-metadata.json')) - logger.debug("Presence of 'data/ro-crate-metadata.json': %s", - 
has_ro_crate_metadata) - result = has_bagit_txt and has_ro_crate_metadata - del temp_crate - return result - + if bagit_response.status_code == HTTP_STATUS_OK: + # Check for data/ro-crate-metadata.json + metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") + result = metadata_response.status_code == HTTP_STATUS_OK + else: + # If it's a remote zip file, we need to download it partially + # Temporarily create instance to check + temp_crate = ROCrateRemoteZip(uri) + logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) + has_bagit_txt = temp_crate.has_file(Path('bagit.txt')) + logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) + has_ro_crate_metadata = temp_crate.has_file(Path('data/ro-crate-metadata.json')) + logger.debug("Presence of 'data/ro-crate-metadata.json': %s", has_ro_crate_metadata) + result = has_bagit_txt and has_ro_crate_metadata + del temp_crate except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) - return False - - return False + return result def __check_search_path__(self, path): """ diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index aa7bd2d1e..0aec420d2 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -19,6 +19,7 @@ from pathlib import Path from typing import Optional, Union +from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST, HTTP_STATUS_GATEWAY_TIMEOUT from rocrate_validator.errors import ProfileNotFound from rocrate_validator.events import Subscriber from rocrate_validator.models import Profile, Severity, ValidationResult, ValidationSettings, Validator @@ -26,7 +27,6 @@ from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.uri import URI -from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST, HTTP_STATUS_GATEWAY_TIMEOUT # set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() 
diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 84981fc45..eea790d35 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -138,6 +138,21 @@ def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: return results +def _get_profile_for_warmup(settings) -> Optional[Profile]: + if getattr(settings, "offline", False): + return None + if getattr(settings, "cache_path", None) is None: + return None + env_value = os.environ.get(constants.AUTO_WARM_ENV_VAR, "1").strip().lower() + if env_value in {"0", "false", "no", "off"}: + logger.debug("Auto warm-up disabled via %s=%s", constants.AUTO_WARM_ENV_VAR, env_value) + return None + profile_identifier = getattr(settings, "profile_identifier", None) + if not profile_identifier: + return None + return _find_profile(profile_identifier, settings) + + def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[WarmUpResult]]: """ Perform a best-effort synchronous warm-up triggered by @@ -151,20 +166,7 @@ def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[War - the environment variable ``ROCRATE_VALIDATOR_AUTO_WARM`` is set to a value disabling the feature (``0``, ``false``, ``no``, ``off``). 
""" - if getattr(settings, "offline", False): - return None - if getattr(settings, "cache_path", None) is None: - return None - env_value = os.environ.get(constants.AUTO_WARM_ENV_VAR, "1").strip().lower() - if env_value in {"0", "false", "no", "off"}: - logger.debug("Auto warm-up disabled via %s=%s", constants.AUTO_WARM_ENV_VAR, env_value) - return None - - profile_identifier = getattr(settings, "profile_identifier", None) - if not profile_identifier: - return None - - profile = _find_profile(profile_identifier, settings) + profile = _get_profile_for_warmup(settings) if profile is None: return None urls = discover_profile_cacheable_urls(profile) @@ -174,12 +176,12 @@ def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[War urls_to_fetch = [u for u in urls if not requester.has_cached(u)] if not urls_to_fetch: logger.debug("Auto warm-up: all %d resources already cached for profile %s", - len(urls), profile_identifier) + len(urls), settings.profile_identifier) return [] results = warm_up_urls(urls_to_fetch) ok = sum(1 for r in results if r.status == "ok") logger.info("Auto warm-up: pre-loaded %d/%d resources for profile %s", - ok, len(urls_to_fetch), profile_identifier) + ok, len(urls_to_fetch), settings.profile_identifier) return results diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 997bd03f0..75634315b 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -31,9 +31,9 @@ from rdflib.plugins.shared.jsonld import context as jsonld_context from rdflib.plugins.shared.jsonld import util as jsonld_util +from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester, OfflineCacheMissError -from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST logger = logging.getLogger(__name__) From 
01e47036f57436c992e420e82ac3af037a58642a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 22:25:53 +0200 Subject: [PATCH 217/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20suppress/fix?= =?UTF-8?q?=20PLR0915=20to=20reduce=20complexity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 2 +- .../profiles/ro-crate/must/0_file_descriptor_format.py | 8 ++++---- rocrate_validator/services.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index cf62c26ba..760a69f3f 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -239,7 +239,7 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.pass_context -def validate(ctx, # noqa: PLR0912 +def validate(ctx, # noqa: PLR0912, PLR0915 profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Optional[Path] = None, profile_identifier: tuple[str, ...] 
= (), diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 543074b90..ff12892f5 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -274,9 +274,6 @@ def _validate_non_root_entity(entity: Any, fail_fast: bool) -> bool: return result def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool: - """ Recursively check if the given data corresponds to a flattened JSON-LD object - and returns False if it does not and is not a root element - """ result = True if isinstance(entity, dict): if is_first: @@ -297,12 +294,15 @@ def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool return False return result + return self._check_flattened_graph(context, is_entity_flat_recursive) + + def _check_flattened_graph(self, context, is_flat): try: fail_fast = bool(context.settings.abort_on_first) json_dict = context.ro_crate.metadata.as_dict() result = True for entity in json_dict["@graph"]: - if not is_entity_flat_recursive(entity, fail_fast=fail_fast): + if not is_flat(entity, fail_fast=fail_fast): context.result.add_issue( f'RO-Crate file descriptor "{context.rel_fd_path}" ' f'is not fully flattened at entity "{entity.get("@id", entity)}"', self) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 0aec420d2..9f9c0ac04 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -87,7 +87,7 @@ def validate( return result -def __initialise_validator__( +def __initialise_validator__( # noqa: PLR0915 settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None ) -> Validator: """ From b12667b5ea62e1618f8307e7980a4ce1f0611585 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 3 Jun 2026 22:29:43 +0200 Subject: [PATCH 218/352] 
=?UTF-8?q?chore:=20=F0=9F=A7=B9=20suppress=20PLW0?= =?UTF-8?q?603=20for=20legitimate=20global=20variable=20usage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/cache_warmup.py | 2 +- rocrate_validator/utils/config.py | 2 +- rocrate_validator/utils/document_loader.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index eea790d35..32ac6dba3 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -195,7 +195,7 @@ def _find_profile(identifier, settings) -> Optional[Profile]: from rocrate_validator.utils.paths import get_profiles_path # noqa: PLC0415 # Load profiles to ensure the requested one is available and its graph is parsed. - global __profiles_loaded + global __profiles_loaded # noqa: PLW0603 if not __profiles_loaded: profiles_path = getattr(settings, "profiles_path", None) or get_profiles_path() extra_profiles_path = getattr(settings, "extra_profiles_path", None) diff --git a/rocrate_validator/utils/config.py b/rocrate_validator/utils/config.py index b42988ad8..0c96cb2d8 100644 --- a/rocrate_validator/utils/config.py +++ b/rocrate_validator/utils/config.py @@ -29,7 +29,7 @@ def get_config() -> dict: :return: The configuration """ - global _config + global _config # noqa: PLW0603 if _config is None: from .paths import get_config_path # noqa: PLC0415 diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 75634315b..3bef77eb8 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -61,7 +61,7 @@ def install_document_loader() -> bool: Returns ``True`` when the loader is active after the call, ``False`` when installation raised an unexpected error (which is logged). 
""" - global _installed + global _installed # noqa: PLW0603 with _install_lock: if _installed: @@ -88,7 +88,7 @@ def uninstall_document_loader() -> bool: Returns ``True`` when the loader is no longer active after the call, ``False`` when uninstallation raised an unexpected error (which is logged). """ - global _installed + global _installed # noqa: PLW0603 with _install_lock: if not _installed: return True From c3485be46e96dfd39fef8f505fc90bac24b6c345 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:16:29 +0200 Subject: [PATCH 219/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20move=20Path?= =?UTF-8?q?=20import=20under=20TYPE=5FCHECKING=20guard=20to=20avoid=20circ?= =?UTF-8?q?ular=20import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index 12f081a2a..3ebdb2bd7 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -14,12 +14,13 @@ from __future__ import annotations -from pathlib import Path from typing import TYPE_CHECKING, Optional, Union if TYPE_CHECKING: # Imported only for type-checking to avoid a circular import: # rocrate_validator.utils.uri imports this module at runtime. 
+ from pathlib import Path + from rocrate_validator.utils.uri import URI From e8c2107f86bcda3adc38386822c28910dbfb9251 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:17:09 +0200 Subject: [PATCH 220/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20fix=20indentati?= =?UTF-8?q?on=20in=20set=20comprehensions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index c6d83582c..2d0131aa3 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2541,7 +2541,7 @@ def failed_requirements(self) -> Collection[Requirement]: """ min_severity = self.context.requirement_severity return {issue.check.requirement for issue in self._issues - if issue.severity >= min_severity} + if issue.severity >= min_severity} # --- Checks --- @property @@ -2551,7 +2551,7 @@ def failed_checks(self) -> Collection[RequirementCheck]: """ min_severity = self.context.requirement_severity return {issue.check for issue in self._issues - if issue.severity >= min_severity} + if issue.severity >= min_severity} def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: """ From fd790c24f8c7a5a843698f0f011302ef464ec019 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:19:07 +0200 Subject: [PATCH 221/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20use=20string?= =?UTF-8?q?=20literals=20in=20cast()=20to=20avoid=20runtime=20type=20impor?= =?UTF-8?q?ts=20[TC006]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 26 +++++++++---------- .../requirements/shacl/checks.py | 6 ++--- .../requirements/shacl/models.py | 4 +-- .../requirements/shacl/requirements.py | 4 +-- rocrate_validator/requirements/shacl/utils.py | 6 ++--- 
.../requirements/shacl/validator.py | 10 +++---- rocrate_validator/rocrate.py | 8 +++--- rocrate_validator/utils/cache_warmup.py | 2 +- .../utils/io_helpers/output/__init__.py | 2 +- 9 files changed, 34 insertions(+), 34 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 2d0131aa3..4a0d5397c 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -289,7 +289,7 @@ def __init__( self._token, self._version = self.__init_token_version__() # Check if the profile is overriding an existing profile - existing_profile = self.__profiles_map.get_by_key(cast(Any, self._profile_node).toPython()) + existing_profile = self.__profiles_map.get_by_key(cast("Any", self._profile_node).toPython()) # If an existing profile is being overridden by a different one, log a warning if existing_profile and existing_profile.path != profile_path: logger.warning( @@ -303,7 +303,7 @@ def __init__( # add the profile to the profiles map self.__profiles_map.add( - cast(Any, self._profile_node).toPython(), + cast("Any", self._profile_node).toPython(), self, token=self.token, name=self.name, @@ -324,7 +324,7 @@ def __get_specification_property__( ) -> Union[str, list[Union[str, URIRef]], None]: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) - values: list = [cast(Any, v).toPython() for v in nodes] if (nodes and as_Python_object) else list(nodes) + values: list = [cast("Any", v).toPython() for v in nodes] if (nodes and as_Python_object) else list(nodes) if pop_first: return values[0] if values else None return values @@ -437,7 +437,7 @@ def is_profile_of(self) -> list[str]: as specified in the profile specification file (i.e., the value of the prof: isProfileOf property in the `profile.ttl` file). 
""" - return cast(list[str], self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False)) + return cast("list[str]", self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False)) @property def is_transitive_profile_of(self) -> list[str]: @@ -446,7 +446,7 @@ def is_transitive_profile_of(self) -> list[str]: as specified in the profile specification file (i.e., the value of the prof: isTransitiveProfileOf property in the `profile.ttl` file). """ - return cast(list[str], self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False)) + return cast("list[str]", self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False)) @property def parents(self) -> list[Profile]: @@ -639,7 +639,7 @@ def __get_consistent_version__(self, candidate_token: str) -> Optional[str]: candidates = { _ for _ in [ - cast(Optional[str], self.__get_specification_property__("version", SCHEMA_ORG_NS)), + cast("Optional[str]", self.__get_specification_property__("version", SCHEMA_ORG_NS)), self.__extract_version_from_token__(candidate_token), self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), self.__extract_version_from_token__(str(self.uri)), @@ -664,7 +664,7 @@ def __extract_token_from_path__(self) -> str: def __init_token_version__(self) -> tuple[str, Optional[str]]: # try to extract the token from the specs or the path - candidate_token = cast(Optional[str], self.__get_specification_property__("hasToken", PROF_NS)) + candidate_token = cast("Optional[str]", self.__get_specification_property__("hasToken", PROF_NS)) if not candidate_token: candidate_token = self.__extract_token_from_path__() logger.debug("Candidate token: %s", candidate_token) @@ -1358,7 +1358,7 @@ def ok_file(p: Path) -> bool: requirement_path, ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) - requirements.extend(cast(Any, requirement_loader).load( + 
requirements.extend(cast("Any", requirement_loader).load( profile, requirement_level, requirement_path, @@ -1733,7 +1733,7 @@ def profile(self) -> Profile: """ Get the profile being validated """ - return cast(Profile, self._stats.get("profile")) + return cast("Profile", self._stats.get("profile")) @property def profiles(self) -> list[Profile]: @@ -1747,7 +1747,7 @@ def severity(self) -> Severity: """ Get the validation severity level """ - return cast(Severity, self._stats.get("severity")) + return cast("Severity", self._stats.get("severity")) @property def checks_by_severity(self) -> dict: @@ -1875,10 +1875,10 @@ def __initialise__(cls, validation_settings: ValidationSettings): profiles: list[Profile] = Profile.load_profiles( validation_settings.profiles_path, extra_profiles_path=validation_settings.extra_profiles_path, - severity=cast(Severity, severity_validation), + severity=cast("Severity", severity_validation), allow_requirement_check_override=validation_settings.allow_requirement_check_override, ) - profile: Profile = cast(Profile, Profile.find_in_list(profiles, validation_settings.profile_identifier)) + profile: Profile = cast("Profile", Profile.find_in_list(profiles, validation_settings.profile_identifier)) target_profile_identifier = profile.identifier # initialize the profiles list profiles = [profile] @@ -2600,7 +2600,7 @@ def to_dict(self) -> dict: result: dict[str, Any] = { "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, "validation_settings": validation_settings, - "passed": self.passed(cast(Severity, self.context.settings.requirement_severity)), + "passed": self.passed(cast("Severity", self.context.settings.requirement_severity)), "issues": [issue.to_dict() for issue in self.issues], } # add validator version to the settings diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index d1931b75f..2d035b41a 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ 
b/rocrate_validator/requirements/shacl/checks.py @@ -188,7 +188,7 @@ def get_source_snippet(self) -> Optional[SourceSnippet]: # identify the owner of the shape owner: SHACLNode = self._shape while getattr(owner, "parent", None) is not None: - owner = cast(SHACLNode, owner.parent) + owner = cast("SHACLNode", owner.parent) # if the shape is not a root shape, include the triples linking the owner to the shape if owner is not self._shape: shacl = Namespace(SHACL_NS) @@ -413,7 +413,7 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, failed_requirement_checks_notified = [ _.check.identifier for _ in shacl_context.result.get_issues( - min_severity=cast(Severity, shacl_context.settings.requirement_severity)) + min_severity=cast("Severity", shacl_context.settings.requirement_severity)) ] for requirementCheck in sorted(failed_requirements_checks, key=lambda x: (x.identifier, x.severity)): # if the check is not in the current profile @@ -424,7 +424,7 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, ): continue for violation in failed_requirements_checks_violations[requirementCheck.identifier]: - violating_entity = make_uris_relative(cast(Any, violation.focusNode).toPython(), + violating_entity = make_uris_relative(cast("Any", violation.focusNode).toPython(), shacl_context.publicID) violating_property = violation.resultPath.toPython() if violation.resultPath else None violation_message = violation.get_result_message(str(shacl_context.rocrate_uri)) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 1fdf02c56..ced52f3af 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -271,7 +271,7 @@ def graph(self) -> Graph: @property def parent(self) -> Optional[Shape]: """Return the parent shape of the shape property""" - return cast(Optional[Shape], self._parent) + return cast("Optional[Shape]", 
self._parent) @property def propertyGroup(self) -> Optional[PropertyGroup]: @@ -393,7 +393,7 @@ def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = N group = __process_property_group__(property_groups, p_shape) if group and group not in shapes: grouped = True - shapes.append(cast(Shape, group)) + shapes.append(cast("Shape", group)) if not group: ungrouped_properties.append(p_shape) diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index cb535f21e..13d2c8347 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -56,7 +56,7 @@ def __init_checks__(self) -> list[RequirementCheck]: # assign a check to each property of the shape checks: list[RequirementCheck] = [] # check if the shape has nested properties - has_properties = hasattr(self.shape, "properties") and len(cast(Any, self.shape).properties) > 0 + has_properties = hasattr(self.shape, "properties") and len(cast("Any", self.shape).properties) > 0 # create a check for the shape itself, hidden if the shape has nested properties checks.append( SHACLCheck( @@ -69,7 +69,7 @@ def __init_checks__(self) -> list[RequirementCheck]: ) # create a check for each property if the shape has nested properties if has_properties: - for prop in cast(Any, self.shape).properties: + for prop in cast("Any", self.shape).properties: logger.debug("Creating check for property %s %s", prop.name, prop.description) property_check = SHACLCheck(self, prop) logger.debug("Property check %s: %s", property_check.name, property_check.description) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 18f5784c0..c578830c1 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -77,14 +77,14 @@ def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: 
Optio skip_properties = ["node"] if exclude is None else [*exclude, "node"] triples = node_graph.triples((node, None, None)) for _node, p, o in triples: - predicate_as_string = cast(Any, p).toPython() + predicate_as_string = cast("Any", p).toPython() # logger.debug(f"Processing {predicate_as_string} of property graph {node}") if predicate_as_string.startswith(SHACL_NS): property_name = predicate_as_string.split("#")[-1] if property_name in skip_properties: continue try: - setattr(obj, property_name, cast(Any, o).toPython()) + setattr(obj, property_name, cast("Any", o).toPython()) except AttributeError as e: logger.error(f"Error injecting attribute {property_name}: {e}") # logger.debug("Injected attribute %s: %s", property_name, o.toPython()) @@ -135,7 +135,7 @@ def compute_key(g: Graph, s: Node) -> str: if isinstance(s, BNode): return compute_hash(g, s) - return cast(Any, s).toPython() + return cast("Any", s).toPython() class ShapesList: diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 9e57dcdf1..07aa64399 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -134,8 +134,8 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: # logger.debug("Processing check: %s", check) if check.overridden and check.requirement.profile != self.target_profile: # logger.debug("Overridden check: %s", check) - profile_shapes_graph -= cast(Any, check).shape.graph - profile_shapes.pop(cast(Any, check).shape.key) + profile_shapes_graph -= cast("Any", check).shape.graph + profile_shapes.pop(cast("Any", check).shape.key) # add the shapes to the registry self._shapes_registry.extend(profile_shapes, profile_shapes_graph) @@ -299,7 +299,7 @@ def get_result_severity(self) -> Severity: severity = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}resultSeverity")) assert severity is not None, f"Unable to get severity from 
violation node {self._violation_node}" # we need to map the SHACL severity term to our Severity enum values - self._severity = map_severity(cast(Any, severity).toPython()) + self._severity = map_severity(cast("Any", severity).toPython()) return self._severity @property @@ -315,7 +315,7 @@ def get_result_message(self, ro_crate_path: Union[Path, str]) -> str: if not self._result_message: message = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}resultMessage")) assert message is not None, f"Unable to get result message from violation node {self._violation_node}" - self._result_message = make_uris_relative(cast(Any, message).toPython(), ro_crate_path) + self._result_message = make_uris_relative(cast("Any", message).toPython(), ro_crate_path) return self._result_message @property @@ -324,7 +324,7 @@ def sourceShape(self) -> Union[URIRef, BNode]: self._source_shape_node = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}sourceShape")) assert self._source_shape_node is not None, \ f"Unable to get source shape node from violation node {self._violation_node}" - return cast(Union[URIRef, BNode], self._source_shape_node) + return cast("Union[URIRef, BNode]", self._source_shape_node) class SHACLValidationResult: diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index dbca249a7..ce2f2d435 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -44,11 +44,11 @@ def __init__(self, metadata: ROCrateMetadata, raw_data: dict) -> None: @property def id(self) -> str: - return cast(str, self._raw_data.get('@id')) + return cast('str', self._raw_data.get('@id')) @property def type(self) -> Union[str, list[str]]: - return cast(Union[str, list[str]], self._raw_data.get('@type')) + return cast('Union[str, list[str]]', self._raw_data.get('@type')) def is_dataset(self) -> bool: return self.has_type('Dataset') @@ -58,7 +58,7 @@ def is_file(self) -> bool: @property def name(self) -> str: - return cast(str, 
self._raw_data.get('name')) + return cast('str', self._raw_data.get('name')) @property def metadata(self) -> ROCrateMetadata: @@ -406,7 +406,7 @@ def get_conforms_to(self) -> Optional[list[str]]: def as_json(self) -> str: if not self._json: - self._json = cast(str, self.ro_crate.get_file_content( + self._json = cast('str', self.ro_crate.get_file_content( Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False )) return self._json diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 32ac6dba3..0a6fb7a43 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -80,7 +80,7 @@ def discover_profile_cacheable_urls(profile: Profile) -> list[str]: urls: list[str] = [] try: for row in graph.query(_CACHEABLE_URLS_SPARQL): - artifact = cast(Any, row).artifact + artifact = cast("Any", row).artifact if artifact is None: continue value = str(artifact) diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 53b4143a8..378ad161c 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -77,4 +77,4 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR yield self._data else: # ``formatter`` is a formatter class instantiated with the data to render. 
- yield from cast(Any, formatter)(self._data).__rich_console__(console, options) + yield from cast("Any", formatter)(self._data).__rich_console__(console, options) From 1c12725c60b0bd8e6f845a1785474790c2599ee8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:23:55 +0200 Subject: [PATCH 222/352] =?UTF-8?q?refactor:=20=F0=9F=8E=A8=20replace=20te?= =?UTF-8?q?rnary=20with=20or=20for=20idiomatic=20truthy=20fallback=20[FURB?= =?UTF-8?q?110]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 2 +- rocrate_validator/requirements/python/__init__.py | 2 +- rocrate_validator/requirements/shacl/models.py | 2 +- rocrate_validator/requirements/shacl/requirements.py | 2 +- rocrate_validator/requirements/shacl/validator.py | 4 ++-- rocrate_validator/utils/uri.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 4a0d5397c..714b23327 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -3262,7 +3262,7 @@ def extra_profiles_path(self) -> Optional[Path]: extra_profiles_path = self.settings.extra_profiles_path if isinstance(extra_profiles_path, str): extra_profiles_path = Path(extra_profiles_path) - return extra_profiles_path if extra_profiles_path else None + return extra_profiles_path or None @property def requirement_severity(self) -> Severity: diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 30dc46657..60284b437 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -215,7 +215,7 @@ def check(name: Optional[str] = None, :rtype: Callable """ def decorator(func): - check_name = name if name else func.__name__ + check_name = name or func.__name__ sig = inspect.signature(func) if len(sig.parameters) != EXPECTED_CHECK_PARAM_COUNT: raise 
RuntimeError(f"Invalid check {check_name}. Checks are expected to " diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index ced52f3af..3a19bc1ec 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -163,7 +163,7 @@ class SHACLNodeCollection(SHACLNode): def __init__(self, node: Node, graph: Graph, properties: Optional[list[PropertyShape]] = None): super().__init__(node, graph) # store the properties - self._properties = properties if properties else [] + self._properties = properties or [] @property def properties(self) -> list[PropertyShape]: diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 13d2c8347..d934ac693 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -38,7 +38,7 @@ class SHACLRequirement(Requirement): def __init__(self, shape: Shape, profile: Profile, path: Path): self._shape = shape super().__init__( - profile, shape.name if shape.name else "", shape.description if shape.description else "", path + profile, shape.name or "", shape.description or "", path ) # init checks self._checks = self.__init_checks__() diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 07aa64399..ec8f783af 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -219,7 +219,7 @@ def __load_ontology_graph__(self, profile_path: Path, # 1. First, try to get @base from the data graph metadata # 2. 
Fall back to the default publicID (RO-Crate URI) data_graph_base = self.__get_data_graph_base__() - public_id = data_graph_base if data_graph_base else self.publicID + public_id = data_graph_base or self.publicID if data_graph_base: logger.debug("Using @base from data graph metadata: %s", data_graph_base) @@ -471,7 +471,7 @@ def validate( data_graph, shacl_graph=self.shapes_graph, ont_graph=self.ont_graph, - inference=inference if inference else "owlrl" if self.ont_graph else None, + inference=inference or ("owlrl" if self.ont_graph else None), inplace=inplace, abort_on_first=abort_on_first, allow_infos=allow_infos, diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 07efb77c8..4efba1e7c 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -166,7 +166,7 @@ def scheme(self) -> str: @property def fragment(self) -> Optional[str]: fragment = self._parse_result.fragment - return fragment if fragment else None + return fragment or None def get_scheme(self) -> str: return self._parse_result.scheme From 74b31374533610d52bbd05c7f3f7af5e61e38dbc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:27:42 +0200 Subject: [PATCH 223/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20re?= =?UTF-8?q?lative=20imports=20with=20absolute=20imports=20[TID252]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/io_helpers/output/text/__init__.py | 4 ++-- rocrate_validator/utils/io_helpers/output/text/formatters.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index 040fe9a02..494ab3114 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -21,8 +21,8 @@ from rocrate_validator.models import ValidationResult, 
ValidationStatistics from rocrate_validator.utils import log as logging -from .. import BaseOutputFormatter -from ..console import Console +from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter +from rocrate_validator.utils.io_helpers.output.console import Console from .formatters import ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter # set up logging diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index bf0f139a9..efa1b1b21 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -24,8 +24,8 @@ from rocrate_validator.utils.io_helpers.colors import get_severity_color from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout -from .. import OutputFormatter -from ..console import Console +from rocrate_validator.utils.io_helpers.output import OutputFormatter +from rocrate_validator.utils.io_helpers.output.console import Console # set up logging logger = logging.getLogger(__name__) From 2388e4f500db623c540dcaa0788649203cc20c40 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:30:47 +0200 Subject: [PATCH 224/352] =?UTF-8?q?Istyle:=20=F0=9F=94=A7=20fix=20implicit?= =?UTF-8?q?=20string=20concatenation=20with=20explicit=20parentheses=20(IS?= =?UTF-8?q?C00*)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/errors.py | 4 +++- rocrate_validator/requirements/shacl/checks.py | 2 +- rocrate_validator/utils/log.py | 8 ++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index 3ebdb2bd7..83e18b004 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -279,8 +279,10 @@ def __repr__(self) -> str: @classmethod def 
default_error_message(cls, uri: Union[str, Path, URI]) -> str: - return f"\"{uri!s}\" is not a valid RO-Crate URI. "\ + return ( + f"\"{uri!s}\" is not a valid RO-Crate URI. " "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." + ) class ROCrateMetadataNotFoundError(ROCValidatorError): diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 2d035b41a..8de33b3ad 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -297,7 +297,7 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): logger.debug(f"Execution time for getting data graph: {end_time - start_time} seconds") except json.decoder.JSONDecodeError as e: logger.debug( - "Unable to perform metadata validation " "due to one or more errors in the JSON-LD data file: %s", + "Unable to perform metadata validation due to one or more errors in the JSON-LD data file: %s", e, ) shacl_context.result.add_issue( diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index ee89aa544..e0b297593 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -32,11 +32,15 @@ def get_log_format(level: int): """Get the log format based on the log level""" - log_format = '[%(log_color)s%(asctime)s%(reset)s] %(levelname)s in %(yellow)s%(module)s%(reset)s: '\ + log_format = ( + '[%(log_color)s%(asctime)s%(reset)s] %(levelname)s in %(yellow)s%(module)s%(reset)s: ' '%(light_white)s%(message)s%(reset)s' + ) if level == DEBUG: - log_format = '%(log_color)s%(levelname)s%(reset)s:%(yellow)s%(name)s:%(module)s::%(funcName)s%(reset)s '\ + log_format = ( + '%(log_color)s%(levelname)s%(reset)s:%(yellow)s%(name)s:%(module)s::%(funcName)s%(reset)s ' '@ %(light_green)sline: %(lineno)s%(reset)s - %(light_black)s%(message)s%(reset)s' + ) return log_format From 2097e74f7b6e9fb80fdb61433e002922aee67d8e Mon Sep 17 
00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:32:31 +0200 Subject: [PATCH 225/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20wrap=20long=20l?= =?UTF-8?q?ine=20in=20SHACLRequirement.hidden=20property=20[E501]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/requirements.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index d934ac693..d0f5e3c82 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -83,7 +83,10 @@ def shape(self) -> Shape: @property def hidden(self) -> bool: - return bool(self.shape.node is not None and (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph) + return bool( + self.shape.node is not None + and (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph + ) @classmethod def finalize(cls, context: ValidationContext) -> None: From b05efd933e68444bd313a3779eb50bb939c7088a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:42:52 +0200 Subject: [PATCH 226/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20move=20type-?= =?UTF-8?q?only=20imports=20under=20TYPE=5FCHECKING=20guards=20[TC001]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/ui/text/validate.py | 8 +++++--- rocrate_validator/utils/io_helpers/input.py | 9 ++++++--- .../utils/io_helpers/output/text/__init__.py | 8 +++++--- .../utils/io_helpers/output/text/formatters.py | 9 ++++++--- .../utils/io_helpers/output/text/layout/report.py | 6 ++++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index ffaff3bda..cab6fefef 100644 --- a/rocrate_validator/cli/ui/text/validate.py 
+++ b/rocrate_validator/cli/ui/text/validate.py @@ -15,15 +15,17 @@ from __future__ import annotations from collections.abc import Callable -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional -from rocrate_validator.models import ValidationResult, ValidationSettings, ValidationStatistics from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output.console import Console -from rocrate_validator.utils.io_helpers.output.pager import SystemPager from rocrate_validator.utils.io_helpers.output.text import TextOutputFormatter from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout +if TYPE_CHECKING: + from rocrate_validator.models import ValidationResult, ValidationSettings, ValidationStatistics + from rocrate_validator.utils.io_helpers.output.pager import SystemPager + # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index e1f0001eb..a9cc759c1 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -15,15 +15,18 @@ from __future__ import annotations import sys -from typing import Optional +from typing import TYPE_CHECKING, Optional from InquirerPy import prompt from InquirerPy.base.control import Choice -from rich.console import Console -from rocrate_validator.models import Profile from rocrate_validator.utils import log as logging +if TYPE_CHECKING: + from rich.console import Console + + from rocrate_validator.models import Profile + # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index 494ab3114..f3d772d49 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -14,17 +14,19 @@ from 
__future__ import annotations -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from rich.console import ConsoleOptions, RenderResult from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging - from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter -from rocrate_validator.utils.io_helpers.output.console import Console + from .formatters import ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter +if TYPE_CHECKING: + from rocrate_validator.utils.io_helpers.output.console import Console + # set up logging logger = logging.getLogger(__name__) diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index efa1b1b21..a6b429906 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -14,18 +14,21 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from rich.align import Align from rich.console import ConsoleOptions, RenderResult from rich.markdown import Markdown from rich.padding import Padding -from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color +from rocrate_validator.utils.io_helpers.output import OutputFormatter from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout -from rocrate_validator.utils.io_helpers.output import OutputFormatter -from rocrate_validator.utils.io_helpers.output.console import Console +if TYPE_CHECKING: + from rocrate_validator.models import ValidationResult, ValidationStatistics + from rocrate_validator.utils.io_helpers.output.console import Console # set up logging logger = logging.getLogger(__name__) diff --git 
a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index ffde15e26..88e8255a3 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -17,7 +17,7 @@ import threading import time from collections.abc import Callable -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from rich.align import Align from rich.layout import Layout @@ -41,12 +41,14 @@ ) from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.utils.io_helpers.output.console import Console from rocrate_validator.utils.uri import URI from rocrate_validator.utils.versioning import get_version from .progress import ProgressMonitor +if TYPE_CHECKING: + from rocrate_validator.utils.io_helpers.output.console import Console + # set up logging logger = logging.getLogger(__name__) From fb928d073dee1bf45549d6185bb9b4d374a21bd0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:48:25 +0200 Subject: [PATCH 227/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20move=20third?= =?UTF-8?q?-party=20type-only=20imports=20under=20TYPE=5FCHECKING=20and=20?= =?UTF-8?q?fix=20import=20order=20(TC002)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/models.py | 6 ++++-- rocrate_validator/requirements/shacl/validator.py | 6 ++++-- rocrate_validator/utils/io_helpers/output/__init__.py | 5 +++-- rocrate_validator/utils/io_helpers/output/text/__init__.py | 5 ++--- .../utils/io_helpers/output/text/formatters.py | 5 +++-- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 3a19bc1ec..37a13c177 100644 --- 
a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -15,10 +15,12 @@ from __future__ import annotations from pathlib import Path -from typing import Optional, Union, cast +from typing import TYPE_CHECKING, Optional, Union, cast from rdflib import Graph, Literal, Namespace, URIRef -from rdflib.term import Node + +if TYPE_CHECKING: + from rdflib.term import Node from rocrate_validator.constants import SHACL_NS from rocrate_validator.models import LevelCollection, RequirementLevel, Severity diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index ec8f783af..6626f63af 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -16,13 +16,15 @@ import os from pathlib import Path -from typing import Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast import pyshacl -from pyshacl.pytypes import GraphLike from rdflib import BNode, Graph from rdflib.term import Node, URIRef +if TYPE_CHECKING: + from pyshacl.pytypes import GraphLike + from rocrate_validator.constants import ( DEFAULT_ONTOLOGY_FILE, RDF_SERIALIZATION_FORMATS, diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 378ad161c..db970fa61 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -17,10 +17,11 @@ from typing import TYPE_CHECKING, Any, Optional, Protocol, cast -from rich.console import ConsoleOptions, RenderResult - from rocrate_validator.utils import log as logging +if TYPE_CHECKING: + from rich.console import ConsoleOptions, RenderResult + if TYPE_CHECKING: # The formatters render to the application's Console subclass (which adds # ``interactive``/disabling behaviour); typing the protocol against it keeps diff --git 
a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index f3d772d49..8b5621223 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -16,15 +16,14 @@ from typing import TYPE_CHECKING, Any, Optional -from rich.console import ConsoleOptions, RenderResult - from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging -from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter +from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter from .formatters import ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter if TYPE_CHECKING: + from rich.console import ConsoleOptions, RenderResult from rocrate_validator.utils.io_helpers.output.console import Console # set up logging diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index a6b429906..11113a1d3 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -17,16 +17,17 @@ from typing import TYPE_CHECKING from rich.align import Align -from rich.console import ConsoleOptions, RenderResult from rich.markdown import Markdown from rich.padding import Padding from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.utils.io_helpers.output import OutputFormatter from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout +from rocrate_validator.utils.io_helpers.output import OutputFormatter + if TYPE_CHECKING: + from rich.console import ConsoleOptions, RenderResult from rocrate_validator.models import ValidationResult, ValidationStatistics from 
rocrate_validator.utils.io_helpers.output.console import Console From d72035171bbd7e5a2bdfcbd2416a9fcd36e36ebf Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:53:43 +0200 Subject: [PATCH 228/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20move=20stand?= =?UTF-8?q?ard=20library=20type-only=20imports=20under=20TYPE=5FCHECKING?= =?UTF-8?q?=20guards=20(TC003)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 18 ++++++++++-------- rocrate_validator/cli/ui/text/validate.py | 3 ++- rocrate_validator/models.py | 6 ++++-- rocrate_validator/requirements/shacl/models.py | 3 ++- rocrate_validator/requirements/shacl/utils.py | 4 +++- rocrate_validator/utils/cache_warmup.py | 3 ++- .../io_helpers/output/text/layout/report.py | 6 ++---- 7 files changed, 25 insertions(+), 18 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 760a69f3f..77d61898a 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -17,26 +17,28 @@ import os import sys from contextlib import nullcontext -from pathlib import Path -from typing import Optional, Union +from typing import TYPE_CHECKING -import rich_click as click from rich.padding import Padding from rich.rule import Rule +from rich_click import click from rocrate_validator import constants, services from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli from rocrate_validator.cli.ui.text.validate import ValidationCommandView from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.models import Severity, ValidationResult, ValidationSettings +from rocrate_validator.models import Severity, ValidationResult from rocrate_validator.utils import log as logging -from rocrate_validator.utils.io_helpers.input import get_single_char, multiple_choice 
+from rocrate_validator.utils.io_helpers.input import get_single_char from rocrate_validator.utils.io_helpers.output.console import Console -from rocrate_validator.utils.io_helpers.output.json import JSONOutputFormatter -from rocrate_validator.utils.io_helpers.output.text import TextOutputFormatter -from rocrate_validator.utils.io_helpers.output.text.layout.report import LiveTextProgressLayout, get_app_header_rule +from rocrate_validator.utils.io_helpers.output.text.layout.report import get_app_header_rule from rocrate_validator.utils.paths import get_profiles_path + +if TYPE_CHECKING: + from pathlib import Path + +# from rich.markdown import Markdown from rocrate_validator.utils.uri import validate_rocrate_uri # from rich.markdown import Markdown diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index cab6fefef..8a607e770 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -14,7 +14,6 @@ from __future__ import annotations -from collections.abc import Callable from typing import TYPE_CHECKING, Any, Optional from rocrate_validator.utils import log as logging @@ -23,6 +22,8 @@ from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout if TYPE_CHECKING: + from collections.abc import Callable + from rocrate_validator.models import ValidationResult, ValidationSettings, ValidationStatistics from rocrate_validator.utils.io_helpers.output.pager import SystemPager diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 714b23327..e28f7c1b7 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -21,14 +21,16 @@ import json import re from abc import ABC, abstractmethod -from collections.abc import Collection from dataclasses import asdict, dataclass from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import Any, Optional, Protocol, 
Union, cast +from typing import TYPE_CHECKING, Any, Optional, Protocol, Union, cast from urllib.error import HTTPError +if TYPE_CHECKING: + from collections.abc import Collection + import enum_tools from rdflib import RDF, RDFS, Graph, Namespace, URIRef diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 37a13c177..3b863d90b 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -14,12 +14,13 @@ from __future__ import annotations -from pathlib import Path from typing import TYPE_CHECKING, Optional, Union, cast from rdflib import Graph, Literal, Namespace, URIRef if TYPE_CHECKING: + from pathlib import Path + from rdflib.term import Node from rocrate_validator.constants import SHACL_NS diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index c578830c1..9c77124b4 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -15,9 +15,11 @@ from __future__ import annotations import hashlib -from pathlib import Path from typing import TYPE_CHECKING, Any, Optional, Union, cast +if TYPE_CHECKING: + from pathlib import Path + from rdflib import RDF, BNode, Graph, Namespace from rdflib.term import Node diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 0a6fb7a43..57d902d10 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -26,7 +26,6 @@ from __future__ import annotations import os -from collections.abc import Iterable, Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Optional, cast @@ -35,6 +34,8 @@ from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + from rocrate_validator.models import Profile, ValidationSettings # 
set up logging diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 88e8255a3..f63f3b897 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -16,7 +16,6 @@ import threading import time -from collections.abc import Callable from typing import TYPE_CHECKING, Any, Optional from rich.align import Align @@ -32,9 +31,6 @@ ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, - Severity, - ValidationContext, - ValidationEvent, ValidationResult, ValidationSettings, ValidationStatistics, @@ -47,6 +43,8 @@ from .progress import ProgressMonitor if TYPE_CHECKING: + from collections.abc import Callable + from rocrate_validator.utils.io_helpers.output.console import Console # set up logging From 23a85ce8de8a70fc8763b4d2407abdb6c071ffc3 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 06:59:01 +0200 Subject: [PATCH 229/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20op?= =?UTF-8?q?en()=20with=20Path.open()=20for=20consistency=20(PTH123)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 4 ++-- rocrate_validator/models.py | 4 ++-- rocrate_validator/services.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 77d61898a..faa1fa6cf 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -508,7 +508,7 @@ def validate(ctx, # noqa: PLR0912, PLR0915 if output_file and output_format == "text": if interactive: console.print(f"\n{' '*2}📝 [bold]Writing validation results to file[/bold]{'.'*4} ", end="") - with open(output_file, "w", encoding="utf-8") if output_file else sys.stdout as f: + with 
output_file.open("w", encoding="utf-8") if output_file else sys.stdout as f: out = Console(color_system=None, width=output_line_width, height=31, file=f) if output_format == "text": out.register_formatter(TextOutputFormatter()) @@ -553,7 +553,7 @@ def validate(ctx, # noqa: PLR0912, PLR0915 console.print(f"\n{' '*2}📋 [bold]The validation report in JSON format: [/bold]\n") # Generate the JSON output and write it to the specified output file or to stdout - with open(output_file, "w", encoding="utf-8") if output_file else nullcontext(sys.stdout) as f: + with output_file.open("w", encoding="utf-8") if output_file else nullcontext(sys.stdout) as f: out = Console(width=output_line_width, file=f) out.register_formatter(JSONOutputFormatter()) out.print(results) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index e28f7c1b7..1229faa90 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -513,7 +513,7 @@ def description(self) -> str: """ if not self._description: if self.path and self.readme_file_path.exists(): - with open(self.readme_file_path, encoding="utf-8") as f: + with self.readme_file_path.open(encoding="utf-8") as f: self._description = f.read() else: self._description = self.comment @@ -2615,7 +2615,7 @@ def to_json(self, path: Optional[Path] = None) -> str: """ result = json.dumps(self.to_dict(), indent=4, cls=CustomEncoder) if path: - with open(path, "w", encoding="utf-8") as f: + with path.open("w", encoding="utf-8") as f: f.write(result) return result diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 9f9c0ac04..ce0430642 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -177,7 +177,7 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): raise FileNotFoundError( f"Failed to download remote RO-Crate '{rocrate_path.uri}' (status {r.status_code})." 
) - with open(tmp_file.name, "wb") as f: + with Path(tmp_file.name).open("wb") as f: shutil.copyfileobj(r.raw, f) logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) # continue with the validation process by extracting the RO-Crate and validating it From ce813c3717e9df7df4ece17b28e3385a0fe177b5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:04:36 +0200 Subject: [PATCH 230/352] =?UTF-8?q?fix:=20=F0=9F=94=A7=20restore=20runtime?= =?UTF-8?q?-used=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 14 +++++++------- .../utils/io_helpers/output/text/layout/report.py | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index faa1fa6cf..eb95c9b3f 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -17,7 +17,7 @@ import os import sys from contextlib import nullcontext -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, Union from rich.padding import Padding from rich.rule import Rule @@ -28,19 +28,19 @@ from rocrate_validator.cli.main import cli from rocrate_validator.cli.ui.text.validate import ValidationCommandView from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.models import Severity, ValidationResult +from rocrate_validator.models import Severity, ValidationResult, ValidationSettings from rocrate_validator.utils import log as logging -from rocrate_validator.utils.io_helpers.input import get_single_char +from rocrate_validator.utils.io_helpers.input import get_single_char, multiple_choice from rocrate_validator.utils.io_helpers.output.console import Console -from rocrate_validator.utils.io_helpers.output.text.layout.report import get_app_header_rule +from rocrate_validator.utils.io_helpers.output.json 
import JSONOutputFormatter +from rocrate_validator.utils.io_helpers.output.text import TextOutputFormatter +from rocrate_validator.utils.io_helpers.output.text.layout.report import LiveTextProgressLayout, get_app_header_rule from rocrate_validator.utils.paths import get_profiles_path +from rocrate_validator.utils.uri import validate_rocrate_uri if TYPE_CHECKING: from pathlib import Path -# from rich.markdown import Markdown -from rocrate_validator.utils.uri import validate_rocrate_uri - # from rich.markdown import Markdown # from rich.table import Table diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index f63f3b897..9bad2f6b8 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -31,6 +31,9 @@ ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, + Severity, + ValidationContext, + ValidationEvent, ValidationResult, ValidationSettings, ValidationStatistics, From ec133487428ac7197131b73c70861a4881e151b8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:09:16 +0200 Subject: [PATCH 231/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path.abspath=20with=20Path.resolve()=20[PTH100]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index eb95c9b3f..061207f73 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -14,10 +14,10 @@ from __future__ import annotations -import os import sys from contextlib import nullcontext -from typing import TYPE_CHECKING, Optional, Union +from pathlib import Path +from 
typing import Optional, Union from rich.padding import Padding from rich.rule import Rule @@ -38,9 +38,6 @@ from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.uri import validate_rocrate_uri -if TYPE_CHECKING: - from pathlib import Path - # from rich.markdown import Markdown # from rich.table import Table @@ -275,8 +272,8 @@ def validate(ctx, # noqa: PLR0912, PLR0915 if not interactive or sys.platform == "win32": enable_pager = False # Log the input parameters for debugging - logger.debug("profiles_path: %s", os.path.abspath(profiles_path)) - logger.debug("extra_profiles_path: %s", os.path.abspath(extra_profiles_path) if extra_profiles_path else None) + logger.debug("profiles_path: %s", Path(profiles_path).resolve()) + logger.debug("extra_profiles_path: %s", Path(extra_profiles_path).resolve() if extra_profiles_path else None) logger.debug("profile_identifier: %s", profile_identifier) logger.debug("requirement_severity: %s", requirement_severity) logger.debug("requirement_severity_only: %s", requirement_severity_only) @@ -288,7 +285,7 @@ def validate(ctx, # noqa: PLR0912, PLR0915 # Cache settings logger.debug("cache_max_age: %s", cache_max_age) - logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None) + logger.debug("cache_path: %s", Path(cache_path).resolve() if cache_path else None) logger.debug("no_cache: %s", no_cache) logger.debug("offline: %s", offline) @@ -301,7 +298,7 @@ def validate(ctx, # noqa: PLR0912, PLR0915 ) if rocrate_uri: - logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri)) + logger.debug("rocrate_path: %s", Path(rocrate_uri).resolve()) # Warn the user when a remote RO-Crate is about to be validated in offline mode: # the cached copy (if any) will be used, and it may be out of sync with the remote. 
From da9fdab7062409ba3726cde9bcd33a673e7458cc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:12:59 +0200 Subject: [PATCH 232/352] =?UTF-8?q?style:=20=F0=9F=94=A7=20reorganize=20ri?= =?UTF-8?q?ch=5Fclick=20import=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 061207f73..35c7df602 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -19,9 +19,9 @@ from pathlib import Path from typing import Optional, Union +import rich_click as click from rich.padding import Padding from rich.rule import Rule -from rich_click import click from rocrate_validator import constants, services from rocrate_validator.cli.commands.errors import handle_error From a853317081ffba91df8062426e5cd9f1df1066c6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:21:12 +0200 Subject: [PATCH 233/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path.dirname=20with=20Path.parent=20(PTH120)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/paths.py | 2 +- rocrate_validator/utils/python_helpers.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 4e4aad2f8..5bd58e5f8 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -19,7 +19,7 @@ from rocrate_validator.utils import log as logging # current directory -CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) +CURRENT_DIR = str(Path(__file__).resolve().parent) # set up logging diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index 
a600f708e..3bf5300ee 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -48,7 +48,7 @@ def get_classes_from_file( logger.debug("Module: %r", module_name) # Add the directory containing the file to the system path - sys.path.insert(0, os.path.dirname(file_path)) + sys.path.insert(0, str(file_path.parent)) # Import the module module = import_module(module_name) @@ -103,7 +103,7 @@ def get_requirement_class_by_name(requirement_name: str) -> type: # convert the module name to a path module_path = module_name.replace(".", "/") # add the path to the system path - sys.path.insert(0, os.path.dirname(module_path)) + sys.path.insert(0, str(Path(module_path).parent)) # Import the module module = import_module(module_name) From e958cdeaf7dd5ea5f71c55ef41f8ec24f7e6c451 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:24:15 +0200 Subject: [PATCH 234/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path.exists=20with=20Path.exists()=20(PTH110)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/validator.py | 2 +- rocrate_validator/utils/http.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 6626f63af..5624fcba0 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -213,7 +213,7 @@ def __load_ontology_graph__(self, profile_path: Path, # load the graph of ontologies ontology_graph: Optional[Graph] = None ontology_path = self.__get_ontology_path__(profile_path, ontology_filename) - if os.path.exists(ontology_path): + if ontology_path.exists(): logger.debug("Loading ontologies: %s", ontology_path) ontology_graph = Graph() diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 
38c0b87a0..49d2de9c3 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -20,6 +20,7 @@ import random import string import threading +from pathlib import Path from typing import Any, Optional import requests @@ -220,7 +221,7 @@ def cleanup(self): try: logger.debug(f"Deleting cache directory: {self.session.cache.cache_name}") cache_path = f"{self.session.cache.cache_name}.sqlite" - if os.path.exists(cache_path): + if Path(cache_path).exists(): os.remove(cache_path) logger.debug(f"Deleted cache directory: {cache_path}") except Exception as e: @@ -342,7 +343,7 @@ def cache_info(self) -> dict[str, Any]: info["entries"] = sum(1 for _ in cache.urls()) except Exception as e: logger.debug("Unable to count cache entries: %s", e) - if info["path"] and os.path.exists(info["path"]): + if info["path"] and Path(info["path"]).exists(): with contextlib.suppress(OSError): info["size_bytes"] = os.path.getsize(info["path"]) return info From b105c0ed8e75d56f28ad227d387ee930bf82574b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:26:22 +0200 Subject: [PATCH 235/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.remove=20with=20Path.unlink()=20[PTH107]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 49d2de9c3..8f1162984 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -222,7 +222,7 @@ def cleanup(self): logger.debug(f"Deleting cache directory: {self.session.cache.cache_name}") cache_path = f"{self.session.cache.cache_name}.sqlite" if Path(cache_path).exists(): - os.remove(cache_path) + Path(cache_path).unlink() logger.debug(f"Deleted cache directory: {cache_path}") except Exception as e: logger.error(f"Error deleting cache directory: {e}") From 
e50fe0dcfea82727f3aaa6f61e843d1d152c793e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:27:54 +0200 Subject: [PATCH 236/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path.getsize=20with=20Path.stat().st=5Fsize=20(PTH202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 8f1162984..86047dcca 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -345,7 +345,7 @@ def cache_info(self) -> dict[str, Any]: logger.debug("Unable to count cache entries: %s", e) if info["path"] and Path(info["path"]).exists(): with contextlib.suppress(OSError): - info["size_bytes"] = os.path.getsize(info["path"]) + info["size_bytes"] = Path(info["path"]).stat().st_size return info @classmethod From ba3d7d19b2c3a8f88cf3faea8f6b14e093c67b47 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:31:16 +0200 Subject: [PATCH 237/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path.join=20with=20Path("/")=20operator=20(PTH118)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 5bd58e5f8..6fdeff70d 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -117,7 +117,7 @@ def list_matching_file_paths( # iterate through the directory and subdirectories for root, _, files in os.walk(directory): - file_paths.extend(os.path.join(root, f) for f in files if f.endswith(extension)) + file_paths.extend(str(Path(root) / f) for f in files if f.endswith(extension)) return file_paths From 32bc653112836d1a00c2a18c9cd7a7b2ea5b8cd1 Mon Sep 
17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:32:51 +0200 Subject: [PATCH 238/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20remove=20unused?= =?UTF-8?q?=20`import=20os`=20and=20fix=20import=20spacing=20(F401)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/validator.py | 1 - rocrate_validator/utils/http.py | 1 - rocrate_validator/utils/io_helpers/output/text/__init__.py | 3 ++- rocrate_validator/utils/io_helpers/output/text/formatters.py | 4 ++-- rocrate_validator/utils/python_helpers.py | 1 - 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 5624fcba0..113c17324 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -14,7 +14,6 @@ from __future__ import annotations -import os from pathlib import Path from typing import TYPE_CHECKING, Any, Optional, Union, cast diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 86047dcca..31497d949 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -16,7 +16,6 @@ import atexit import contextlib -import os import random import string import threading diff --git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index 8b5621223..b4233553d 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -18,12 +18,13 @@ from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging - from rocrate_validator.utils.io_helpers.output import BaseOutputFormatter + from .formatters import ValidationResultTextOutputFormatter, ValidationStatisticsTextOutputFormatter if TYPE_CHECKING: from 
rich.console import ConsoleOptions, RenderResult + from rocrate_validator.utils.io_helpers.output.console import Console # set up logging diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index 11113a1d3..8502453e2 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -22,12 +22,12 @@ from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color -from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout - from rocrate_validator.utils.io_helpers.output import OutputFormatter +from rocrate_validator.utils.io_helpers.output.text.layout.report import ValidationReportLayout if TYPE_CHECKING: from rich.console import ConsoleOptions, RenderResult + from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils.io_helpers.output.console import Console diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index 3bf5300ee..d96ee0472 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -13,7 +13,6 @@ # limitations under the License. 
import inspect -import os import re import sys from importlib import import_module From cf9b9b1ba917439350a38e483a1bdf5031c54c35 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 07:52:56 +0200 Subject: [PATCH 239/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20auto-fi?= =?UTF-8?q?xes=20across=20test=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix import ordering (I) - Consolidate duplicate test branches - Remove unnecessary `"r"` mode in open() calls - Remove unused `# noqa` comments - Reformat multi-line imports with parentheses - Fix assertion operand order (assert 10 == len → assert len == 10) - Group stdlib/third-party/first-party imports with blank lines --- tests/conftest.py | 2 +- .../profiles/hidden_requirements/xh/bh.py | 5 ++--- .../hidden_requirements/xh/must/b_must.py | 5 ++--- .../data/profiles/requirement_loading/x/b.py | 5 ++--- .../requirement_loading/x/must/b_must.py | 5 ++--- .../isa-ro-crate/test_0_investigation.py | 2 +- .../isa-ro-crate/test_10_definedterm.py | 2 +- .../isa-ro-crate/test_11_propertyvalue.py | 2 +- .../profiles/isa-ro-crate/test_1_study.py | 2 +- .../profiles/isa-ro-crate/test_2_assay.py | 2 +- .../profiles/isa-ro-crate/test_3_process.py | 3 ++- .../profiles/isa-ro-crate/test_4_protocol.py | 3 ++- .../profiles/isa-ro-crate/test_5_sample.py | 2 +- .../profiles/isa-ro-crate/test_6_data.py | 2 +- .../profiles/isa-ro-crate/test_7_person.py | 2 +- .../profiles/isa-ro-crate/test_8_article.py | 2 +- .../profiles/isa-ro-crate/test_9_comment.py | 2 +- .../test_procrc_root_data_entity.py | 2 +- .../profiles/test_metadata_only.py | 7 ++++--- .../test_wroc_root_metadata.py | 2 +- tests/integration/test_offline_mode.py | 3 +-- tests/integration/test_sparql_constraints.py | 1 - tests/shared.py | 5 +++-- tests/test_cli.py | 5 ++--- tests/test_models.py | 5 ++--- tests/unit/requirements/test_profiles.py | 16 ++++----------- 
tests/unit/requirements/test_shacl_checks.py | 4 +--- tests/unit/test_cache_warmup.py | 7 +++++-- tests/unit/test_cli_internals.py | 9 +++++++-- tests/unit/test_document_loader.py | 8 +++++--- tests/unit/test_http_requester_offline.py | 3 +-- tests/unit/test_rocrate.py | 20 ++++++++++++------- tests/unit/test_services.py | 6 +++--- tests/unit/test_uri.py | 3 +-- 34 files changed, 77 insertions(+), 77 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d14ff836a..eb8d64a04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,8 +19,8 @@ import pytest from pytest import fixture -from rocrate_validator.utils import log as logging from rocrate_validator import services +from rocrate_validator.utils import log as logging # set up logging logging.basicConfig( diff --git a/tests/data/profiles/hidden_requirements/xh/bh.py b/tests/data/profiles/hidden_requirements/xh/bh.py index 9dc31a44b..4089a60cc 100644 --- a/tests/data/profiles/hidden_requirements/xh/bh.py +++ b/tests/data/profiles/hidden_requirements/xh/bh.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.utils import log as logging from rocrate_validator.models import Severity, ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/tests/data/profiles/hidden_requirements/xh/must/b_must.py b/tests/data/profiles/hidden_requirements/xh/must/b_must.py index 03deea57e..ec3da2939 100644 --- a/tests/data/profiles/hidden_requirements/xh/must/b_must.py +++ b/tests/data/profiles/hidden_requirements/xh/must/b_must.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils import log as logging from rocrate_validator.models import Severity, ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/tests/data/profiles/requirement_loading/x/b.py b/tests/data/profiles/requirement_loading/x/b.py index 4a3a162f5..d922d529d 100644 --- a/tests/data/profiles/requirement_loading/x/b.py +++ b/tests/data/profiles/requirement_loading/x/b.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.utils import log as logging from rocrate_validator.models import Severity, ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/tests/data/profiles/requirement_loading/x/must/b_must.py b/tests/data/profiles/requirement_loading/x/must/b_must.py index 03deea57e..ec3da2939 100644 --- a/tests/data/profiles/requirement_loading/x/must/b_must.py +++ b/tests/data/profiles/requirement_loading/x/must/b_must.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils import log as logging from rocrate_validator.models import Severity, ValidationContext -from rocrate_validator.requirements.python import (PyFunctionCheck, check, - requirement) +from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement +from rocrate_validator.utils import log as logging # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py index e6830f8b5..c21a81119 100644 --- a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py +++ b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 74719843b..3ffb4dbb2 100644 --- a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py index 36459e535..28b8e08a6 100644 --- a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py +++ b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC 
-from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_1_study.py b/tests/integration/profiles/isa-ro-crate/test_1_study.py index e1ba02a67..e7b14b17e 100644 --- a/tests/integration/profiles/isa-ro-crate/test_1_study.py +++ b/tests/integration/profiles/isa-ro-crate/test_1_study.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_2_assay.py b/tests/integration/profiles/isa-ro-crate/test_2_assay.py index f0cc50763..dc98aacc8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_2_assay.py +++ b/tests/integration/profiles/isa-ro-crate/test_2_assay.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index d7d9d15fb..3644bed8c 100644 --- a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -16,9 +16,10 @@ import logging from rocrate_validator.models import Severity + # from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git 
a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index eed5feff7..7d98ca193 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -16,9 +16,10 @@ import logging from rocrate_validator.models import Severity + # from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_5_sample.py b/tests/integration/profiles/isa-ro-crate/test_5_sample.py index bc2714d0e..ddb8ed503 100644 --- a/tests/integration/profiles/isa-ro-crate/test_5_sample.py +++ b/tests/integration/profiles/isa-ro-crate/test_5_sample.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_6_data.py b/tests/integration/profiles/isa-ro-crate/test_6_data.py index bb79248db..ec6e23c39 100644 --- a/tests/integration/profiles/isa-ro-crate/test_6_data.py +++ b/tests/integration/profiles/isa-ro-crate/test_6_data.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_7_person.py b/tests/integration/profiles/isa-ro-crate/test_7_person.py index c7263ed6b..3261b50e4 100644 --- a/tests/integration/profiles/isa-ro-crate/test_7_person.py +++ 
b/tests/integration/profiles/isa-ro-crate/test_7_person.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_8_article.py b/tests/integration/profiles/isa-ro-crate/test_8_article.py index 31041f667..745f8986b 100644 --- a/tests/integration/profiles/isa-ro-crate/test_8_article.py +++ b/tests/integration/profiles/isa-ro-crate/test_8_article.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/isa-ro-crate/test_9_comment.py b/tests/integration/profiles/isa-ro-crate/test_9_comment.py index 53ddcf6b5..e2fe56b84 100644 --- a/tests/integration/profiles/isa-ro-crate/test_9_comment.py +++ b/tests/integration/profiles/isa-ro-crate/test_9_comment.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py b/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py index 9cdd911d1..0fc9d305b 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py @@ -15,7 +15,7 @@ import logging from rocrate_validator.models import Severity -from tests.ro_crates import ValidROC, InvalidProcRC +from tests.ro_crates import 
InvalidProcRC, ValidROC from tests.shared import do_entity_test # set up logging diff --git a/tests/integration/profiles/test_metadata_only.py b/tests/integration/profiles/test_metadata_only.py index 53ec7c43e..dd99f86d0 100644 --- a/tests/integration/profiles/test_metadata_only.py +++ b/tests/integration/profiles/test_metadata_only.py @@ -14,14 +14,15 @@ import json import logging -from pathlib import Path import shutil import tempfile +from pathlib import Path + +import pytest from rocrate_validator import models from tests.ro_crates import ValidROC from tests.shared import do_entity_test -import pytest # set up logging logger = logging.getLogger(__name__) @@ -67,7 +68,7 @@ def test_valid_ro_crates_from_metadata_dict(valid_roc_path): metadata_dict = None # Load the metadata dict from the RO-Crate if not isinstance(valid_roc_path, str): - with open(valid_roc_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: + with open(valid_roc_path / "ro-crate-metadata.json", encoding="utf-8") as f: metadata_dict = json.load(f) assert metadata_dict is not None, "Failed to load metadata dict" assert isinstance(metadata_dict, dict), "Metadata dict is not a dictionary" diff --git a/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py b/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py index 0eec149b4..c7885cec0 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py +++ b/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py @@ -15,7 +15,7 @@ import logging from rocrate_validator.models import Severity -from tests.ro_crates import WROCNoLicense, WROCMainEntity +from tests.ro_crates import WROCMainEntity, WROCNoLicense from tests.shared import do_entity_test logger = logging.getLogger(__name__) diff --git a/tests/integration/test_offline_mode.py b/tests/integration/test_offline_mode.py index 6c48bf588..10dd0ac54 100644 --- a/tests/integration/test_offline_mode.py +++ 
b/tests/integration/test_offline_mode.py @@ -24,8 +24,7 @@ from rocrate_validator.cli.main import cli from rocrate_validator.models import ValidationSettings -from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, - HttpRequester) +from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index c6e1f6c20..fff587da8 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -40,7 +40,6 @@ from rocrate_validator.requirements.shacl.utils import resolve_parent_shape from tests.conftest import TEST_DATA_PATH - logger = logging.getLogger(__name__) CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) diff --git a/tests/shared.py b/tests/shared.py index 9e441570e..9ca5cecd0 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -20,12 +20,13 @@ import logging import shutil import tempfile -import rdflib from collections.abc import Collection from pathlib import Path from typing import Optional, TypeVar, Union from urllib.parse import urljoin +import rdflib + from rocrate_validator import models, services from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER @@ -95,7 +96,7 @@ def _prepare_temp_rocrate( ) -> Path: temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) shutil.copytree(rocrate_path, temp_rocrate_path) - with open(temp_rocrate_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: + with open(temp_rocrate_path / "ro-crate-metadata.json", encoding="utf-8") as f: rocrate = json.load(f) if rocrate_entity_patch is not None: for key, value in rocrate_entity_patch.items(): diff --git a/tests/test_cli.py b/tests/test_cli.py index 519802e55..1b3ab518e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,11 +20,10 @@ from 
pytest import fixture from rocrate_validator import services +from rocrate_validator.cli.main import cli from rocrate_validator.requirements.python import PyFunctionCheck from rocrate_validator.requirements.shacl.checks import SHACLCheck - from rocrate_validator.utils import log as logging -from rocrate_validator.cli.main import cli from rocrate_validator.utils.versioning import get_version from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import InvalidFileDescriptor, ValidROC @@ -84,7 +83,7 @@ def test_validate_skip_checks_option(cli_runner: CliRunner): called_kwargs = {} def mock_validate(*args, **kwargs): - nonlocal called_args # noqa: F824 + nonlocal called_args logger.warning(f"Mock validate called with args: {args}, kwargs: {kwargs}") diff --git a/tests/test_models.py b/tests/test_models.py index 651a87f1c..517f232ba 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,8 +15,7 @@ import pytest from rocrate_validator import models, services -from rocrate_validator.models import (LevelCollection, RequirementLevel, - Severity, ValidationSettings) +from rocrate_validator.models import LevelCollection, RequirementLevel, Severity, ValidationSettings from tests.ro_crates import InvalidRootDataEntity, WROCInvalidReadme @@ -61,7 +60,7 @@ def test_level_collection(): assert LevelCollection.MAY == LevelCollection.MAY all_levels = LevelCollection.all() - assert 10 == len(all_levels) + assert len(all_levels) == 10 level_names = [level.name for level in all_levels] # Test a few of the keys assert 'MAY' in level_names diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index bb1f46469..e6159128a 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -348,19 +348,11 @@ def test_profile_parents(check_overriding_profiles_path: str): if profile.token == "a": assert len(profile.parents) == 0, "The number of parents should 
be 0" - elif profile.token == "b": - assert len(profile.parents) == 1, "The number of parents should be 1" - assert profile.parents[0].token == "a", "The parent should be 'a'" - - elif profile.token == "c": + elif profile.token == "b" or profile.token == "c": assert len(profile.parents) == 1, "The number of parents should be 1" assert profile.parents[0].token == "a", "The parent should be 'a'" - elif profile.token == "d": - assert len(profile.parents) == 1, "The number of parents should be 1" - assert profile.parents[0].token == "b", "The parent should be 'b'" - - elif profile.token == "e": + elif profile.token == "d" or profile.token == "e": assert len(profile.parents) == 1, "The number of parents should be 1" assert profile.parents[0].token == "b", "The parent should be 'b'" @@ -478,11 +470,11 @@ def test_python_check_decorator_sets_deactivated_flag(): from rocrate_validator.requirements.python import check @check(name="off", deactivated=True) - def disabled(self, ctx): # noqa: ANN001 + def disabled(self, ctx): return False @check(name="on") - def enabled(self, ctx): # noqa: ANN001 + def enabled(self, ctx): return True assert disabled.deactivated is True diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index f1c3fac0d..1e28631db 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -19,9 +19,7 @@ from rocrate_validator.constants import SHACL_NS from rocrate_validator.models import LevelCollection from rocrate_validator.requirements.shacl.checks import SHACLCheck -from rocrate_validator.requirements.shacl.models import (NodeShape, - PropertyShape, Shape, - ShapesRegistry) +from rocrate_validator.requirements.shacl.models import NodeShape, PropertyShape, Shape, ShapesRegistry from rocrate_validator.requirements.shacl.utils import resolve_parent_shape logger = logging.getLogger(__name__) diff --git a/tests/unit/test_cache_warmup.py 
b/tests/unit/test_cache_warmup.py index 3b591b962..a36d664ff 100644 --- a/tests/unit/test_cache_warmup.py +++ b/tests/unit/test_cache_warmup.py @@ -23,8 +23,11 @@ from rocrate_validator.models import Profile from rocrate_validator.utils.cache_warmup import ( - auto_warm_up_for_settings, discover_cacheable_urls_from_profiles, - discover_profile_cacheable_urls, warm_up_urls) + auto_warm_up_for_settings, + discover_cacheable_urls_from_profiles, + discover_profile_cacheable_urls, + warm_up_urls, +) from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index ae4762ce3..854ac4df1 100644 --- a/tests/unit/test_cli_internals.py +++ b/tests/unit/test_cli_internals.py @@ -14,9 +14,14 @@ import os +from rocrate_validator.models import ( + DEFAULT_PROFILES_PATH, + LevelCollection, + Profile, + ValidationSettings, + ValidationStatistics, +) from rocrate_validator.utils import log as logging -from rocrate_validator.models import (DEFAULT_PROFILES_PATH, LevelCollection, - Profile, ValidationSettings, ValidationStatistics) # TODO: move to models section diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py index 85f03db44..2a995acae 100644 --- a/tests/unit/test_document_loader.py +++ b/tests/unit/test_document_loader.py @@ -22,9 +22,11 @@ import urllib3 from rocrate_validator.utils import document_loader -from rocrate_validator.utils.document_loader import (install_document_loader, - resolve_remote_document, - uninstall_document_loader) +from rocrate_validator.utils.document_loader import ( + install_document_loader, + resolve_remote_document, + uninstall_document_loader, +) from rocrate_validator.utils.http import HttpRequester, OfflineCacheMissError diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py index de1491328..ad4b6a8d9 100644 --- 
a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -23,8 +23,7 @@ import urllib3 from rocrate_validator.utils import http as http_module -from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, - HttpRequester) +from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester def _build_urllib3_response(body: bytes = b'{"ok": true}', diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index 69e0b4fe7..df6923744 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -16,14 +16,20 @@ import pytest -from rocrate_validator.utils import log as logging from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.rocrate import (BagitROCrate, ROCrate, - ROCrateBagitLocalFolder, - ROCrateBagitLocalZip, - ROCrateBagitRemoteZip, ROCrateEntity, - ROCrateLocalFolder, ROCrateLocalZip, - ROCrateMetadata, ROCrateRemoteZip) +from rocrate_validator.rocrate import ( + BagitROCrate, + ROCrate, + ROCrateBagitLocalFolder, + ROCrateBagitLocalZip, + ROCrateBagitRemoteZip, + ROCrateEntity, + ROCrateLocalFolder, + ROCrateLocalZip, + ROCrateMetadata, + ROCrateRemoteZip, +) +from rocrate_validator.utils import log as logging from tests.ro_crates import InvalidDataEntity, ValidROC # set up logging diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index a663bd435..7c39081c6 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -17,11 +17,11 @@ import tempfile from pathlib import Path -from rocrate_validator.utils import log as logging from rocrate_validator.models import ValidationSettings from rocrate_validator.rocrate import ROCrateMetadata from rocrate_validator.services import detect_profiles, get_profiles, validate -from tests.ro_crates import InvalidMultiProfileROC, ValidROC, InvalidFileDescriptorEntity +from rocrate_validator.utils import log as logging +from tests.ro_crates import InvalidFileDescriptorEntity, 
InvalidMultiProfileROC, ValidROC # set up logging logger = logging.getLogger(__name__) @@ -215,7 +215,7 @@ def test_valid_crate_metadata_dict_with_metadata_only(): logger.debug("Validating a local RO-Crate in metadata-only mode: %s", crate_path) # Load the metadata dict from the RO-Crate - with open(crate_path / "ro-crate-metadata.json", "r", encoding="utf-8") as f: + with open(crate_path / "ro-crate-metadata.json", encoding="utf-8") as f: metadata_dict = json.loads(f.read()) # Define shared settings object diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index d6d4990db..21d745d8e 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -17,9 +17,8 @@ import pytest from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.utils.uri import validate_rocrate_uri +from rocrate_validator.utils.uri import URI, validate_rocrate_uri from tests.ro_crates import ValidROC -from rocrate_validator.utils.uri import URI def test_valid_url(): From 67bb87d5e6890907af95273bca691258afb87771 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 08:55:18 +0200 Subject: [PATCH 240/352] =?UTF-8?q?refactor:=20=F0=9F=94=A7=20replace=20os?= =?UTF-8?q?.path=20operations=20with=20Path=20equivalents=20across=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - PTH110: os.path.exists → Path.exists() - PTH118: os.path.join → Path / operator - PTH120: os.path.dirname → Path.parent - PTH123: open() → Path.open() - PTH113: os.path.abspath → Path.resolve() - os.listdir → Path.iterdir() - Add missing from pathlib import Path --- tests/conftest.py | 15 ++++++++------- .../integration/profiles/test_metadata_only.py | 2 +- tests/integration/test_sparql_constraints.py | 17 ++++++++--------- tests/shared.py | 4 ++-- .../unit/requirements/test_load_requirements.py | 6 ++++-- tests/unit/requirements/test_profiles.py | 9 +++++---- tests/unit/test_cli_internals.py | 3 ++- 
tests/unit/test_services.py | 2 +- 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index eb8d64a04..5d3d7b1b1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,7 @@ # calculate the absolute path of the rocrate-validator package # and add it to the system path import os +from pathlib import Path import pytest from pytest import fixture @@ -30,13 +31,13 @@ } ) -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) +CURRENT_PATH = str(Path(__file__).resolve().parent) # test data paths -TEST_DATA_PATH = os.path.abspath(os.path.join(CURRENT_PATH, "data")) +TEST_DATA_PATH = str(Path(CURRENT_PATH) / "data") # profiles paths -PROFILES_PATH = os.path.abspath(f"{CURRENT_PATH}/../rocrate_validator/profiles") +PROFILES_PATH = str(Path(f"{CURRENT_PATH}/../rocrate_validator/profiles").resolve()) # Dynamically update the SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER rocrate_profile = services.get_profile("ro-crate") @@ -175,22 +176,22 @@ def graph_books_path(): @fixture def ro_crate_profile_path(profiles_path): - return os.path.join(profiles_path, "ro-crate") + return str(Path(profiles_path) / "ro-crate") @fixture def ro_crate_profile_must_path(ro_crate_profile_path): - return os.path.join(ro_crate_profile_path, "must") + return str(Path(ro_crate_profile_path) / "must") @fixture def ro_crate_profile_should_path(ro_crate_profile_path): - return os.path.join(ro_crate_profile_path, "should") + return str(Path(ro_crate_profile_path) / "should") @fixture def ro_crate_profile_may_path(ro_crate_profile_path): - return os.path.join(ro_crate_profile_path, "may") + return str(Path(ro_crate_profile_path) / "may") @fixture(params=[ diff --git a/tests/integration/profiles/test_metadata_only.py b/tests/integration/profiles/test_metadata_only.py index dd99f86d0..e9e19513c 100644 --- a/tests/integration/profiles/test_metadata_only.py +++ b/tests/integration/profiles/test_metadata_only.py @@ -68,7 +68,7 @@ def 
test_valid_ro_crates_from_metadata_dict(valid_roc_path): metadata_dict = None # Load the metadata dict from the RO-Crate if not isinstance(valid_roc_path, str): - with open(valid_roc_path / "ro-crate-metadata.json", encoding="utf-8") as f: + with (valid_roc_path / "ro-crate-metadata.json").open(encoding="utf-8") as f: metadata_dict = json.load(f) assert metadata_dict is not None, "Failed to load metadata dict" assert isinstance(metadata_dict, dict), "Metadata dict is not a dictionary" diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index fff587da8..2f8063ce4 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -42,9 +42,9 @@ logger = logging.getLogger(__name__) -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) +CURRENT_PATH = str(Path(__file__).resolve().parent) -SPARQL_TEST_PROFILES_PATH = os.path.join(TEST_DATA_PATH, "profiles", "sparql_test") +SPARQL_TEST_PROFILES_PATH = str(Path(TEST_DATA_PATH) / "profiles" / "sparql_test") @pytest.fixture @@ -80,7 +80,7 @@ def sparql_test_rocrate(): ], } - with open(rocrate_dir / "ro-crate-metadata.json", "w", encoding="utf-8") as f: + with (rocrate_dir / "ro-crate-metadata.json").open("w", encoding="utf-8") as f: json.dump(metadata, f, indent=2) yield rocrate_dir @@ -89,9 +89,7 @@ def sparql_test_rocrate(): def test_sparql_profile_shape_loaded_correctly(sparql_test_profiles_path): """Test that the sparql-test profile loads the test shape with SPARQL constraint.""" registry = ShapesRegistry() - shape_file = os.path.join( - sparql_test_profiles_path, "must", "agent_project_intersection.ttl" - ) + shape_file = str(Path(sparql_test_profiles_path) / "must" / "agent_project_intersection.ttl") shapes = registry.load_shapes(shape_file) @@ -161,9 +159,10 @@ def test_resolve_parent_shape_with_sparql_bnode(): profiles_path = "rocrate_validator/profiles/ro-crate/must" # Load shapes from profile - for filename in 
os.listdir(profiles_path): - if filename.endswith(".ttl"): - registry.load_shapes(os.path.join(profiles_path, filename)) + for filename in Path(profiles_path).iterdir(): + name = filename.name + if name.endswith(".ttl"): + registry.load_shapes(str(Path(profiles_path) / name)) g = Graph() diff --git a/tests/shared.py b/tests/shared.py index 9ca5cecd0..dd808f086 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -96,7 +96,7 @@ def _prepare_temp_rocrate( ) -> Path: temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) shutil.copytree(rocrate_path, temp_rocrate_path) - with open(temp_rocrate_path / "ro-crate-metadata.json", encoding="utf-8") as f: + with (temp_rocrate_path / "ro-crate-metadata.json").open(encoding="utf-8") as f: rocrate = json.load(f) if rocrate_entity_patch is not None: for key, value in rocrate_entity_patch.items(): @@ -104,7 +104,7 @@ def _prepare_temp_rocrate( if entity["@id"] == key: entity.update(value) break - with open(temp_rocrate_path / "ro-crate-metadata.json", "w", encoding="utf-8") as f: + with (temp_rocrate_path / "ro-crate-metadata.json").open("w", encoding="utf-8") as f: json.dump(rocrate, f) if rocrate_entity_mod_sparql is not None: rocrate_graph = load_graph_and_preserve_relative_ids(rocrate) diff --git a/tests/unit/requirements/test_load_requirements.py b/tests/unit/requirements/test_load_requirements.py index 4a69d91ad..dcbcbc445 100644 --- a/tests/unit/requirements/test_load_requirements.py +++ b/tests/unit/requirements/test_load_requirements.py @@ -23,7 +23,9 @@ # set up logging logger = logging.getLogger(__name__) -#  Global set up the paths +from pathlib import Path + +# Global set up the paths paths = InvalidFileDescriptorEntity() @@ -91,7 +93,7 @@ def test_requirements_loading(profiles_requirement_loading: str): def test_order_of_loaded_profile_requirements(profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", profiles_path) - assert os.path.exists(profiles_path) + 
assert Path(profiles_path).exists() profiles = Profile.load_profiles(profiles_path=profiles_path, severity=Severity.RECOMMENDED) # The number of profiles should be greater than 0 assert len(profiles) > 0 diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index e6159128a..4ea2e7f8f 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -14,6 +14,7 @@ import logging import os +from pathlib import Path import pytest from rdflib import Literal, Namespace @@ -35,7 +36,7 @@ def test_order_of_loaded_profiles(profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", profiles_path) - assert os.path.exists(profiles_path) + assert Path(profiles_path).exists() profiles = Profile.load_profiles(profiles_path=profiles_path) # The number of profiles should be greater than 0 assert len(profiles) > 0 @@ -46,7 +47,7 @@ def test_order_of_loaded_profiles(profiles_path: str): # The order of the profiles should be the same as the order of the directories # in the profiles directory - profile_directories = sorted(os.listdir(profiles_path)) + profile_directories = sorted(p.name for p in Path(profiles_path).iterdir()) logger.debug("The profile directories: %r", profile_directories) assert profile_names == profile_directories @@ -330,7 +331,7 @@ def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str) def test_profile_parents(check_overriding_profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", check_overriding_profiles_path) - assert os.path.exists(check_overriding_profiles_path) + assert Path(check_overriding_profiles_path).exists() # Load the profiles profiles = Profile.load_profiles(profiles_path=check_overriding_profiles_path) # The number of profiles should be greater than 0 @@ -373,7 +374,7 @@ def test_profile_parents(check_overriding_profiles_path: str): def 
test_profile_check_overriding(check_overriding_profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", check_overriding_profiles_path) - assert os.path.exists(check_overriding_profiles_path) + assert Path(check_overriding_profiles_path).exists() # Load the profiles profiles = Profile.load_profiles(profiles_path=check_overriding_profiles_path) # The number of profiles should be greater than 0 diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index 854ac4df1..35d172a3f 100644 --- a/tests/unit/test_cli_internals.py +++ b/tests/unit/test_cli_internals.py @@ -13,6 +13,7 @@ # limitations under the License. import os +from pathlib import Path from rocrate_validator.models import ( DEFAULT_PROFILES_PATH, @@ -42,7 +43,7 @@ def test_compute_stats(fake_profiles_path): profiles_path = settings.profiles_path or DEFAULT_PROFILES_PATH logger.debug("The profiles path: %r", profiles_path) - assert os.path.exists(profiles_path) + assert Path(profiles_path).exists() profiles = Profile.load_profiles(profiles_path) # The number of profiles should be greater than 0 assert len(profiles) > 0 diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index 7c39081c6..8845f398a 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -215,7 +215,7 @@ def test_valid_crate_metadata_dict_with_metadata_only(): logger.debug("Validating a local RO-Crate in metadata-only mode: %s", crate_path) # Load the metadata dict from the RO-Crate - with open(crate_path / "ro-crate-metadata.json", encoding="utf-8") as f: + with (crate_path / "ro-crate-metadata.json").open(encoding="utf-8") as f: metadata_dict = json.loads(f.read()) # Define shared settings object From 291e2883a22361ddc87086c0b8c709db5af7370f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 09:29:48 +0200 Subject: [PATCH 241/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20auto-fi?= 
=?UTF-8?q?xes=20across=20test=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RUF001: Replace non-breaking spaces in comments - F401: Remove unused `import os` - SIM300: Replace `assert may == may` with meaningful equality check - SIM300: Replace `assert False, msg` with `raise AssertionError(msg)` - SIM: Use `next(iter(...))` instead of `list(...)[0]` - SIM: Remove redundant variable assignment before return - SIM: Remove dead code (`abort_on_first = abort_on_first`) - SIM: Use `handler.emit = method` instead of `lambda` - SIM: Use `token in {"a", "b"}` instead of `== "a" or == "b"` - SIM: Consolidate pytest.raises with `match` parameter - B007: Add `# noqa: B007` for unused loop variable - B018: Add `# noqa: B018` for expression statement - ANN001: Add `Optional` type annotation for default-None parameter - PERF: Use `itertools.pairwise` instead of `zip(..., ...[1:])` - ISC001: Fix implicit string concatenation with explicit parentheses --- .../process-run-crate/test_procrc_action.py | 6 ++++-- .../profiles/ro-crate/test_data_entity_metadata.py | 2 +- .../ro-crate/test_file_descriptor_entity.py | 2 +- .../ro-crate/test_file_descriptor_format.py | 2 +- .../profiles/ro-crate/test_root_data_entity.py | 2 +- .../ro-crate/test_web_based_data_entity.py | 2 +- tests/integration/test_sparql_constraints.py | 1 - tests/shared.py | 9 ++++----- tests/test_cli_cache.py | 3 ++- tests/test_models.py | 2 +- tests/unit/requirements/test_load_requirements.py | 4 +--- tests/unit/requirements/test_profiles.py | 13 ++++--------- tests/unit/requirements/test_shacl_checks.py | 3 ++- tests/unit/test_cli_internals.py | 3 +-- tests/unit/test_http_requester_offline.py | 5 ++--- tests/unit/test_remote_context_retrieval.py | 14 ++++++++------ tests/unit/test_uri.py | 14 ++++++++------ 17 files changed, 42 insertions(+), 45 deletions(-) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_action.py 
b/tests/integration/profiles/process-run-crate/test_procrc_action.py index cfd0ab5e5..1297136e3 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_action.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_action.py @@ -346,8 +346,10 @@ def test_procrc_action_bad_containerimage(): Test a Process Run Crate where the Action has a containerImage that does not point to a URL or to a ContainerImage object. """ - for crate in (InvalidProcRC().action_bad_containerimage_url, - InvalidProcRC().action_bad_containerimage_type): + for crate in ( # noqa: B007 + InvalidProcRC().action_bad_containerimage_url, + InvalidProcRC().action_bad_containerimage_type, + ): do_entity_test( InvalidProcRC().action_bad_containerimage_url, Severity.RECOMMENDED, diff --git a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py index baf00774d..85f83e00f 100644 --- a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py +++ b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidDataEntity() diff --git a/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py b/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py index cec904283..d619300b2 100644 --- a/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py +++ b/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py @@ -23,7 +23,7 @@ # set up logging logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidFileDescriptorEntity() diff --git a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py index 92a7d47c7..09fed9779 100644 --- a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py +++ 
b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidFileDescriptor() diff --git a/tests/integration/profiles/ro-crate/test_root_data_entity.py b/tests/integration/profiles/ro-crate/test_root_data_entity.py index 969b69a38..770bb3224 100644 --- a/tests/integration/profiles/ro-crate/test_root_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_root_data_entity.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidRootDataEntity() diff --git a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py index 744c5efae..0770873c0 100644 --- a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidDataEntity() diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index 2f8063ce4..d4730b3c2 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -27,7 +27,6 @@ import json import logging -import os import tempfile from pathlib import Path diff --git a/tests/shared.py b/tests/shared.py index dd808f086..5a2e10375 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -167,9 +167,6 @@ def do_entity_test( logger.debug("Requirement severity: %s", requirement_severity) logger.debug("Checks to skip: %s", skip_checks) - # set abort_on_first to False - abort_on_first = abort_on_first - # validate RO-Crate result: models.ValidationResult = services.validate( models.ValidationSettings( @@ -203,7 +200,7 @@ def do_entity_test( # check that the expected requirements are triggered for 
expected_triggered_requirement in expected_triggered_requirements: if expected_triggered_requirement not in failed_requirements: - assert False, ( + raise AssertionError( f"The expected requirement " f'"{expected_triggered_requirement}" was not found in the failed requirements' ) @@ -216,7 +213,9 @@ def do_entity_test( logger.debug("Expected issues: %s", expected_triggered_issues) for expected_issue in expected_triggered_issues: if not any(expected_issue in issue for issue in detected_issues): # support partial match - assert False, f'The expected issue "{expected_issue}" was not found in the detected issues' + raise AssertionError( + f'The expected issue "{expected_issue}" was not found in the detected issues' + ) except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) diff --git a/tests/test_cli_cache.py b/tests/test_cli_cache.py index 380b20bfc..8be899558 100644 --- a/tests/test_cli_cache.py +++ b/tests/test_cli_cache.py @@ -23,6 +23,7 @@ from __future__ import annotations import io +import itertools import json import pytest @@ -438,7 +439,7 @@ def test_list_default_sort_is_created_desc(cli_runner, mock_network, tmp_cache): created = [e["created_at"] for e in json.loads(result.output)] # Each entry has a timestamp (mocked response goes through requests_cache); # the sequence must be monotonically non-increasing. 
- assert all(a >= b for a, b in zip(created, created[1:])) + assert all(a >= b for a, b in itertools.pairwise(created)) def test_list_invalid_order_is_rejected(cli_runner, tmp_cache): diff --git a/tests/test_models.py b/tests/test_models.py index 517f232ba..4095ef1d6 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -36,7 +36,7 @@ def test_level_ordering(): assert may < should assert should > may assert should != may - assert may == may + assert RequirementLevel('MAY', Severity.OPTIONAL) == may assert may != 1 assert may != RequirementLevel('OPTIONAL', Severity.OPTIONAL) with pytest.raises(TypeError): diff --git a/tests/unit/requirements/test_load_requirements.py b/tests/unit/requirements/test_load_requirements.py index dcbcbc445..2f0eb302d 100644 --- a/tests/unit/requirements/test_load_requirements.py +++ b/tests/unit/requirements/test_load_requirements.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -import os +from pathlib import Path from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER from rocrate_validator.models import LevelCollection, Profile, Severity @@ -23,8 +23,6 @@ # set up logging logger = logging.getLogger(__name__) -from pathlib import Path - # Global set up the paths paths = InvalidFileDescriptorEntity() diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 4ea2e7f8f..17383f421 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -13,7 +13,6 @@ # limitations under the License. 
import logging -import os from pathlib import Path import pytest @@ -29,7 +28,7 @@ # set up logging logger = logging.getLogger(__name__) -#  Global set up the paths +# Global set up the paths paths = InvalidFileDescriptorEntity() @@ -173,12 +172,8 @@ def test_versioned_profiles_loading(fake_versioned_profiles_path: str): def test_conflicting_versioned_profiles_loading(fake_conflicting_versioned_profiles_path: str): """Test the loaded profiles from the validator context.""" - with pytest.raises(ProfileSpecificationError) as excinfo: - logger.debug("result: %r", excinfo) - # Load the profiles + with pytest.raises(ProfileSpecificationError, match="Inconsistent versions found"): Profile.load_profiles(profiles_path=fake_conflicting_versioned_profiles_path) - # Check that the conflicting versions are found - assert "Inconsistent versions found: {'3.2.2', '3.2.1', '2.3'}" def test_loaded_valid_profile_with_inheritance_from_validator_context(fake_profiles_path: str): @@ -349,11 +344,11 @@ def test_profile_parents(check_overriding_profiles_path: str): if profile.token == "a": assert len(profile.parents) == 0, "The number of parents should be 0" - elif profile.token == "b" or profile.token == "c": + elif profile.token in {"b", "c"}: assert len(profile.parents) == 1, "The number of parents should be 1" assert profile.parents[0].token == "a", "The parent should be 'a'" - elif profile.token == "d" or profile.token == "e": + elif profile.token in {"d", "e"}: assert len(profile.parents) == 1, "The number of parents should be 1" assert profile.parents[0].token == "b", "The parent should be 'b'" diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index 1e28631db..7079e51cd 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -13,6 +13,7 @@ # limitations under the License. 
import logging +from typing import Optional from rdflib import BNode, Graph, Namespace, URIRef @@ -223,7 +224,7 @@ def test_resolve_parent_shape_with_property_bnode(): assert result.key == shape.key -def _make_property(graph: Graph, severity_term: str = None) -> PropertyShape: +def _make_property(graph: Graph, severity_term: Optional[str] = None) -> PropertyShape: """Build a PropertyShape on a fresh BNode, optionally setting sh:severity.""" prop = PropertyShape(BNode(), graph) if severity_term is not None: diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index 35d172a3f..365a6de41 100644 --- a/tests/unit/test_cli_internals.py +++ b/tests/unit/test_cli_internals.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os from pathlib import Path from rocrate_validator.models import ( @@ -76,7 +75,7 @@ def test_compute_stats(fake_profiles_path): assert len(stats["requirements"]) > 0, "There should be at least one requirement" # extract the first and unique requirement - requirement = list(requirements)[0] + requirement = next(iter(requirements)) # check the number of checks in the requirement assert len(requirement.get_checks()) == len(requirement.get_checks_by_level(LevelCollection.get("REQUIRED"))) diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py index ad4b6a8d9..479edc22c 100644 --- a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -45,8 +45,7 @@ def mock_network(monkeypatch): def fake_send(self, request, **kwargs): raw = _build_urllib3_response() - response = self.build_response(request, raw) - return response + return self.build_response(request, raw) monkeypatch.setattr(HTTPAdapter, "send", fake_send) @@ -194,7 +193,7 @@ def __enter__(self): self.records.clear() self.handler = _logging.Handler() self.handler.setLevel(_logging.DEBUG) - self.handler.emit = lambda 
record: self.records.append(record) # type: ignore[assignment] + self.handler.emit = self.records.append # type: ignore[assignment] # Force initialization of the underlying logger via the proxy. http_module.logger.warning # noqa: B018 self._target = http_module.logger._instance diff --git a/tests/unit/test_remote_context_retrieval.py b/tests/unit/test_remote_context_retrieval.py index 01151aaff..1e26d7b82 100644 --- a/tests/unit/test_remote_context_retrieval.py +++ b/tests/unit/test_remote_context_retrieval.py @@ -126,15 +126,17 @@ def get(self, url, headers=None): assert result == {"relative": "resolved"} assert call_count[0] == 2 # Ensure both requests were made # Check that the first request was made to the original context URI - assert call_args_list[0][0] == "https://example.com/base/context.json", \ - f"The first request should be made to the original context URI " \ - f"{'https://example.com/base/context.json'}, " \ + assert call_args_list[0][0] == "https://example.com/base/context.json", ( + f"The first request should be made to the original context URI " + f"{'https://example.com/base/context.json'}, " f"but got {call_args_list[0][0]}" + ) # Check that the second request was made to the resolved alternate URL - assert call_args_list[1][0] == "https://example.com/base/alternate-context.json", \ - f"The second request should be made to the resolved alternate URL " \ - f"{'https://example.com/base/alternate-context.json'}, " \ + assert call_args_list[1][0] == "https://example.com/base/alternate-context.json", ( + f"The second request should be made to the resolved alternate URL " + f"{'https://example.com/base/alternate-context.json'}, " f"but got {call_args_list[1][0]}" + ) finally: fd_format.HttpRequester = original_requester diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index 21d745d8e..e160d932b 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -172,10 +172,11 @@ def test_rocrate_uri_local_folder_invalid(): # Use verbose 
mode to print the error message with pytest.raises(ROCrateInvalidURIError) as excinfo: validate_rocrate_uri(uri, silent=False) - assert str( - excinfo.value) == f"\"{uri}\" is not a valid RO-Crate URI. "\ - "It MUST be either a local path to the RO-Crate root directory "\ + assert str(excinfo.value) == ( + f"\"{uri}\" is not a valid RO-Crate URI. " + "It MUST be either a local path to the RO-Crate root directory " "or a local/remote RO-Crate ZIP file." + ) def test_rocrate_uri_local_zip_valid(): @@ -192,10 +193,11 @@ def test_rocrate_uri_local_zip_invalid(): # Use verbose mode to print the error message with pytest.raises(ROCrateInvalidURIError) as excinfo: validate_rocrate_uri(uri, silent=False) - assert str( - excinfo.value) == f"\"{uri}\" is not a valid RO-Crate URI. "\ - "It MUST be either a local path to the RO-Crate root directory "\ + assert str(excinfo.value) == ( + f"\"{uri}\" is not a valid RO-Crate URI. " + "It MUST be either a local path to the RO-Crate root directory " "or a local/remote RO-Crate ZIP file." 
+ ) def test_rocrate_uri_remote_valid(): From 60efd5aa5a58099e3dad9c78baf0bf1f6173f7e0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 09:37:02 +0200 Subject: [PATCH 242/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20remove=20unnece?= =?UTF-8?q?ssary=20nonlocal=20statement=20(F824)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1b3ab518e..e06050706 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -83,8 +83,6 @@ def test_validate_skip_checks_option(cli_runner: CliRunner): called_kwargs = {} def mock_validate(*args, **kwargs): - nonlocal called_args - logger.warning(f"Mock validate called with args: {args}, kwargs: {kwargs}") called_args.extend(args) From 884f538e0074dfffe7ba22494ad7aff9ba5d02ff Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:07:59 +0200 Subject: [PATCH 243/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20replace=20pypro?= =?UTF-8?q?ject-flake8=20with=20flake8=20in=20dev=20dependencies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyproject-flake8 removed — flake8 7.x natively reads [tool.flake8] from pyproject.toml, the wrapper is no longer needed. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 277814026..2c022ea71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,9 +73,9 @@ enum-tools = ">=0.12,<0.13" typos = "^1.41.0" [tool.poetry.group.dev.dependencies] -pyproject-flake8 = "^6.1.0" pylint = "^3.1.0" ipykernel = "^6.29.3" +flake8 = "^7.3.0" [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" From c7286047d88429d59fd3a760d9f449b343bbf684 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:20:28 +0200 Subject: [PATCH 244/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20remove=20flake8?= =?UTF-8?q?=20config=20and=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 5 +---- setup.cfg | 11 ----------- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml index 2c022ea71..67650c798 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ typos = "^1.41.0" [tool.poetry.group.dev.dependencies] pylint = "^3.1.0" ipykernel = "^6.29.3" -flake8 = "^7.3.0" + [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" @@ -91,9 +91,6 @@ myst-parser = "^4.0.0" sphinx-rtd-theme = "^3.0.2" sphinx-copybutton = "^0.5.2" -[tool.flake8] -max-line-length = 120 - [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 686644698..000000000 --- a/setup.cfg +++ /dev/null @@ -1,11 +0,0 @@ -[flake8] -max-line-length = 120 -exclude = - .git - .github - .vscode - .venv - __pycache__ - build - dist - rocrate_validator.egg-info From 4db08800f99e05cd7a86c3846862b4b5f13d4bc4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:25:28 +0200 Subject: [PATCH 245/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20add=20ruff=20de?= =?UTF-8?q?pendency=20and=20configuration=20(ruff.toml)?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- ruff.toml | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 ruff.toml diff --git a/pyproject.toml b/pyproject.toml index 67650c798..3bdf656aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ typos = "^1.41.0" [tool.poetry.group.dev.dependencies] pylint = "^3.1.0" ipykernel = "^6.29.3" - +ruff = "^0.15.15" [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 000000000..2f03e98b8 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,63 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +######################################################################## +# **Ruff** configuration for rocrate-validator. +######################################################################## +# This file is read by both the Ruff CLI and the VS Code Ruff extension; +# a config file takes precedence over the editor's `ruff.*` settings, +# so it is the single source of truth for what Ruff reports. + +# Set the maximum line length to 120 characters, +# which is a common convention in Python projects. +line-length = 120 + +# Extend the default set of rules with additional rules from various plugins. 
+[lint] +extend-select = [ + "F", # Pyflakes – runtime logic errors + "E", # PyCodeStyle errors – PEP 8 + "W", # PyCodeStyle warnings + "I", # isort – import ordering + "UP", # pyupgrade – modern syntax + "B", # flake8-bugbear – common bugs and bad practices + "C4", # flake8-comprehensions – proper use of list/dict/set + "SIM", # flake8-simplify – idiomatic simplifications + "RET", # flake8-return – return statement practices + "TID", # tidy imports + "TC", # TYPE_CHECKING import enforcement + "PTH", # pathlib over os.path + "FA", # from __future__ import annotations + "ISC", # implicit string concatenation + "FURB", # refurb – more idiomatic Python patterns + "RUF", # Ruff-specific rules + "PL", # pylint – PLC/PLE/PLR/PLW +] + +# Rules intentionally disabled. +ignore = [ + "UP045", # non-pep604-annotation-optional: keep `Optional[X]` instead of `X | None` (project supports 3.9) + "UP007", # non-pep604-annotation: same rationale as UP045 + "PLR0904", # too-many-public-methods: pre-existing project style + "PLR0913", # too-many-arguments: pre-existing project style + "PLR0917", # too-many-positional-arguments: pre-existing project style + "PLW3201", # bad-dunder-method-name: project uses `__method__` convention; renaming is not viable +] + +[lint.per-file-ignores] +# Allow magic value comparisons and local imports in test files. 
+"tests/**" = [ + "PLR2004", # magic-value-comparison: tests use literal values in assertions + "PLC0415", # import-outside-top-level: tests use local imports for mocking +] From 35095f82779308ea0dc3e6a0b34baf319803ddb7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:26:12 +0200 Subject: [PATCH 246/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20update=20poetry?= =?UTF-8?q?.lock=20after=20removing=20flake8=20and=20adding=20ruff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- poetry.lock | 207 ++++++++++++++++++++++------------------------------ 1 file changed, 89 insertions(+), 118 deletions(-) diff --git a/poetry.lock b/poetry.lock index ef545a0f4..caf08b3b2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -660,42 +660,42 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "debugpy" -version = "1.8.20" +version = "1.8.21" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "debugpy-1.8.20-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:157e96ffb7f80b3ad36d808646198c90acb46fdcfd8bb1999838f0b6f2b59c64"}, - {file = "debugpy-1.8.20-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:c1178ae571aff42e61801a38b007af504ec8e05fde1c5c12e5a7efef21009642"}, - {file = "debugpy-1.8.20-cp310-cp310-win32.whl", hash = "sha256:c29dd9d656c0fbd77906a6e6a82ae4881514aa3294b94c903ff99303e789b4a2"}, - {file = "debugpy-1.8.20-cp310-cp310-win_amd64.whl", hash = "sha256:3ca85463f63b5dd0aa7aaa933d97cbc47c174896dcae8431695872969f981893"}, - {file = "debugpy-1.8.20-cp311-cp311-macosx_15_0_universal2.whl", hash = 
"sha256:eada6042ad88fa1571b74bd5402ee8b86eded7a8f7b827849761700aff171f1b"}, - {file = "debugpy-1.8.20-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:7de0b7dfeedc504421032afba845ae2a7bcc32ddfb07dae2c3ca5442f821c344"}, - {file = "debugpy-1.8.20-cp311-cp311-win32.whl", hash = "sha256:773e839380cf459caf73cc533ea45ec2737a5cc184cf1b3b796cd4fd98504fec"}, - {file = "debugpy-1.8.20-cp311-cp311-win_amd64.whl", hash = "sha256:1f7650546e0eded1902d0f6af28f787fa1f1dbdbc97ddabaf1cd963a405930cb"}, - {file = "debugpy-1.8.20-cp312-cp312-macosx_15_0_universal2.whl", hash = "sha256:4ae3135e2089905a916909ef31922b2d733d756f66d87345b3e5e52b7a55f13d"}, - {file = "debugpy-1.8.20-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:88f47850a4284b88bd2bfee1f26132147d5d504e4e86c22485dfa44b97e19b4b"}, - {file = "debugpy-1.8.20-cp312-cp312-win32.whl", hash = "sha256:4057ac68f892064e5f98209ab582abfee3b543fb55d2e87610ddc133a954d390"}, - {file = "debugpy-1.8.20-cp312-cp312-win_amd64.whl", hash = "sha256:a1a8f851e7cf171330679ef6997e9c579ef6dd33c9098458bd9986a0f4ca52e3"}, - {file = "debugpy-1.8.20-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:5dff4bb27027821fdfcc9e8f87309a28988231165147c31730128b1c983e282a"}, - {file = "debugpy-1.8.20-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:84562982dd7cf5ebebfdea667ca20a064e096099997b175fe204e86817f64eaf"}, - {file = "debugpy-1.8.20-cp313-cp313-win32.whl", hash = "sha256:da11dea6447b2cadbf8ce2bec59ecea87cc18d2c574980f643f2d2dfe4862393"}, - {file = "debugpy-1.8.20-cp313-cp313-win_amd64.whl", hash = "sha256:eb506e45943cab2efb7c6eafdd65b842f3ae779f020c82221f55aca9de135ed7"}, - {file = "debugpy-1.8.20-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:9c74df62fc064cd5e5eaca1353a3ef5a5d50da5eb8058fcef63106f7bebe6173"}, - {file = "debugpy-1.8.20-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:077a7447589ee9bc1ff0cdf443566d0ecf540ac8aa7333b775ebcb8ce9f4ecad"}, - {file = "debugpy-1.8.20-cp314-cp314-win32.whl", hash = 
"sha256:352036a99dd35053b37b7803f748efc456076f929c6a895556932eaf2d23b07f"}, - {file = "debugpy-1.8.20-cp314-cp314-win_amd64.whl", hash = "sha256:a98eec61135465b062846112e5ecf2eebb855305acc1dfbae43b72903b8ab5be"}, - {file = "debugpy-1.8.20-cp38-cp38-macosx_15_0_x86_64.whl", hash = "sha256:b773eb026a043e4d9c76265742bc846f2f347da7e27edf7fe97716ea19d6bfc5"}, - {file = "debugpy-1.8.20-cp38-cp38-manylinux_2_34_x86_64.whl", hash = "sha256:20d6e64ea177ab6732bffd3ce8fc6fb8879c60484ce14c3b3fe183b1761459ca"}, - {file = "debugpy-1.8.20-cp38-cp38-win32.whl", hash = "sha256:0dfd9adb4b3c7005e9c33df430bcdd4e4ebba70be533e0066e3a34d210041b66"}, - {file = "debugpy-1.8.20-cp38-cp38-win_amd64.whl", hash = "sha256:60f89411a6c6afb89f18e72e9091c3dfbcfe3edc1066b2043a1f80a3bbb3e11f"}, - {file = "debugpy-1.8.20-cp39-cp39-macosx_15_0_x86_64.whl", hash = "sha256:bff8990f040dacb4c314864da95f7168c5a58a30a66e0eea0fb85e2586a92cd6"}, - {file = "debugpy-1.8.20-cp39-cp39-manylinux_2_34_x86_64.whl", hash = "sha256:70ad9ae09b98ac307b82c16c151d27ee9d68ae007a2e7843ba621b5ce65333b5"}, - {file = "debugpy-1.8.20-cp39-cp39-win32.whl", hash = "sha256:9eeed9f953f9a23850c85d440bf51e3c56ed5d25f8560eeb29add815bd32f7ee"}, - {file = "debugpy-1.8.20-cp39-cp39-win_amd64.whl", hash = "sha256:760813b4fff517c75bfe7923033c107104e76acfef7bda011ffea8736e9a66f8"}, - {file = "debugpy-1.8.20-py2.py3-none-any.whl", hash = "sha256:5be9bed9ae3be00665a06acaa48f8329d2b9632f15fd09f6a9a8c8d9907e54d7"}, - {file = "debugpy-1.8.20.tar.gz", hash = "sha256:55bc8701714969f1ab89a6d5f2f3d40c36f91b2cbe2f65d98bf8196f6a6a2c33"}, + {file = "debugpy-1.8.21-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:8eeab7b5462f683452c57c0126aaa5ec4e974ddb705f39ba87dff8818c8e08f9"}, + {file = "debugpy-1.8.21-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:0fddfdc130ac6d8bfc0415b0409822fa901c8f310e5c945ac5653a0352532344"}, + {file = "debugpy-1.8.21-cp310-cp310-win32.whl", hash = 
"sha256:72b5d676c4cbfac3bac5bb01c138a4656e843f93f03ce2a5f4e394ad49fbee73"}, + {file = "debugpy-1.8.21-cp310-cp310-win_amd64.whl", hash = "sha256:a7fe47fd23da57b9e0bec3f4a8ee65a2dc55782455ed7f2141d75ab5d2eaeef5"}, + {file = "debugpy-1.8.21-cp311-cp311-macosx_15_0_universal2.whl", hash = "sha256:da456226c7b4c69e35dbe35dcee6623d912000a77816db7856a41af1c72a0264"}, + {file = "debugpy-1.8.21-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:f68b891688e61bdc08b8d364d919ff0051e0b94657b39dcd027bc3173edb7cdc"}, + {file = "debugpy-1.8.21-cp311-cp311-win32.whl", hash = "sha256:f843a8b08c2edeaf9b1582eed4f25441af21a297c22ff16bf76a662557aa9c9e"}, + {file = "debugpy-1.8.21-cp311-cp311-win_amd64.whl", hash = "sha256:84c564d8cc701d41843b29a92814c1f1bef6798724ca9d675c284ad9f6a547d7"}, + {file = "debugpy-1.8.21-cp312-cp312-macosx_15_0_universal2.whl", hash = "sha256:9f96713896f39c3dff0ee841f47320c3f2983d33c341e009361bb0ebc79adc4e"}, + {file = "debugpy-1.8.21-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:c193d474f0a211191f2b4449d2d06157c689013035bd952f3b617e0ef422b176"}, + {file = "debugpy-1.8.21-cp312-cp312-win32.whl", hash = "sha256:4743373c1cac7f9e74a1b9915bf1dbe0e900eca657ffb170ae07ac8363205ae9"}, + {file = "debugpy-1.8.21-cp312-cp312-win_amd64.whl", hash = "sha256:bd7ba9dd3daa7c2f942c6ca8d4695a16bf9ac16b63615261c7982bc74f7ed20c"}, + {file = "debugpy-1.8.21-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:13678151fc401e2d68c9880b91e28714f797d40422994572b24560ef80910a88"}, + {file = "debugpy-1.8.21-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:ecbd158386c31ffe71d46f72d44d56e66331ab9b16cad649156d514368f23ab2"}, + {file = "debugpy-1.8.21-cp313-cp313-win32.whl", hash = "sha256:2c2ae706dec41d99a9ca1f7ebc987a83e65578363be6f6b3ac9067504917fae1"}, + {file = "debugpy-1.8.21-cp313-cp313-win_amd64.whl", hash = "sha256:aa648733047443eb1d07682c4ef287d36a54507b643ffdf38b09a3ef002c72a0"}, + {file = "debugpy-1.8.21-cp314-cp314-macosx_15_0_universal2.whl", hash = 
"sha256:9bb2a685287a2ac9b181cde89edcec64845cb51de7faaa75badb9a698bc24782"}, + {file = "debugpy-1.8.21-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:3d6922439bf33fd38a3e2c447869ebc7b97da5cd3d329ff1ef9bc06c4903437e"}, + {file = "debugpy-1.8.21-cp314-cp314-win32.whl", hash = "sha256:15d4963bd5ffa48f0da0947fd06757fa7621945048a14ad7705431566d3c0e7c"}, + {file = "debugpy-1.8.21-cp314-cp314-win_amd64.whl", hash = "sha256:fe0744a12353406de0ae8ccff0d0a4a666f00801a3db8fd04e7a5f761cd520e8"}, + {file = "debugpy-1.8.21-cp38-cp38-macosx_15_0_x86_64.whl", hash = "sha256:0042da0ecd0a8b50dc4a54395ecd870d258d73fa18776f50c91fdcabdcad2675"}, + {file = "debugpy-1.8.21-cp38-cp38-manylinux_2_34_x86_64.whl", hash = "sha256:ffd932c6796afadab6993ec96745918a8cb2444dbd392074f769db5ea40ab440"}, + {file = "debugpy-1.8.21-cp38-cp38-win32.whl", hash = "sha256:4e7c2d784d78ad4b71a5f8cd7b59c167719ec8a7a0211dbb3eb1bfeda78bc4e2"}, + {file = "debugpy-1.8.21-cp38-cp38-win_amd64.whl", hash = "sha256:aa9d941d6dfe3d0407e4b3ca0b9ec466030e260fbf1174094f68785680f66db6"}, + {file = "debugpy-1.8.21-cp39-cp39-macosx_15_0_x86_64.whl", hash = "sha256:9f5171176a0084b95d2ebe55a4d1f7b2a75b74c5dbec577ebd3a85c740551c36"}, + {file = "debugpy-1.8.21-cp39-cp39-manylinux_2_34_x86_64.whl", hash = "sha256:f15c10084f9861b5e8414a48f18f8e4aadf51a98a59e72c16aa28281ca994672"}, + {file = "debugpy-1.8.21-cp39-cp39-win32.whl", hash = "sha256:4e70cc8b5079f885cb43910924ee0aab73b8b6b2a14eff23afdd9895d86e79eb"}, + {file = "debugpy-1.8.21-cp39-cp39-win_amd64.whl", hash = "sha256:e935f9dc0501be523c8a8e1853c39432e1354e9ece717ae5998fd2371c4542c3"}, + {file = "debugpy-1.8.21-py2.py3-none-any.whl", hash = "sha256:b1e37d333663c8851516a47364ef473da127f9caebe4417e6df6f5825a7e9a92"}, + {file = "debugpy-1.8.21.tar.gz", hash = "sha256:a3c53278e84c94e11bd87c53970ec391d1a67396c8b22609fcac576520e611a6"}, ] [[package]] @@ -872,33 +872,16 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = 
"filelock" -version = "3.29.0" +version = "3.29.1" description = "A platform independent file lock." optional = false python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258"}, - {file = "filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90"}, + {file = "filelock-3.29.1-py3-none-any.whl", hash = "sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b"}, + {file = "filelock-3.29.1.tar.gz", hash = "sha256:d97e6b1b9757569626c58caa07dc4beb1613f4a2938b1e8cc81afca398906c9e"}, ] -[[package]] -name = "flake8" -version = "6.1.0" -description = "the modular source code checker: pep8 pyflakes and co" -optional = false -python-versions = ">=3.8.1" -groups = ["dev"] -files = [ - {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, - {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, -] - -[package.dependencies] -mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" -pyflakes = ">=3.1.0,<3.2.0" - [[package]] name = "html5lib" version = "1.1" @@ -935,14 +918,14 @@ files = [ [[package]] name = "idna" -version = "3.17" +version = "3.18" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.9" groups = ["main", "docs"] files = [ - {file = "idna-3.17-py3-none-any.whl", hash = "sha256:466e48829084efe2548012b855df21540b96f2e20e51bd124c851536556a592c"}, - {file = "idna-3.17.tar.gz", hash = "sha256:5eb0cb53bc467c12eadcf6de83163ad8527cec9416f44b9b61b19caedad2b87f"}, + {file = "idna-3.18-py3-none-any.whl", hash = "sha256:7f952cbe720b688055e3f87de14f5c3e5fdaa8bc3928985c4077ca689de849a2"}, + {file = "idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848"}, ] 
[package.extras] @@ -1028,7 +1011,7 @@ pfzy = ">=0.3.1,<0.4.0" prompt-toolkit = ">=3.0.1,<4.0.0" [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "ipykernel" @@ -1235,7 +1218,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.3.6" +jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" rpds-py = ">=0.25.0" @@ -1828,7 +1811,7 @@ files = [ ] [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "platformdirs" @@ -1954,18 +1937,6 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "pycodestyle" -version = "2.11.1" -description = "Python style guide checker" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, -] - [[package]] name = "pycparser" version = "3.0" @@ -1979,18 +1950,6 @@ files = [ {file = "pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29"}, ] -[[package]] -name = "pyflakes" -version = "3.1.0" -description = "passive checker of Python programs" 
-optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, - {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, -] - [[package]] name = "pygments" version = "2.20.0" @@ -2023,8 +1982,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.6", markers = "python_version == \"3.11\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, + {version = ">=0.3.6", markers = "python_version == \"3.11\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -2051,22 +2010,6 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] -[[package]] -name = "pyproject-flake8" -version = "6.1.0" -description = "pyproject-flake8 (`pflake8`), a monkey patching wrapper to connect flake8 with pyproject.toml configuration" -optional = false -python-versions = ">=3.8.1" -groups = ["dev"] -files = [ - {file = "pyproject_flake8-6.1.0-py3-none-any.whl", hash = "sha256:86ea5559263c098e1aa4f866776aa2cf45362fd91a576b9fd8fbbbb55db12c4e"}, - {file = "pyproject_flake8-6.1.0.tar.gz", hash = "sha256:6da8e5a264395e0148bc11844c6fb50546f1fac83ac9210f7328664135f9e70f"}, -] - -[package.dependencies] -flake8 = "6.1.0" -tomli = {version = "*", markers = "python_version < \"3.11\""} - [[package]] name = "pyshacl" version = "0.31.0" @@ -2812,6 +2755,34 @@ jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] libyaml = ["ruamel.yaml.clibz (>=0.3.7) ; platform_python_implementation == \"CPython\""] oldlibyaml = ["ruamel.yaml.clib ; platform_python_implementation == \"CPython\""] +[[package]] +name = "ruff" +version = "0.15.15" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b"}, + {file = "ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e"}, + {file = "ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4"}, + {file = "ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c"}, + {file = "ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd"}, + {file = "ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f"}, + {file = "ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb"}, + {file = "ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a"}, + {file = "ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9"}, + {file = "ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4"}, + {file = "ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7"}, + {file = "ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6"}, +] + [[package]] name = "six" version = "1.17.0" @@ -2826,14 +2797,14 @@ files = [ [[package]] name = "snowballstemmer" -version = "3.1.0" +version = "3.1.1" description = "This package provides 36 stemmers for 34 languages generated from Snowball algorithms." optional = false python-versions = ">=3.3" groups = ["docs"] files = [ - {file = "snowballstemmer-3.1.0-py3-none-any.whl", hash = "sha256:17e6d1da216aa07db6dad37139ea70cf13c4b2e9a096f6e64a9648fc657d3154"}, - {file = "snowballstemmer-3.1.0.tar.gz", hash = "sha256:fd9e34526b23340cd23ffea6c9f9760974ecc2c2ac9e1d81401443ccdb2a801f"}, + {file = "snowballstemmer-3.1.1-py3-none-any.whl", hash = "sha256:7e207fa178741da09cdee59d3ecec3827ad5f92b1fc5c9ff3755b639f71f5752"}, + {file = "snowballstemmer-3.1.1.tar.gz", hash = "sha256:e07bbc54a0d798fe6010a12398422e62a8bfbba95c394fd0956ef58cb4d3e260"}, ] [[package]] @@ -3404,19 +3375,19 @@ files = [ [[package]] name = "traitlets" -version = "5.15.0" +version = "5.15.1" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "traitlets-5.15.0-py3-none-any.whl", hash = "sha256:fb36a18867a6803deab09f3c5e0fa81bb7b26a5c9e82501c9933f759166eff40"}, - {file = "traitlets-5.15.0.tar.gz", 
hash = "sha256:4fead733f81cf1c4c938e06f8ca4633896833c9d89eff878159457f4d4392971"}, + {file = "traitlets-5.15.1-py3-none-any.whl", hash = "sha256:770a53705f84b81ac107e83a1b3328ff2dae16094d8fc3cfc004e4b22dfd8e92"}, + {file = "traitlets-5.15.1.tar.gz", hash = "sha256:7b1c07854fe25acb39e009bae49f11b79ff6cbb2f27999104e9110e7a6b53722"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "mypy (>=1.7.0,<1.19) ; platform_python_implementation == \"PyPy\"", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.17.0,<1.19)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" @@ -3433,22 +3404,22 @@ markers = {dev = "python_version < \"3.12\"", test = "python_version == \"3.10\" [[package]] name = "typos" -version = "1.47.0" +version = "1.47.2" description = "Source Code Spelling Correction" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "typos-1.47.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c4104785d237fd2256023ba4339e404ed2db58888af703eb0726a1441a8e85d7"}, - {file = "typos-1.47.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cb95b6fde16fe5bab11788bc14d3d9ec49dcbeec9517378e2fca9e283e6b7822"}, - {file = "typos-1.47.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:653eba984d2cc55eb47d50771761bb7e0d6e52771c2489fd76b1f86fbed49a2a"}, - {file = "typos-1.47.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be447ce8400154e4ae515cc9ecef99532cee6b29271ba3adbe3487304cd2c3c2"}, - {file = "typos-1.47.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9856752e08face132c7d08de875567675f2c54e3e04096d6ebad09c6430e16f0"}, - {file = "typos-1.47.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:438b8579e440ff88baf51cb577b2eb4514d065509ba41a10981e1ea9048a519b"}, - {file = 
"typos-1.47.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:19cfe53c33ac7d0c5f029fb97939b082139b63fbbb88350ab7e6df28be31629e"}, - {file = "typos-1.47.0-py3-none-win32.whl", hash = "sha256:2de87acff0b6857ce693059a6291227eec999284e16a87162178c847236bafca"}, - {file = "typos-1.47.0-py3-none-win_amd64.whl", hash = "sha256:05c1547e3dbbb6fe8a861b56cb98e9922cd5d20170ee2e7e649faa1605dfdb49"}, - {file = "typos-1.47.0.tar.gz", hash = "sha256:f00d98b8338abd6016f968fb7a3911c911010c17c333c2e102e8893b1c97db8f"}, + {file = "typos-1.47.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:87df3040f9d34afd9b19a9437045fbb8838a0435eb00f047e4bac48d92f2fc44"}, + {file = "typos-1.47.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:287e2718a058c561baf5f55ec6b466d9270546bcb1951a2c120e594c574b9597"}, + {file = "typos-1.47.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e4ef6632b280ce237caaec38d80dd3c2d956e28aa6925f80d4e915335b94a36"}, + {file = "typos-1.47.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd7b310019943e26552809bd17f9f202b45eb0c9694f437f1708ab0868248ced"}, + {file = "typos-1.47.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f525edf9b67d3ede552bb70bd4171f23e5e8edec3187189dfe8d1676df630b44"}, + {file = "typos-1.47.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6c36a97ab3dd8c8924cd9b907a32e9aac504fc779d0c3b05e19204ca93385c37"}, + {file = "typos-1.47.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4eb36a44daed1d719ce417d2a6dd7a323d814ccdc647d9bb20d17ac2bed9e38c"}, + {file = "typos-1.47.2-py3-none-win32.whl", hash = "sha256:d0c01034bc029d8883406f3e2bed46dfa9b090ce6ad4a99e580070ae51307cfa"}, + {file = "typos-1.47.2-py3-none-win_amd64.whl", hash = "sha256:749bbba363067bfc0e54ccc6e7580750e17f5ef093c91fedf6c2eb27d32efee6"}, + {file = "typos-1.47.2.tar.gz", hash = "sha256:d303e8c495ea870f750d8b37f2d3c3fe2441b00cf18ca5d7e0b52eca1938c7b7"}, ] [[package]] @@ -3535,4 +3506,4 @@ type = 
["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "8bc0729ff37fdf976143c55e5be984ea8db53fc34d80c7cbacb4a2cb154dd67a" +content-hash = "f4d82cd5419be91be0ceddc76fc52fb8dec86a5369a1af7ebe9cb4a8c9f40ee8" From f016f66be4fe640129134ec20e138e618f2c9625 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:34:47 +0200 Subject: [PATCH 247/352] =?UTF-8?q?ci:=20=F0=9F=94=A7=20replace=20flake8?= =?UTF-8?q?=20with=20ruff=20in=20lint=20workflow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/testing.yaml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index dd1c3c137..76abf80e8 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -39,7 +39,7 @@ env: FORCE_COLOR: "1" # Force color output in CI jobs: - # Verifies pep8, pyflakes, circular complexity, and spelling + # Verifies code style, bugs, complexity, and spelling via **ruff** + **typos** lint: name: 🚨 Lint and spellcheck runs-on: ubuntu-latest @@ -51,10 +51,23 @@ jobs: uses: actions/setup-python@v6 with: python-version: ${{ env.PYTHON_VERSION }} - - name: 🔽 Install flake8 - run: pip install flake8 - - name: ⌛ Lint Python code - run: flake8 -v rocrate_validator tests + - name: 🔽 Install ruff (version from pyproject.toml) + run: | + # Read the ruff version constraint from pyproject.toml. + # Poetry uses caret syntax (e.g. "^0.15.15") which means + # "compatible with 0.15.x". pip uses PEP 440 "~=" for the + # same semantics on pre-1.0 packages. 
+ pip install "$( + python -c " + import tomllib + cfg = tomllib.loads(open('pyproject.toml').read()) + constraint = cfg['tool']['poetry']['group']['dev']['dependencies']['ruff'] + # Drop the caret prefix so we can pass ~= to pip + version = constraint.lstrip('^') + print(f'ruff~={version}') + ")" + - name: ⌛ Lint Python code with ruff + run: ruff check rocrate_validator tests - name: ⌛ Spell check code and profiles (covers Python and SHACL) uses: crate-ci/typos@v1.47.0 From c229760b2a3e39e1894195a9311c9316caf30f39 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:39:46 +0200 Subject: [PATCH 248/352] =?UTF-8?q?ci:=20=F0=9F=94=A7=20add=20github=20out?= =?UTF-8?q?put=20format=20and=20statistics=20to=20ruff=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/testing.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index 76abf80e8..2a52a6666 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -67,7 +67,7 @@ jobs: print(f'ruff~={version}') ")" - name: ⌛ Lint Python code with ruff - run: ruff check rocrate_validator tests + run: ruff check rocrate_validator tests --output-format github --statistics - name: ⌛ Spell check code and profiles (covers Python and SHACL) uses: crate-ci/typos@v1.47.0 From 3342f8a72767584a7be70373886d410ed36994f7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 10:59:52 +0200 Subject: [PATCH 249/352] =?UTF-8?q?ci:=20=F0=9F=94=A7=20split=20ruff=20che?= =?UTF-8?q?ck=20into=20annotations=20and=20statistics=20steps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/testing.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index 2a52a6666..a86581955 100644 --- 
a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -67,7 +67,12 @@ jobs: print(f'ruff~={version}') ")" - name: ⌛ Lint Python code with ruff - run: ruff check rocrate_validator tests --output-format github --statistics + run: | + # Produce GitHub annotations; suppress exit code so the + # statistics summary still runs. + ruff check rocrate_validator tests --output-format github || true + # Emit a compact rule-count summary and fail the job if needed. + ruff check rocrate_validator tests --statistics - name: ⌛ Spell check code and profiles (covers Python and SHACL) uses: crate-ci/typos@v1.47.0 From d1008faac756ced7653623e54ee56c3a48d4539e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 11:57:11 +0200 Subject: [PATCH 250/352] =?UTF-8?q?style:=20=F0=9F=94=A7=20normalize=20quo?= =?UTF-8?q?tes,=20replace=20os.path=20with=20pathlib=20in=20docs/conf.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/conf.py | 56 ++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 872323e38..c1ee16e9d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,17 +22,18 @@ import os import sys +from pathlib import Path from rocrate_validator import __version__ # update PYTHONPATH -sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, str(Path.cwd())) +sys.path.insert(0, str(Path("..").resolve())) # Set project metadata -project = 'rocrate-validator' -copyright = '2024-2026, CRS4' -author = 'Marco Enrico Piras, Luca Pireddu, Simone Leo' +project = "rocrate-validator" +copyright = "2024-2026, CRS4" +author = "Marco Enrico Piras, Luca Pireddu, Simone Leo" release = __version__ github_url = "https://github.com/crs4/rocrate-validator" @@ -53,19 +54,19 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.autosummary', - 'nbsphinx', - 'myst_parser', - 'sphinx.ext.mathjax', - 'enum_tools.autoenum', - 'sphinx.ext.intersphinx', - 'sphinx.ext.autosectionlabel', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.autosummary", + "nbsphinx", + "myst_parser", + "sphinx.ext.mathjax", + "enum_tools.autoenum", + "sphinx.ext.intersphinx", + "sphinx.ext.autosectionlabel", + "sphinx_copybutton", ] # Only auto-generate section labels for the top two heading levels: deeper @@ -78,20 +79,23 @@ # below H1 (myst.header) and use GitHub-relative anchor links that span pages # (myst.xref_missing). These are expected when including it here. suppress_warnings = [ - 'myst.header', - 'myst.xref_missing', + "myst.header", + "myst.xref_missing", ] -templates_path = ['_templates'] -# exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'experiments', 'ontologies', 'tests', 'logs', 'examples', 'debug'] +templates_path = ["_templates"] +# exclude_patterns = [ +# "_build", "Thumbs.db", ".DS_Store", "experiments", +# "ontologies", "tests", "logs", "examples", "debug", +# ] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -107,9 +111,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" autosummary_generate = True From e6c3ea46a1e3a840aec4c61ba5ef1f18dd7d4720 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 12:41:28 +0200 Subject: [PATCH 251/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20add=20pre-commi?= =?UTF-8?q?t=20configuration=20and=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 56 +++++++++++++++++++ poetry.lock | 115 ++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 15 ++++++ 3 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..e39569dd4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,56 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +repos: + # Run typos to check for common spelling mistakes in the codebase. + - repo: https://github.com/crate-ci/typos + rev: v1.22.9 + hooks: + - id: typos + args: + - --config + - pyproject.toml + - --write-changes + + # Performs basic checks on files, such as removing trailing whitespace, + # ensuring files end with a newline, and validating YAML files. 
+ - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + + # Run Ruff for both linting and formatting. + # The ruff-check hook will report linting issues, + # while the ruff-format hook will automatically fix formatting issues. + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.15 + hooks: + - id: ruff-check + - id: ruff-format + + # Run mypy to perform static type checking on the codebase. + # This hook is configured as a manual hook to run mypy using Poetry, + # ensuring that it uses the correct virtual environment and dependencies. + # To run this hook, use the command: + # `pre-commit run --hook-stage manual` + - repo: local + hooks: + - id: mypy + name: mypy + entry: poetry run mypy + language: system + types: [python] + pass_filenames: false diff --git a/poetry.lock b/poetry.lock index caf08b3b2..9a8f80028 100644 --- a/poetry.lock +++ b/poetry.lock @@ -336,6 +336,18 @@ files = [ [package.dependencies] pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} +[[package]] +name = "cfgv" +version = "3.5.0" +description = "Validate configuration and produce human readable error messages." 
+optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0"}, + {file = "cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132"}, +] + [[package]] name = "charset-normalizer" version = "3.4.7" @@ -754,6 +766,18 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "distlib" +version = "0.4.1" +description = "Distribution utilities" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "distlib-0.4.1-py2.py3-none-any.whl", hash = "sha256:9c2c552c68cbadc619f2d0ed3a69e27c351a3f4c9baa9ffb7df9e9cdc3d19a97"}, + {file = "distlib-0.4.1.tar.gz", hash = "sha256:c3804d0d2d4b5fcd44036eb860cb6660485fcdf5c2aba53dc324d805837ea65b"}, +] + [[package]] name = "docutils" version = "0.21.2" @@ -876,7 +900,7 @@ version = "3.29.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.10" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "filelock-3.29.1-py3-none-any.whl", hash = "sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b"}, {file = "filelock-3.29.1.tar.gz", hash = "sha256:d97e6b1b9757569626c58caa07dc4beb1613f4a2938b1e8cc81afca398906c9e"}, @@ -916,6 +940,21 @@ files = [ {file = "html5rdf-1.2.1.tar.gz", hash = "sha256:ace9b420ce52995bb4f05e7425eedf19e433c981dfe7a831ab391e2fa2e1a195"}, ] +[[package]] +name = "identify" +version = "2.6.19" +description = "File identification library for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a"}, + {file = "identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.18" @@ -1727,6 +1766,18 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] +[[package]] +name = "nodeenv" +version = "1.10.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +files = [ + {file = "nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827"}, + {file = "nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb"}, +] + [[package]] name = "owlrl" version = "7.1.4" @@ -1841,6 +1892,25 @@ files = [ dev = ["pre-commit", "tox"] testing = ["coverage", "pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "4.6.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pre_commit-4.6.0-py2.py3-none-any.whl", hash = "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b"}, + {file = "pre_commit-4.6.0.tar.gz", hash = "sha256:718d2208cef53fdc38206e40524a6d4d9576d103eb16f0fec11c875e7716e9d9"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "prettytable" version = "3.17.0" @@ -2118,13 +2188,33 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-discovery" +version = "1.4.0" +description = "Python interpreter discovery" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "python_discovery-1.4.0-py3-none-any.whl", hash = "sha256:26ed78d703e234879a66244c7d4114563fb13ec5cd30a2d1357e5fb4850782da"}, + {file = "python_discovery-1.4.0.tar.gz", hash = "sha256:eb8bc7daad3c226c147e45bb4e970a1feb1bf4048ee178e6db59e197b8010ce3"}, +] + +[package.dependencies] +filelock = ">=3.15.4" +platformdirs = ">=4.3.6,<5" + +[package.extras] +docs = ["furo (>=2025.12.19)", "sphinx (>=9.1)", "sphinx-autodoc-typehints (>=3.6.3)", "sphinxcontrib-mermaid (>=2)", "sphinxcontrib-towncrier (>=0.4)", "towncrier (>=25.8)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.5.4)", "pytest (>=8.3.5)", "pytest-mock (>=3.14)", "setuptools (>=75.1)"] + [[package]] name = "pyyaml" version = "6.0.3" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, @@ -3458,6 +3548,25 @@ h2 = ["h2 
(>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] +[[package]] +name = "virtualenv" +version = "21.4.2" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "virtualenv-21.4.2-py3-none-any.whl", hash = "sha256:854210ca524a1a4d0d744734f4acbc721c3ffe163b85bbf5d56d14d5ae2f0fae"}, + {file = "virtualenv-21.4.2.tar.gz", hash = "sha256:38e6ee0a555615c0ea9da2ac7e9998fe8dc3b911dd33ad8eaad2020957653b0c"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""} +platformdirs = ">=3.9.1,<5" +python-discovery = ">=1.4" +typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""} + [[package]] name = "wcwidth" version = "0.7.0" @@ -3506,4 +3615,4 @@ type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "f4d82cd5419be91be0ceddc76fc52fb8dec86a5369a1af7ebe9cb4a8c9f40ee8" +content-hash = "cabed2165dd182a3b780942b4599772c212c0fa84e8c3040925a1cdcdbd411a0" diff --git a/pyproject.toml b/pyproject.toml index 3bdf656aa..478719656 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,17 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ [tool.poetry] name = "roc-validator" version = "0.10.0" @@ -76,6 +90,7 @@ typos = "^1.41.0" pylint = "^3.1.0" ipykernel = "^6.29.3" ruff = "^0.15.15" +pre-commit = "^4.6.0" [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" From ffcde86898da7da158911b48ebee3991202fcc5f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 4 Jun 2026 13:41:43 +0200 Subject: [PATCH 252/352] =?UTF-8?q?chore:=20=F0=9F=94=A7=20configure=20poe?= =?UTF-8?q?try=20to=20create=20in-project=20virtualenv?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- poetry.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 poetry.toml diff --git a/poetry.toml b/poetry.toml new file mode 100644 index 000000000..ab1033bd3 --- /dev/null +++ b/poetry.toml @@ -0,0 +1,2 @@ +[virtualenvs] +in-project = true From 983328cbe1e02a313aea08f591bf9cfabe209226 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 16:52:58 +0200 Subject: [PATCH 253/352] test(ro-crate-1.2): expand entity types in fake @context for availability-flag tests --- .../ro-crate-1.2/test_availability_flags.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py index 48aba136c..ab07960ed 100644 --- a/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py +++ b/tests/integration/profiles/ro-crate-1.2/test_availability_flags.py @@ -41,7 +41,18 @@ class _FakeContextResponse: headers = {"Content-Type": "application/ld+json"} def json(self): - return {"@context": {k: f"http://schema.org/{k}" for k in _FAKE_CONTEXT_KEYS}} + # Map each known key explicitly so `check_compaction` finds them, and add a + # schema.org default vocabulary so entity *types* (Dataset, File, Person, + # Organization, CreativeWork, ...) 
expand correctly during validation even + # though they are not listed individually. Without `@vocab`, types do not + # resolve to schema.org and the crate fails REQUIRED structural checks. + context = {k: f"http://schema.org/{k}" for k in _FAKE_CONTEXT_KEYS} + context["@vocab"] = "http://schema.org/" + # `conformsTo` is a Dublin Core term in the real RO-Crate context; the + # file-descriptor checks query `dct:conformsTo`, so it must not fall back + # to the schema.org default vocabulary. + context["conformsTo"] = {"@id": "http://purl.org/dc/terms/conformsTo", "@type": "@id"} + return {"@context": context} def _fake_context_get(url, *args, **kwargs): From f672de2b04f910b32143713e468774a664aa0248 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 17:26:09 +0200 Subject: [PATCH 254/352] fix(utils): add exception chaining to satisfy B904 --- rocrate_validator/utils/http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 31497d949..07817e146 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -402,8 +402,8 @@ def _reconfigure(self, self._close_session() try: self.cache_max_age = int(cache_max_age) - except ValueError: - raise TypeError("cache_max_age must be an integer") + except ValueError as exc: + raise TypeError("cache_max_age must be an integer") from exc self.cache_path_prefix = cache_path self.offline = bool(offline) self.no_cache = bool(no_cache) From f761112152d5ccb544dd5271fc69bd312eac3178 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 11:30:21 +0200 Subject: [PATCH 255/352] chore(lint): :rotating_light: add missing type annotation --- rocrate_validator/utils/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index e0b297593..4e2d91443 100644 --- a/rocrate_validator/utils/log.py +++ 
b/rocrate_validator/utils/log.py @@ -18,7 +18,7 @@ from io import StringIO from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING, Handler, Logger, StreamHandler from logging import basicConfig as logging_basicConfig -from typing import Optional +from typing import Any, Optional import colorlog from rich.console import Console @@ -44,7 +44,7 @@ def get_log_format(level: int): return log_format -DEFAULT_SETTINGS = { +DEFAULT_SETTINGS : dict[str, Any] = { 'enabled': True, 'level': WARNING, 'format': get_log_format(WARNING) From b586b973e6b7c7fe4f27587cf49140fa3b3f5646 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 11:39:25 +0200 Subject: [PATCH 256/352] =?UTF-8?q?fix(log):=20=F0=9F=90=9B=20fall=20back?= =?UTF-8?q?=20to=20WARNING=20for=20unknown=20log=20level=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 4e2d91443..3e1cdce0b 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -122,7 +122,7 @@ def __setup_logger__(logger: Logger): # parse the log level level = settings.get('level', __settings__['level']) if not isinstance(level, int): - level = getattr(__module__, settings['level'].upper(), None) + level = getattr(__module__, settings['level'].upper(), WARNING) # set the log level logger.setLevel(level) From a095cbf6e515580909ff66088a9628e540d34bce Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 11:41:16 +0200 Subject: [PATCH 257/352] =?UTF-8?q?chore(shacl):=20=F0=9F=94=A7=20mark=20d?= =?UTF-8?q?ebug=20commented-out=20code=20with=20noqa:=20ERA001?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/checks.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff 
--git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 8de33b3ad..e9d23a529 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -314,14 +314,14 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): end_time = timer() logger.debug(f"Execution time for getting shapes: {end_time - start_time} seconds") - # # uncomment to save the graphs to the logs folder (for debugging purposes) - # start_time = timer() - # data_graph.serialize("logs/data_graph.ttl", format="turtle") - # shapes_graph.serialize("logs/shapes_graph.ttl", format="turtle") + ## uncomment to save the graphs to the logs folder (for debugging purposes) + # start_time = timer() # noqa: ERA001 + # data_graph.serialize("logs/data_graph.ttl", format="turtle") # noqa: ERA001 + # shapes_graph.serialize("logs/shapes_graph.ttl", format="turtle") # noqa: ERA001 # if ontology_graph: - # ontology_graph.serialize("logs/ontology_graph.ttl", format="turtle") - # end_time = timer() - # logger.debug(f"Execution time for saving graphs: {end_time - start_time} seconds") + # ontology_graph.serialize("logs/ontology_graph.ttl", format="turtle") # noqa: ERA001 + # end_time = timer() # noqa: ERA001 + # logger.debug(f"Execution time for saving graphs: {end_time - start_time} seconds") # noqa: ERA001 # validate the data graph start_time = timer() @@ -331,7 +331,7 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): ontology_graph=ontology_graph, **shacl_context.settings.to_dict(), ) - # shacl_result.results_graph.serialize("logs/validation_results.ttl", format="turtle") + # shacl_result.results_graph.serialize("logs/validation_results.ttl", format="turtle") # noqa: ERA001 # parse the validation result end_time = timer() logger.debug("Validation '%s' conforms: %s", self.name, shacl_result.conforms) From 4d0a57b63c279ec4029cdffbed868e8007e0f9d0 Mon Sep 17 00:00:00 2001 From: 
Marco Enrico Piras Date: Wed, 10 Jun 2026 11:42:15 +0200 Subject: [PATCH 258/352] =?UTF-8?q?refactor(shacl):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20extract=20helpers=20to=20lower=20cyclomatic=20complexity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../requirements/shacl/checks.py | 84 ++++++++++--------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index e9d23a529..15cc16706 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -423,42 +423,11 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, and shacl_context.settings.disable_inherited_profiles_issue_reporting ): continue - for violation in failed_requirements_checks_violations[requirementCheck.identifier]: - violating_entity = make_uris_relative(cast("Any", violation.focusNode).toPython(), - shacl_context.publicID) - violating_property = violation.resultPath.toPython() if violation.resultPath else None - violation_message = violation.get_result_message(str(shacl_context.rocrate_uri)) - registered_check_issues = shacl_context.result.get_issues_by_check(requirementCheck) - skip_requirement_check = False - # check if the violation is already registered - # and skip the requirement check if it is - for check_issue in registered_check_issues: - if ( - check_issue.message == violation_message - and check_issue.violatingProperty == violating_property - and check_issue.violatingEntity == violating_entity - and check_issue.violatingPropertyValue == violation.value - ): - skip_requirement_check = True - logger.debug( - "Skipping requirement check %s: %s", - requirementCheck.identifier, - violation_message, - ) - break - # if the check is not to be skipped, add the issue to the context - if not skip_requirement_check: - c = shacl_context.result.add_issue( - 
message=violation.get_result_message(str(shacl_context.rocrate_uri)), - check=requirementCheck, - violatingProperty=violating_property, - violatingEntity=violating_entity, - violatingPropertyValue=violation.value, - ) - logger.debug("Added validation issue to the context: %s", c) - # if the fail fast mode is enabled, stop the validation after the first issue - if shacl_context.fail_fast: - break + self.__register_check_violations__( + shacl_context, + requirementCheck, + failed_requirements_checks_violations[requirementCheck.identifier], + ) # If the fail fast mode is disabled, notify all the validation issues # related to profiles other than the current one. @@ -467,10 +436,7 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, # all together and not profile by profile if requirementCheck.identifier not in failed_requirement_checks_notified: shacl_context.result._add_executed_check(requirementCheck, False) - if ( - requirementCheck.identifier not in failed_requirement_checks_notified - and requirementCheck.requirement.profile != shacl_context.current_validation_profile - ): + if requirementCheck.requirement.profile != shacl_context.current_validation_profile: failed_requirement_checks_notified.append(requirementCheck.identifier) shacl_context.validator.notify( RequirementCheckValidationEvent( @@ -487,6 +453,44 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, break return failed_requirement_checks_notified + def __register_check_violations__(self, shacl_context, requirementCheck, violations): + for violation in violations: + violating_entity = make_uris_relative(cast("Any", violation.focusNode).toPython(), + shacl_context.publicID) + violating_property = violation.resultPath.toPython() if violation.resultPath else None + violation_message = violation.get_result_message(str(shacl_context.rocrate_uri)) + registered_check_issues = shacl_context.result.get_issues_by_check(requirementCheck) + 
skip_requirement_check = False + # check if the violation is already registered + # and skip the requirement check if it is + for check_issue in registered_check_issues: + if ( + check_issue.message == violation_message + and check_issue.violatingProperty == violating_property + and check_issue.violatingEntity == violating_entity + and check_issue.violatingPropertyValue == violation.value + ): + skip_requirement_check = True + logger.debug( + "Skipping requirement check %s: %s", + requirementCheck.identifier, + violation_message, + ) + break + # if the check is not to be skipped, add the issue to the context + if not skip_requirement_check: + c = shacl_context.result.add_issue( + message=violation.get_result_message(str(shacl_context.rocrate_uri)), + check=requirementCheck, + violatingProperty=violating_property, + violatingEntity=violating_entity, + violatingPropertyValue=violation.value, + ) + logger.debug("Added validation issue to the context: %s", c) + # if the fail fast mode is enabled, stop the validation after the first issue + if shacl_context.fail_fast: + break + def __notify_skipped_checks__(self, shacl_context, failed_requirement_checks_notified): for skipped_check in list(shacl_context.result.skipped_checks): logger.debug("Processing skipped check: %s", skipped_check.identifier) From 896b419d4df6f803b5a260e0da4fffacf782554b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 11:43:26 +0200 Subject: [PATCH 259/352] =?UTF-8?q?fix(uri):=20=F0=9F=90=9B=20return=20rea?= =?UTF-8?q?l=20bool=20from=20is=5Fexternal=5Freference?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/uri.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 4efba1e7c..589f9f073 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -70,7 +70,7 @@ def is_external_reference(value: 
object) -> bool: # Reject scheme-only input (``urn:``, ``doi:``): syntactically valid # per the grammar but semantically unusable as an identifier. - return parts.netloc or parts.path or parts.query or parts.fragment + return bool(parts.netloc or parts.path or parts.query or parts.fragment) class URI: From 982e325778d8367ca7850afa0bb7d98216f6b2a4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 11:51:29 +0200 Subject: [PATCH 260/352] =?UTF-8?q?fix(shacl):=20=F0=9F=90=9B=20return=20r?= =?UTF-8?q?eal=20bool=20from=20SHACLCheck.check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 15cc16706..088f565e5 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -253,7 +253,7 @@ def execute_check(self, context: ValidationContext): self.identifier, ctx.current_validation_result, ) - return ctx.current_validation_result + return bool(ctx.current_validation_result) except SHACLValidationAlreadyProcessed: logger.debug( "SHACL Validation of requirement check %s (profile: %s) already processed", From 1ffb38d8b8d43221ba33a732491624f6b6db3c2c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:03:57 +0200 Subject: [PATCH 261/352] =?UTF-8?q?fix(shacl):=20=F0=9F=A9=B9=20add=20type?= =?UTF-8?q?:=20ignore=20for=20dynamic=20context=20attribute=20caching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/models.py | 4 +++- rocrate_validator/requirements/shacl/validator.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 
3b863d90b..79347df96 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -434,7 +434,9 @@ def get_instance(cls, ctx: object): instance = getattr(ctx, "_shapes_registry_instance", None) if not instance: instance = cls() - ctx._shapes_registry_instance = instance + # `ctx` is intentionally typed `object`: the instance is cached as a + # dynamic attribute on whatever context is passed in. + ctx._shapes_registry_instance = instance # type: ignore[attr-defined] return instance diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 113c17324..ea353aa2e 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -241,7 +241,8 @@ def get_instance(cls, context: ValidationContext) -> SHACLValidationContext: instance = getattr(context, "_shacl_validation_context", None) if not instance: instance = SHACLValidationContext(context) - context._shacl_validation_context = instance + # Cached as a dynamic attribute on the validation context. 
+ context._shacl_validation_context = instance # type: ignore[attr-defined] return instance From d4d05231b51a3317b2355cd790c0dc4b18b62cea Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:05:12 +0200 Subject: [PATCH 262/352] =?UTF-8?q?fix(paths):=20=F0=9F=A9=B9=20annotate?= =?UTF-8?q?=20file=5Fpaths=20list=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 6fdeff70d..aeb30dfc2 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -110,7 +110,7 @@ def list_matching_file_paths( :return: A list of file paths """ # initialize an empty list to store the file paths - file_paths = [] + file_paths: list[str] = [] # extension extension = get_format_extension(serialization_format) From d21aa8573883885b76371ce5733061fcbc6e0873 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:06:06 +0200 Subject: [PATCH 263/352] =?UTF-8?q?docs(log):=20=F0=9F=93=9D=20convert=20u?= =?UTF-8?q?sage=20example=20from=20commented=20code=20to=20docstring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/log.py | 63 ++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 3e1cdce0b..6ec54e445 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -229,33 +229,36 @@ def __getattr__(self, name): CRITICAL, DEBUG, ERROR, INFO, WARNING, StreamHandler, Optional] -# Example of usage -# if __name__ == '__main__': -# log_config = { -# 'module1': {'enabled': True, 'level': 'DEBUG'}, -# 'module2': {'enabled': False, 'level': 'INFO'}, -# 'module3': {'enabled': True, 'level': 'ERROR'}, -# } -# mgt = 
LoggerManager(log_config) -# logger1 = mgt.getLogger('module1') -# logger2 = mgt.getLogger('module2') -# logger3 = mgt.getLogger('module3') -# logger4 = mgt.getLogger('module4') - -# logger1.debug('This is a debug message') -# logger1.info('This is an info message') -# logger1.error('This is an error message') - -# logger2.debug('This is a debug message') -# logger2.info('This is an info message') -# logger2.error('This is an error message') - -# logger3.debug('This is a debug message') -# logger3.info('This is an info message') -# logger3.error('This is an error message') -# logger3.critical('This is a critical message') - -# logger4.debug('This is a debug message') -# logger4.info('This is an info message') -# logger4.error('This is an error message') -# logger4.critical('This is a critical message') +""" +Example of usage:: + + if __name__ == '__main__': + log_config = { + 'module1': {'enabled': True, 'level': 'DEBUG'}, + 'module2': {'enabled': False, 'level': 'INFO'}, + 'module3': {'enabled': True, 'level': 'ERROR'}, + } + mgt = LoggerManager(log_config) + logger1 = mgt.getLogger('module1') + logger2 = mgt.getLogger('module2') + logger3 = mgt.getLogger('module3') + logger4 = mgt.getLogger('module4') + + logger1.debug('This is a debug message') + logger1.info('This is an info message') + logger1.error('This is an error message') + + logger2.debug('This is a debug message') + logger2.info('This is an info message') + logger2.error('This is an error message') + + logger3.debug('This is a debug message') + logger3.info('This is an info message') + logger3.error('This is an error message') + logger3.critical('This is a critical message') + + logger4.debug('This is a debug message') + logger4.info('This is an info message') + logger4.error('This is an error message') + logger4.critical('This is a critical message') +""" From 90daf4baceed4040bbabda7a1f0988056fd5f0c5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:20:04 +0200 Subject: [PATCH 
264/352] =?UTF-8?q?chore(loader):=20=F0=9F=94=A7=20use=20p?= =?UTF-8?q?yright=20ignore=20for=20jsonld=5Fcontext=20patch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/document_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 3bef77eb8..8f9ef9d0e 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -69,9 +69,11 @@ def install_document_loader() -> bool: try: jsonld_util.source_to_json = _patched_source_to_json - # The context module imports source_to_json at module import time, - # so it must be patched separately. - jsonld_context.source_to_json = _patched_source_to_json # type: ignore[attr-defined] + # The context module does `from .util import source_to_json` at import + # time, binding its own reference to the original function. Patching + # only `util` would not intercept remote @context resolution, so the + # context module must be patched separately. 
+ jsonld_context.source_to_json = _patched_source_to_json # pyright: ignore[reportPrivateImportUsage] except Exception as e: logger.error("Failed to install JSON-LD document loader: %s", e) return False @@ -95,7 +97,7 @@ def uninstall_document_loader() -> bool: try: jsonld_util.source_to_json = _original_source_to_json - jsonld_context.source_to_json = _original_source_to_json # type: ignore[attr-defined] + jsonld_context.source_to_json = _original_source_to_json # pyright: ignore[reportPrivateImportUsage] except Exception as e: logger.error("Failed to uninstall JSON-LD document loader: %s", e) return False From 0d21973cb975a4eacfc7ece530f5699f495d7cda Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:24:02 +0200 Subject: [PATCH 265/352] =?UTF-8?q?test(loader):=20=E2=9C=85=20assert=20bo?= =?UTF-8?q?th=20jsonld=20util=20and=20context=20are=20patched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_document_loader.py | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py index 2a995acae..cbe1ae8eb 100644 --- a/tests/unit/test_document_loader.py +++ b/tests/unit/test_document_loader.py @@ -46,7 +46,7 @@ def _urllib3_response(payload: bytes = b'{"@context": {"name": "https://schema.o @pytest.fixture def mock_network(monkeypatch): - from requests.adapters import HTTPAdapter + from requests.adapters import HTTPAdapter # type: ignore[import-untyped] def fake_send(self, request, **kwargs): raw = _urllib3_response() @@ -156,3 +156,43 @@ def json(self): ) with pytest.raises(RuntimeError): resolve_remote_document("https://example.org/broken") + + +def test_install_patches_both_util_and_context(tmp_path): + # Regression guard: rdflib's `context` module does + # `from .util import source_to_json`, binding its own reference at import + # time. 
Patching only `util` leaves remote @context resolution going through + # the original (uncached, online-only) function. Both bindings must change. + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + from rdflib.plugins.shared.jsonld import context as jsonld_context + from rdflib.plugins.shared.jsonld import util as jsonld_util + + install_document_loader() + + assert jsonld_util.source_to_json is document_loader._patched_source_to_json + assert jsonld_context.source_to_json is document_loader._patched_source_to_json # pyright: ignore[reportPrivateImportUsage] + + +def test_uninstall_restores_both_util_and_context(tmp_path): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + from rdflib.plugins.shared.jsonld import context as jsonld_context + from rdflib.plugins.shared.jsonld import util as jsonld_util + + install_document_loader() + uninstall_document_loader() + + assert jsonld_util.source_to_json is document_loader._original_source_to_json + assert jsonld_context.source_to_json is document_loader._original_source_to_json # pyright: ignore[reportPrivateImportUsage] + + +def test_context_module_resolution_routes_through_http(tmp_path, mock_network): + # Exercises the exact call rdflib uses to resolve a remote @context + # (`context.source_to_json`); it must go through HttpRequester and be cached. 
+ HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + install_document_loader() + from rdflib.plugins.shared.jsonld import context as jsonld_context + + doc, _ = jsonld_context.source_to_json("https://example.org/context") # pyright: ignore[reportPrivateImportUsage] + + assert doc == {"@context": {"name": "https://schema.org/name"}} + assert HttpRequester().has_cached("https://example.org/context") is True From f7e7b7d9430614bd3a8b8e4790495cf2f61b8894 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 12:45:13 +0200 Subject: [PATCH 266/352] =?UTF-8?q?chore(lint):=20=F0=9F=94=A5=20remove=20?= =?UTF-8?q?commented-out=20dead=20code=20(ERA001)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/profiles.py | 2 -- rocrate_validator/cli/commands/validate.py | 3 -- rocrate_validator/cli/main.py | 8 ----- rocrate_validator/models.py | 4 --- .../ro-crate/must/0_file_descriptor_format.py | 1 - rocrate_validator/requirements/shacl/utils.py | 31 +++++++------------ .../requirements/shacl/validator.py | 3 -- rocrate_validator/utils/http.py | 1 - .../io_helpers/output/text/layout/progress.py | 1 - .../io_helpers/output/text/layout/report.py | 4 --- .../isa-ro-crate/test_0_investigation.py | 8 ++--- .../isa-ro-crate/test_10_definedterm.py | 8 ++--- .../isa-ro-crate/test_11_propertyvalue.py | 8 ++--- .../profiles/isa-ro-crate/test_1_study.py | 8 ++--- .../profiles/isa-ro-crate/test_2_assay.py | 6 ++-- .../profiles/isa-ro-crate/test_3_process.py | 10 ++---- .../profiles/isa-ro-crate/test_4_protocol.py | 18 +++++------ .../profiles/isa-ro-crate/test_5_sample.py | 8 ++--- .../profiles/isa-ro-crate/test_6_data.py | 4 +-- .../profiles/isa-ro-crate/test_7_person.py | 24 +++++++------- .../profiles/isa-ro-crate/test_8_article.py | 12 +++---- .../profiles/isa-ro-crate/test_9_comment.py | 4 +-- tests/shared.py | 3 -- tests/test_cli.py | 5 --- 
tests/unit/test_rocrate.py | 15 --------- tests/unit/test_services.py | 1 - 26 files changed, 69 insertions(+), 131 deletions(-) diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index fb337320c..8fedb445a 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -293,7 +293,6 @@ def __compacted_describe_profile__(profile): f"{levels_count[2]}")) table = Table(show_header=True, - # renderer=renderer, title=f"[cyan]{len(requirements)}[/cyan] Profile Requirements", title_style="italic bold", header_style="bold cyan", @@ -345,7 +344,6 @@ def __verbose_describe_profile__(profile): count_checks += 1 table = Table(show_header=True, - # renderer=renderer, title=f"[cyan]{count_checks}[/cyan] Profile Requirements Checks", title_style="italic bold", header_style="bold cyan", diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 35c7df602..043a66d23 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -38,9 +38,6 @@ from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.uri import validate_rocrate_uri -# from rich.markdown import Markdown -# from rich.table import Table - # set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index f55b6ed37..c9186f71d 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -70,14 +70,6 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ ctx.obj['interactive'] = interactive try: - # Check the python version - # from rocrate_validator.utils import check_python_version, get_min_python_version - # if not check_python_version(): - # console.print( - # "\n[bold][red]ERROR:[/red] A Python version " - # f"{'.'.join([str(_) for _ in get_min_python_version()])} 
or newer is required ! [/bold]") - # sys.exit(1) - # If the version flag is set, print the version and exit if version: console.print( diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 1229faa90..86509de89 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1692,7 +1692,6 @@ def __init__( self._stats = self.__initialise__(settings) if not skip_initialization else {} self._result: Optional[ValidationResult] = None self._listeners: list[ValidationStatisticsListener] = [] - # self._target_profile: Optional[Profile] = None @property def validation_settings(self) -> ValidationSettings: @@ -2490,9 +2489,6 @@ def get_issues_by_check(self, check: RequirementCheck, min_severity: Optional[Se min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.check == check and issue.severity >= min_severity] - # def get_issues_by_check_and_severity(self, check: RequirementCheck, severity: Severity) -> list[CheckIssue]: - # return [issue for issue in self.issues if issue.check == check and issue.severity == severity] - def has_issues(self, min_severity: Optional[Severity] = None) -> bool: """ Check if there are issues with a severity greater than or equal to the given `severity` diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index ff12892f5..527a6b3f4 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -149,7 +149,6 @@ def __get_remote_context__(self, context_uri: str) -> object: # Try to parse the JSON-LD and access the context jsonLD = raw_data.json()["@context"] - # logger.warning(f"Retrieved JSON-LD context: {jsonLD}") assert isinstance(jsonLD, dict) # return the JSON-LD context return jsonLD diff --git a/rocrate_validator/requirements/shacl/utils.py 
b/rocrate_validator/requirements/shacl/utils.py index 9c77124b4..57183c42c 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -70,17 +70,15 @@ def map_severity(shacl_severity: str) -> Severity: def make_uris_relative(text: str, ro_crate_path: Union[Path, str]) -> str: # globally replace the string "file://" with "./ - return text.replace(str(ro_crate_path), './') + return text.replace(str(ro_crate_path), "./") def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optional[list] = None) -> object: # inject attributes of the shape property - # logger.debug("Injecting attributes of node %s", node) skip_properties = ["node"] if exclude is None else [*exclude, "node"] triples = node_graph.triples((node, None, None)) for _node, p, o in triples: predicate_as_string = cast("Any", p).toPython() - # logger.debug(f"Processing {predicate_as_string} of property graph {node}") if predicate_as_string.startswith(SHACL_NS): property_name = predicate_as_string.split("#")[-1] if property_name in skip_properties: @@ -89,9 +87,6 @@ def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optio setattr(obj, property_name, cast("Any", o).toPython()) except AttributeError as e: logger.error(f"Error injecting attribute {property_name}: {e}") - # logger.debug("Injected attribute %s: %s", property_name, o.toPython()) - # logger.debug("Injected attributes ig node %s: %s", node, len(list(triples))) - # return the object return obj @@ -106,7 +101,7 @@ def __compute_values__(g: Graph, s: Node) -> list[tuple]: # Assuming the list of triples values is stored in a variable called 'triples_values' triples_values = [(_, x, _) for (_, x, _) in g.triples((s, None, None)) if x != RDF.type] - for (subj, pred, obj) in triples_values: + for subj, pred, obj in triples_values: if isinstance(obj, BNode): values.extend(__compute_values__(g, obj)) else: @@ -141,11 +136,13 @@ def compute_key(g: Graph, s: Node) 
-> str: class ShapesList: - def __init__(self, - node_shapes: list[Node], - property_shapes: list[Node], - shapes_graphs: dict[Node, Graph], - shapes_graph: Graph): + def __init__( + self, + node_shapes: list[Node], + property_shapes: list[Node], + shapes_graphs: dict[Node, Graph], + shapes_graph: Graph, + ): self._node_shapes = node_shapes self._property_shapes = property_shapes self._shapes_graph = shapes_graph @@ -276,12 +273,10 @@ def load_shapes_from_graph(g: Graph) -> ShapesList: # define the SHACL namespace SHACL = Namespace(SHACL_NS) # find all NodeShapes - node_shapes = [s for (s, _, _) in g.triples( - (None, RDF.type, SHACL.NodeShape)) if not isinstance(s, BNode)] + node_shapes = [s for (s, _, _) in g.triples((None, RDF.type, SHACL.NodeShape)) if not isinstance(s, BNode)] logger.debug("Loaded Node Shapes: %s", node_shapes) # find all PropertyShapes - property_shapes = [s for (s, _, _) in g.triples((None, RDF.type, SHACL.PropertyShape)) - if not isinstance(s, BNode)] + property_shapes = [s for (s, _, _) in g.triples((None, RDF.type, SHACL.PropertyShape)) if not isinstance(s, BNode)] logger.debug("Loaded Property Shapes: %s", property_shapes) # define the list of shapes to extract shapes = node_shapes + property_shapes @@ -299,9 +294,7 @@ def load_shapes_from_graph(g: Graph) -> ShapesList: return ShapesList(node_shapes, property_shapes, subgraphs, g) -def resolve_parent_shape( - shapes_graph: Graph, source_shape_node: Node, shapes_registry -) -> Optional[Shape]: +def resolve_parent_shape(shapes_graph: Graph, source_shape_node: Node, shapes_registry) -> Optional[Shape]: """ Try to resolve the parent NodeShape/PropertyShape for a BNode constraint node. 
diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index ea353aa2e..749c939ec 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -130,11 +130,8 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: if self.settings.allow_requirement_check_override: from rocrate_validator.requirements.shacl.requirements import SHACLRequirement # noqa: PLC0415 for requirement in [_ for _ in profile.requirements if isinstance(_, SHACLRequirement)]: - # logger.debug("Processing requirement: %s", requirement.name) for check in requirement.get_checks(): - # logger.debug("Processing check: %s", check) if check.overridden and check.requirement.profile != self.target_profile: - # logger.debug("Overridden check: %s", check) profile_shapes_graph -= cast("Any", check).shape.graph profile_shapes.pop(cast("Any", check).shape.key) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 07817e146..65b700d1d 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -440,7 +440,6 @@ def _offline_response(self, url: str) -> requests.Response: response.status_code = OFFLINE_CACHE_MISS_STATUS response.reason = "Offline: no HTTP cache backend available" response.url = url - # response._content = b"" return response def get(self, url, **_kwargs): diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 861518732..2fb702ba4 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -89,7 +89,6 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): logger.debug("Event: %s", event.event_type) if event.event_type == EventType.VALIDATION_START: logger.debug("Validation started") - # 
self.start() if event.event_type == EventType.PROFILE_VALIDATION_START: assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation start: %s", event.profile.identifier) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 9bad2f6b8..cc3b64136 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -206,8 +206,6 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): # type assert ctx is not None, "Validation context must be provided" if not event.requirement.hidden: self.update_stats(ctx.result.statistics) - # elif event.event_type == EventType.PROFILE_VALIDATION_END: - # pass elif event.event_type == EventType.VALIDATION_END: assert isinstance(event, ValidationEvent) self.__show_overall_result__(event.validation_result) @@ -216,11 +214,9 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): # type def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): assert profile_stats, "Profile stats must be provided" assert self.passed_checks is not None and self.failed_checks is not None, "Layout not initialized" - # self.profile_stats = profile_stats self.requirement_checks_by_severity_container_layout["required"].update( Panel( Align( - # str(profile_stats['check_count_by_severity'][Severity.REQUIRED]) if profile_stats else "0", str(profile_stats.check_count_by_severity[Severity.REQUIRED]) if profile_stats else "0", align="center" ), diff --git a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py index c21a81119..07040ef97 100644 --- a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py +++ b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py @@ -78,7 +78,7 @@ def 
test_isa_investigation_no_identifier(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Investigation MUST have base properties"], + # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "The root data entity must have a non-empty identifier" ], @@ -112,7 +112,7 @@ def test_isa_investigation_identifier_not_string(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Investigation MUST have base properties"], + # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "The root data entity must have a non-empty identifier" ], @@ -146,7 +146,7 @@ def test_isa_investigation_no_shoulds(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Investigation MUST have base properties"], + # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Investigation entity SHOULD have a dateCreated", "Investigation entity SHOULD have a creator", @@ -184,7 +184,7 @@ def test_isa_investigation_shoulds_have_wrong_types(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Investigation MUST have base properties"], + # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Investigation dateCreated MUST be a valid ISO 8601 date", "Investigation creator MUST be of type Person", diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 
3ffb4dbb2..49f98cef3 100644 --- a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -47,7 +47,7 @@ def test_isa_defined_term_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "DefinedTerm entity MUST have a non-empty name of type string" ], @@ -80,7 +80,7 @@ def test_isa_defined_term_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "DefinedTerm entity MUST have a non-empty name of type string" ], @@ -110,7 +110,7 @@ def test_isa_defined_term_termCode(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "DefinedTerm entity SHOULD have at least one termCode" ], @@ -143,7 +143,7 @@ def test_isa_defined_term_termCode_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["DefinedTerm termCode MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, diff --git 
a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py index 28b8e08a6..3683c18a8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py +++ b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py @@ -46,7 +46,7 @@ def test_isa_property_value_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "PropertyValue entity MUST have a non-empty name of type string" ], @@ -80,7 +80,7 @@ def test_isa_property_value_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "PropertyValue entity MUST have a non-empty name of type string" ], @@ -111,7 +111,7 @@ def test_isa_property_value_value(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "PropertyValue entity SHOULD have at least one value" ], @@ -147,7 +147,7 @@ def test_isa_property_value_value_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "PropertyValue value MUST be of 
type string, float, or integer" ], diff --git a/tests/integration/profiles/isa-ro-crate/test_1_study.py b/tests/integration/profiles/isa-ro-crate/test_1_study.py index e7b14b17e..91a783f19 100644 --- a/tests/integration/profiles/isa-ro-crate/test_1_study.py +++ b/tests/integration/profiles/isa-ro-crate/test_1_study.py @@ -83,7 +83,7 @@ def test_isa_study_identifier_not_string(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Study entity MUST have a non-empty identifier of type string" ], @@ -148,7 +148,7 @@ def test_isa_study_name_not_string(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Study entity MUST have a non-empty name of type string" ], @@ -259,7 +259,7 @@ def test_isa_study_no_shoulds(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Study entity SHOULD have a dateCreated", "Study entity SHOULD have a datePublished", @@ -313,7 +313,7 @@ def test_isa_study_shoulds_have_wrong_types(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Study dateCreated 
MUST be a valid date literal", "Study datePublished MUST be a valid date literal", diff --git a/tests/integration/profiles/isa-ro-crate/test_2_assay.py b/tests/integration/profiles/isa-ro-crate/test_2_assay.py index dc98aacc8..0791f70b1 100644 --- a/tests/integration/profiles/isa-ro-crate/test_2_assay.py +++ b/tests/integration/profiles/isa-ro-crate/test_2_assay.py @@ -80,7 +80,7 @@ def test_isa_assay_identifier_not_string(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Root Data Entity must be Investigation"], + # expected_triggered_requirements=["Root Data Entity must be Investigation"], # noqa: ERA001 expected_triggered_issues=[ "Assay entity MUST have a non-empty identifier of type string" ], @@ -201,7 +201,7 @@ def test_isa_assay_no_shoulds(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Assay entity SHOULD have a non-empty name of type string", "Assay entity SHOULD have a non-empty description of type string", @@ -259,7 +259,7 @@ def test_isa_assay_shoulds_have_wrong_types(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Assay name MUST be of type string", "Assay description MUST be of type string", diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index 3644bed8c..695cb0091 100644 --- a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ 
b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity -# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER # noqa: ERA001 from tests.ro_crates import ValidROC from tests.shared import SPARQL_PREFIXES, do_entity_test @@ -87,12 +87,8 @@ def test_isa_process_not_correctly_referenced_from_dataset(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=True, - # expected_triggered_requirements=[ - # "Process MUST be directly referenced from a dataset" - # ], - # expected_triggered_issues=[ - # "Process MUST be directly referenced in about on a Dataset" - # ], + # expected_triggered_requirements=["Process MUST be directly referenced from a dataset"], # noqa: ERA001 + # expected_triggered_issues=["Process MUST be directly referenced in about on a Dataset"], # noqa: ERA001 profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, disable_inherited_profiles_issue_reporting=True, diff --git a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index 7d98ca193..38ef1ecc1 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity -# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER # noqa: ERA001 from tests.ro_crates import ValidROC from tests.shared import SPARQL_PREFIXES, do_entity_test @@ -49,7 +49,7 @@ def test_isa_protocol_no_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have name"], + # 
expected_triggered_requirements=["Protocol SHOULD have name"], # noqa: ERA001 expected_triggered_issues=[ "Protocol entity SHOULD have a non-empty name of type string" ], @@ -83,7 +83,7 @@ def test_isa_protocol_name_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have name"], + # expected_triggered_requirements=["Protocol SHOULD have name"], # noqa: ERA001 expected_triggered_issues=["Protocol name MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -113,7 +113,7 @@ def test_isa_protocol_no_description(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have description"], + # expected_triggered_requirements=["Protocol SHOULD have description"], # noqa: ERA001 expected_triggered_issues=[ "Protocol entity SHOULD have a non-empty description of type string" ], @@ -148,7 +148,7 @@ def test_isa_protocol_description_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have description"], + # expected_triggered_requirements=["Protocol SHOULD have description"], # noqa: ERA001 expected_triggered_issues=["Protocol description MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -178,7 +178,7 @@ def test_isa_protocol_no_intendedUse(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have intended use"], + # expected_triggered_requirements=["Protocol SHOULD have intended use"], # noqa: ERA001 expected_triggered_issues=["Protocol entity SHOULD have an intended use"], 
profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -216,8 +216,8 @@ def test_isa_protocol_not_correctly_referenced_from_process(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=True, - # expected_triggered_requirements=["Protocol SHOULD have intended use"], - # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], + # expected_triggered_requirements=["Protocol SHOULD have intended use"], # noqa: ERA001 + # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], # noqa: ERA001 profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, disable_inherited_profiles_issue_reporting=True, @@ -250,7 +250,7 @@ def test_isa_protocol_intendedUse_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Protocol SHOULD have intended use"], + # expected_triggered_requirements=["Protocol SHOULD have intended use"], # noqa: ERA001 expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/integration/profiles/isa-ro-crate/test_5_sample.py b/tests/integration/profiles/isa-ro-crate/test_5_sample.py index ddb8ed503..aff6addd8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_5_sample.py +++ b/tests/integration/profiles/isa-ro-crate/test_5_sample.py @@ -46,7 +46,7 @@ def test_isa_sample_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Sample entity MUST have a non-empty name of type string" ], @@ -116,7 +116,7 @@ def 
test_isa_sample_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Sample entity MUST have a non-empty name of type string" ], @@ -149,7 +149,7 @@ def test_isa_sample_no_additional_property(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Sample entity SHOULD have at least one additional property" ], @@ -185,7 +185,7 @@ def test_isa_sample_additional_property_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Sample additional property MUST be of type PropertyValue" ], diff --git a/tests/integration/profiles/isa-ro-crate/test_6_data.py b/tests/integration/profiles/isa-ro-crate/test_6_data.py index ec6e23c39..879991a0c 100644 --- a/tests/integration/profiles/isa-ro-crate/test_6_data.py +++ b/tests/integration/profiles/isa-ro-crate/test_6_data.py @@ -45,7 +45,7 @@ def test_isa_file_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "File entity MUST have a non-empty name of type string" ], @@ -114,7 +114,7 @@ def 
test_isa_file_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "File entity MUST have a non-empty name of type string" ], diff --git a/tests/integration/profiles/isa-ro-crate/test_7_person.py b/tests/integration/profiles/isa-ro-crate/test_7_person.py index 3261b50e4..65c50f903 100644 --- a/tests/integration/profiles/isa-ro-crate/test_7_person.py +++ b/tests/integration/profiles/isa-ro-crate/test_7_person.py @@ -45,7 +45,7 @@ def test_isa_person_given_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity MUST have a non-empty given name of type string" ], @@ -118,7 +118,7 @@ def test_isa_person_given_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity MUST have a non-empty given name of type string" ], @@ -148,7 +148,7 @@ def test_isa_person_family_name(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity SHOULD have a non-empty family name of type string" ], @@ -181,7 +181,7 @@ 
def test_isa_person_family_name_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Person family name MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -209,7 +209,7 @@ def test_isa_person_email(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity SHOULD have a non-empty email of type string" ], @@ -242,7 +242,7 @@ def test_isa_person_email_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Person email MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -271,7 +271,7 @@ def test_isa_person_identifier(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity SHOULD have a non-empty identifier of type string" ], @@ -305,7 +305,7 @@ def test_isa_person_identifier_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # 
expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Person identifier MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -334,7 +334,7 @@ def test_isa_person_affiliation(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Person entity SHOULD have at least one affiliation" ], @@ -368,7 +368,7 @@ def test_isa_person_affiliation_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Person affiliation MUST be of type Organization"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -397,7 +397,7 @@ def test_isa_person_job_title(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Person entity SHOULD have at least one job title"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -429,7 +429,7 @@ def test_isa_person_job_title_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base 
properties"], # noqa: ERA001 expected_triggered_issues=["Person job title MUST be of type DefinedTerm"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/integration/profiles/isa-ro-crate/test_8_article.py b/tests/integration/profiles/isa-ro-crate/test_8_article.py index 745f8986b..af4d7f018 100644 --- a/tests/integration/profiles/isa-ro-crate/test_8_article.py +++ b/tests/integration/profiles/isa-ro-crate/test_8_article.py @@ -45,7 +45,7 @@ def test_isa_article_headline(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Article entity MUST have a non-empty headline of type string" ], @@ -113,7 +113,7 @@ def test_isa_article_headline_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Article entity MUST have a non-empty headline of type string" ], @@ -143,7 +143,7 @@ def test_isa_article_identifier(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Article entity MUST have a non-empty identifier of type string or PropertyValue" ], @@ -176,7 +176,7 @@ def test_isa_article_identifier_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study 
MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=[ "Article entity MUST have a non-empty identifier of type string or PropertyValue" ], @@ -206,7 +206,7 @@ def test_isa_article_author(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Article entity SHOULD have at least one author"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -237,7 +237,7 @@ def test_isa_article_author_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Article author MUST be of type Person"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/integration/profiles/isa-ro-crate/test_9_comment.py b/tests/integration/profiles/isa-ro-crate/test_9_comment.py index e2fe56b84..75cea75d6 100644 --- a/tests/integration/profiles/isa-ro-crate/test_9_comment.py +++ b/tests/integration/profiles/isa-ro-crate/test_9_comment.py @@ -45,7 +45,7 @@ def test_isa_comment_text(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Comment entity SHOULD have at least one text"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, @@ -76,7 +76,7 @@ def 
test_isa_comment_text_of_incorrect_type(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - # expected_triggered_requirements=["Study MUST have base properties"], + # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 expected_triggered_issues=["Comment text MUST be of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/shared.py b/tests/shared.py index 5a2e10375..859e48e8d 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -193,9 +193,6 @@ def do_entity_test( # check requirement failed_requirements = [_.name for _ in result.failed_requirements] - # assert len(failed_requirements) == len(expected_triggered_requirements), \ - # f"Expected {len(expected_triggered_requirements)} requirements to be "\ - # f"triggered, but got {len(failed_requirements)}" # check that the expected requirements are triggered for expected_triggered_requirement in expected_triggered_requirements: diff --git a/tests/test_cli.py b/tests/test_cli.py index e06050706..d488f367f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,7 +138,6 @@ def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): ] ) assert result.exit_code == 2 - # logger.debug(result.output) # On narrow terminals the Rich error panel wraps the message across lines # and inserts box-drawing borders (│) between words; strip those and # collapse whitespace so the match does not depend on terminal width. @@ -151,9 +150,7 @@ def test_profiles_list(cli_runner: CliRunner): Test the list of profiles. 
""" result = cli_runner.invoke(cli, ["profiles", "list", "--no-paging"]) - # logger.debug("Profiles list output: %s", result.output) assert result.exit_code == 0 - # assert "Available profiles:" in result.output assert "ro-crate-1.1" in result.output # Check for a known profile @@ -162,9 +159,7 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): Test the list of extra profiles. """ result = cli_runner.invoke(cli, ["profiles", "--extra-profiles-path", fake_profiles_path, "list", "--no-paging"]) - # logger.debug("Extra profiles list output: %s", result.output) assert result.exit_code == 0 - # assert "Available profiles:" in result.output assert "Profile A" in result.output # Check for a known extra profile diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index df6923744..e5f624e9d 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -384,9 +384,6 @@ def test_paths_valid_bagit_rocrate(): assert len(files) == 16, "Should have 16 files" # check file paths - # assert roc.has_file(Path("ro-crate-metadata.json")), "Should have ro-crate-metadata.json file" - # # assert roc.has_file(Path("bagit.txt")), "Should have bagit.txt file" - # # assert roc.has_file(Path("data/ro-crate-metadata.json")), "Should have data/ro-crate-metadata.json file" # assert roc.has_file(Path("pics/2017-06-11%2012.56.14.jpg") # ), "Should have data/pics/2017-06-11 12.56.14.jpg file" @@ -423,7 +420,6 @@ def test_valid_bagit_zip_rocrate(): # test list files files = roc.list_files() logger.debug(f"Files: {files}") - # assert len(files) == 11, "Should have 11 files" # test is_file assert roc.has_file(metadata_file_descriptor), "Should be a file" @@ -463,21 +459,12 @@ def test_valid_bagit_zip_rocrate(): assert root_data_entity.name == "My Pictures", "Name should be sort_and_change" # test subEntity mainEntity - # main_entity = root_data_entity.get_property("mainEntity") - # logger.error(f"Main entity: {main_entity}") - # assert 
isinstance(main_entity, ROCrateEntity), "Entity should be ROCrateEntity" - # assert main_entity.id == "sort-and-change-case.ga", "Id should be main-entity" # assert "ComputationalWorkflow" in main_entity.type, "Type should be ComputationalWorkflow" # check metadata consistency - # assert main_entity.metadata == metadata, "Metadata should be the same" - # assert main_entity.metadata == roc.metadata, "Metadata should be the same" # check availability of 'pics/2017-06-11%2012.56.14.jpg' - # entity = metadata.get_entity("pics/2017-06-11%2012.56.14.jpg") - # assert entity.is_available(), "Entity should be available" - # assert roc.has_directory("data%20set/"), "Should have data%20set/ directory" ################################ @@ -487,8 +474,6 @@ def test_valid_bagit_zip_rocrate(): def test_valid_remote_zip_rocrate(): roc = ROCrateRemoteZip(ValidROC().sort_and_change_remote) - # assert isinstance(roc, ROCrateRemoteZip) - # return # # test list files files = roc.list_files() logger.debug(f"Files: {files}") diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index 8845f398a..c0165f4f1 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -159,7 +159,6 @@ def test_skip_pycheck_on_workflow_ro_crate(): # Ensure that the skipped checks are indeed skipped skipped_check_ids = {check.identifier for check in result.skipped_checks} - # logger.error("Skipped checks: %s", result.skipped_checks) assert "ro-crate-1.1_5.3" in skipped_check_ids, "Expected check 'ro-crate-1.1_5.3' to be skipped" assert "ro-crate-1.1_12.1" in skipped_check_ids, "Expected check 'ro-crate-1.1_12.1' to be skipped" From f1c407eeb4b0027a22f8f9d485956c5ec733aa50 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 13:25:50 +0200 Subject: [PATCH 267/352] =?UTF-8?q?refactor(cli):=20=E2=99=BB=EF=B8=8F=20r?= =?UTF-8?q?educe=20complexity=20to=20satisfy=20C901?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
rocrate_validator/cli/commands/cache.py | 43 +- rocrate_validator/cli/commands/validate.py | 538 ++++++++++-------- rocrate_validator/models.py | 66 ++- .../ro-crate/must/0_file_descriptor_format.py | 190 +++---- rocrate_validator/services.py | 154 +++-- .../io_helpers/output/json/formatters.py | 24 +- .../io_helpers/output/text/layout/progress.py | 22 +- 7 files changed, 560 insertions(+), 477 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 04aa71035..3e2d2e856 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -374,25 +374,7 @@ def cache_warm( console.print("[yellow]Nothing to warm up.[/yellow]") return - table = Table(title="Warm-up results", show_lines=False) - table.add_column("URL", overflow="fold") - table.add_column("Status") - table.add_column("Detail") - ok = 0 - failed = 0 - for r in results: - colour = {"ok": "green", "skipped": "cyan", "failed": "red"}.get(r.status, "white") - table.add_row(r.url, f"[{colour}]{r.status}[/{colour}]", r.detail or "") - if r.status == "ok": - ok += 1 - elif r.status == "failed": - failed += 1 - console.print(table) - console.print( - f"[bold]Summary:[/bold] {ok} cached, {failed} failed, " - f"{len(results) - ok - failed} skipped" - ) - exit_with_failure = failed > 0 + exit_with_failure = _render_warmup_results(console, results) except Exception as e: handle_error(e, console) return @@ -400,6 +382,29 @@ def cache_warm( ctx.exit(1) +def _render_warmup_results(console, results: list[WarmUpResult]) -> bool: + """Render the warm-up results table and summary; return True if any URL failed.""" + table = Table(title="Warm-up results", show_lines=False) + table.add_column("URL", overflow="fold") + table.add_column("Status") + table.add_column("Detail") + ok = 0 + failed = 0 + for r in results: + colour = {"ok": "green", "skipped": "cyan", "failed": "red"}.get(r.status, "white") + table.add_row(r.url, 
f"[{colour}]{r.status}[/{colour}]", r.detail or "") + if r.status == "ok": + ok += 1 + elif r.status == "failed": + failed += 1 + console.print(table) + console.print( + f"[bold]Summary:[/bold] {ok} cached, {failed} failed, " + f"{len(results) - ok - failed} skipped" + ) + return failed > 0 + + def _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, requested_ids): Profile.load_profiles( profiles_path=profiles_dir, diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 043a66d23..6ddd97b33 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -235,7 +235,7 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.pass_context -def validate(ctx, # noqa: PLR0912, PLR0915 +def validate(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Optional[Path] = None, profile_identifier: tuple[str, ...] = (), @@ -268,23 +268,20 @@ def validate(ctx, # noqa: PLR0912, PLR0915 # override the enable_pager flag if the interactive flag is False if not interactive or sys.platform == "win32": enable_pager = False - # Log the input parameters for debugging - logger.debug("profiles_path: %s", Path(profiles_path).resolve()) - logger.debug("extra_profiles_path: %s", Path(extra_profiles_path).resolve() if extra_profiles_path else None) - logger.debug("profile_identifier: %s", profile_identifier) - logger.debug("requirement_severity: %s", requirement_severity) - logger.debug("requirement_severity_only: %s", requirement_severity_only) - - logger.debug("disable_inheritance: %s", disable_profile_inheritance) - logger.debug("rocrate_uri: %s", rocrate_uri) - logger.debug("fail_fast: %s", fail_fast) - logger.debug("no fail fast: %s", not fail_fast) - - # Cache settings - logger.debug("cache_max_age: %s", cache_max_age) - logger.debug("cache_path: %s", Path(cache_path).resolve() if cache_path else None) - logger.debug("no_cache: %s", no_cache) - 
logger.debug("offline: %s", offline) + _log_validation_inputs( + profiles_path=profiles_path, + extra_profiles_path=extra_profiles_path, + profile_identifier=profile_identifier, + requirement_severity=requirement_severity, + requirement_severity_only=requirement_severity_only, + disable_profile_inheritance=disable_profile_inheritance, + rocrate_uri=rocrate_uri, + fail_fast=fail_fast, + cache_max_age=cache_max_age, + cache_path=cache_path, + no_cache=no_cache, + offline=offline, + ) # --no-cache and --offline are contradictory: offline mode requires a cache # to serve requests from, while no-cache disables caching entirely. @@ -297,41 +294,9 @@ def validate(ctx, # noqa: PLR0912, PLR0915 if rocrate_uri: logger.debug("rocrate_path: %s", Path(rocrate_uri).resolve()) - # Warn the user when a remote RO-Crate is about to be validated in offline mode: - # the cached copy (if any) will be used, and it may be out of sync with the remote. - if offline and isinstance(rocrate_uri, str) and rocrate_uri.split(":", 1)[0].lower() in ("http", "https", "ftp"): - console.print( - Padding( - Rule( - "[bold yellow]WARNING:[/bold yellow] " - "[bold]The target RO-Crate is remote and offline mode is enabled.[/bold]\n" - "The cached version of the RO-Crate will be used if available.\n" - "The cached copy may be out of sync with the version currently published remotely.", - align="center", - style="bold yellow", - ), - (1, 2, 0, 2), - ) - ) + _warn_if_remote_offline(console, rocrate_uri, offline) - # Parse the skip_checks option - logger.debug("skip_checks: %s", skip_checks) - # Parse the skip_checks option - skip_checks_list: list[str] = [] - if skip_checks: - try: - for s in skip_checks: - skip_checks_list.extend(_.strip() for _ in s.split(",") if _.strip()) - logger.debug("skip_checks_list: %s", skip_checks_list) - except Exception as e: - logger.error("Error parsing skip_checks: %s", e) - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error parsing skip_checks: %s", e) - 
raise ValueError( - f"Invalid skip_checks value: {s}. " - "It must be a comma-separated list of Fully Qualified Check IDs." - ) from e - logger.debug("Skip checks: %s", skip_checks_list) + skip_checks_list = _parse_skip_checks(skip_checks) try: # Validation settings @@ -363,61 +328,16 @@ def validate(ctx, # noqa: PLR0912, PLR0915 # Get the available profiles available_profiles = services.get_profiles(profiles_path, extra_profiles_path=extra_profiles_path) - # Detect the profile to use for validation - autodetection = False - selected_profile = profile_identifier - # The profile selection collapses to a concrete list of identifiers - # (the CLI passes a possibly-empty tuple because of ``multiple=True``). - profile_identifiers: list[str] = list(profile_identifier) - if selected_profile is None or len(selected_profile) == 0: - - # Auto-detect the profile to use for validation (if not disabled) - candidate_profiles = None - if not no_auto_profile: - candidate_profiles = services.detect_profiles(settings=validation_settings) - logger.debug("Candidate profiles: %s", candidate_profiles) - else: - logger.info("Auto-detection of the profiles to use for validation is disabled") - - # Prompt the user to select the profile to use for validation if the interactive mode is enabled - # and no profile is auto-detected or multiple profiles are detected - if interactive and ( - not candidate_profiles or - len(candidate_profiles) == 0 or - len(candidate_profiles) == len(available_profiles) - ): - # Define the list of choices - console.print( - Padding( - Rule( - "[bold yellow]WARNING: [/bold yellow]" - "[bold]Unable to automatically detect the profile to use for validation[/bold]\n", - align="center", - style="bold yellow" - ), - (2, 2, 0, 2) - ) - ) - selected_options = multiple_choice(console, available_profiles) - profile_identifiers = [available_profiles[int( - selected_option)].identifier for selected_option in selected_options] - logger.debug("Profile selected: %s", 
selected_options) - console.print(Padding(Rule(style="bold yellow"), (1, 2))) - - elif candidate_profiles and len(candidate_profiles) < len(available_profiles): - logger.debug("Profile identifier autodetected: %s", candidate_profiles[0].identifier) - autodetection = True - profile_identifiers = [_.identifier for _ in candidate_profiles] - - # Fall back to the selected profile - if not profile_identifiers or len(profile_identifiers) == 0: - console.print(f"\n{' '*2}[bold yellow]WARNING: [/bold yellow]", end="") - if no_auto_profile: - console.print("[bold]Auto-detection of the profiles to use for validation is disabled[/bold]") - else: - console.print("[bold]Unable to automatically detect the profile to use for validation[/bold]") - console.print(f"{' '*11}[bold]The base `ro-crate` profile will be used for validation[/bold]") - profile_identifiers = ["ro-crate"] + # Resolve the concrete list of profile identifiers (auto-detection, + # interactive selection, or fallback to the base `ro-crate` profile). 
+ profile_identifiers, autodetection = _resolve_profile_identifiers( + console, + interactive, + no_auto_profile, + available_profiles, + list(profile_identifier), + validation_settings, + ) # Validate the RO-Crate against the selected profiles is_valid = True @@ -425,93 +345,33 @@ def validate(ctx, # noqa: PLR0912, PLR0915 for profile in profile_identifiers: # Duplicate settings for each profile and set the profile identifier logger.info("\nValidating RO-Crate against profile: [bold cyan]%s[/bold cyan]", profile) - validation_settings = validation_settings.copy() - # Set the selected profile - validation_settings["profile_identifier"] = profile - logger.debug("Profile selected for validation: %s", validation_settings["profile_identifier"]) + profile_settings = validation_settings.copy() + profile_settings["profile_identifier"] = profile + logger.debug("Profile selected for validation: %s", profile) logger.debug("Profile autodetected: %s", autodetection) - # Initialize the validation result variable - result: Optional[ValidationResult] = None - - ######################################################################################### - # Perform and display the validation with progress bar to the console - ######################################################################################### - # Perform the validation and get the validation result + # Perform the validation and render the result for the chosen output target. 
if output_format == "text" and not output_file: - # If in interactive mode, show the validation progress with a progress bar - # with a live updating layout - if interactive: - # Initialize the command view for text output - command_view = ValidationCommandView( - validation_settings=ValidationSettings.parse(validation_settings), - console=console, - interactive=interactive, - no_paging=not enable_pager, - pager=pager - ) - # Validate RO-Crate against the profile and get the validation result - result = command_view.show_validation_progress(services.validate) - - # Print the validation result - if not result.passed(): - verbose_choice = "n" - if interactive and not verbose: - verbose_choice = get_single_char(console, choices=['y', 'n'], - message=( - "[bold] > Do you want to see the validation details? " - "([magenta]y/n[/magenta]): [/bold]" - )) - if verbose_choice == "y" or verbose: - command_view.display_validation_result(result) - else: - # Validate RO-Crate against the profile and get the validation result - result = services.validate(validation_settings) - # Init TextOutputFormatter for console output - console.register_formatter(TextOutputFormatter()) - # Show the final overview of the validation if no interactive mode - console.print(result.statistics) - - # Print the validation result - if not result.passed() and verbose: - out = Console(no_color=console.no_color, width=console.width, height=console.height) - out.register_formatter(TextOutputFormatter()) - out.print(result) - - ########################################################################################### - # Perform the validation without progress bar (for JSON output or text output to file) - ########################################################################################### + result = _render_console_result( + profile_settings, + console=console, + pager=pager, + interactive=interactive, + enable_pager=enable_pager, + verbose=verbose, + ) else: - # Notify the start of the validation 
- if interactive: - with LiveTextProgressLayout( - console=console, - profile_identifier=profile, - validation_settings=validation_settings, - callable_service=services.validate, - transient=True - ) as result: - logger.debug("Validation result obtained") - else: - # Validate RO-Crate against the profile and get the validation result - result = services.validate(validation_settings) - results[profile] = result - - # Output processing for text format to file - ################################################################################### - if output_file and output_format == "text": - if interactive: - console.print(f"\n{' '*2}📝 [bold]Writing validation results to file[/bold]{'.'*4} ", end="") - with output_file.open("w", encoding="utf-8") if output_file else sys.stdout as f: - out = Console(color_system=None, width=output_line_width, height=31, file=f) - if output_format == "text": - out.register_formatter(TextOutputFormatter()) - # Print the report layout - out.print(result.statistics) # Output the statistics first - if not result.passed() and verbose: - out.print(result) - if interactive: - console.print(f"[bold green]{output_file}[/bold green]", end="\n") + result = _render_file_or_collected_result( + profile, + profile_settings, + console=console, + interactive=interactive, + verbose=verbose, + output_format=output_format, + output_file=output_file, + output_line_width=output_line_width, + ) + results[profile] = result # Update the global validation status is_valid = is_valid and result.passed() @@ -520,48 +380,272 @@ def validate(ctx, # noqa: PLR0912, PLR0915 if fail_fast and not is_valid: break - ################################################################### - # ############## JSON OUTPUT FORMAT PROCESSING ################## # - ################################################################### # Process JSON output format if output_format == "json": - # Notify completion of the validation - if interactive: - if is_valid: - console.print( - f"\n{' 
'*2}✅ [bold]Validation [green]PASSED![/green]. " - f"\n{' '*5}RO-Crate is valid according to the profile(s): " - f"[cyan]{', '.join(profile_identifiers)}[/cyan][/bold]" - ) - else: - console.print(f"\n{' '*2}❌ [bold]Validation [red]FAILED![/red][/bold]") - # Notify the start of JSON output generation - if interactive: - if output_file: - console.print( - f"\n{' '*2}📝 [bold]Writing validation results in JSON format " - f"to the file \"{output_file}\"[/bold]{'.'*4} ", - end="" - ) - else: - console.print(f"\n{' '*2}📋 [bold]The validation report in JSON format: [/bold]\n") - - # Generate the JSON output and write it to the specified output file or to stdout - with output_file.open("w", encoding="utf-8") if output_file else nullcontext(sys.stdout) as f: - out = Console(width=output_line_width, file=f) - out.register_formatter(JSONOutputFormatter()) - out.print(results) - - # Notify completion of JSON output generation - if interactive and output_file: - console.print("[bold]DONE![/bold]", end="\n\n") - - ################################################################################ - # Exit with appropriate status code - ################################################################################ + _emit_json_report( + results, + profile_identifiers, + is_valid, + console=console, + interactive=interactive, + output_file=output_file, + output_line_width=output_line_width, + ) + # Exit with appropriate status code. # using ctx.exit seems to raise an Exception that gets caught below, # so we use sys.exit instead. 
sys.exit(0 if is_valid else 1) except Exception as e: handle_error(e, console) + + +def _log_validation_inputs( + *, + profiles_path, + extra_profiles_path, + profile_identifier, + requirement_severity, + requirement_severity_only, + disable_profile_inheritance, + rocrate_uri, + fail_fast, + cache_max_age, + cache_path, + no_cache, + offline, +) -> None: + """Log the raw validation input parameters for debugging.""" + logger.debug("profiles_path: %s", Path(profiles_path).resolve()) + logger.debug("extra_profiles_path: %s", Path(extra_profiles_path).resolve() if extra_profiles_path else None) + logger.debug("profile_identifier: %s", profile_identifier) + logger.debug("requirement_severity: %s", requirement_severity) + logger.debug("requirement_severity_only: %s", requirement_severity_only) + logger.debug("disable_inheritance: %s", disable_profile_inheritance) + logger.debug("rocrate_uri: %s", rocrate_uri) + logger.debug("fail_fast: %s", fail_fast) + logger.debug("no fail fast: %s", not fail_fast) + logger.debug("cache_max_age: %s", cache_max_age) + logger.debug("cache_path: %s", Path(cache_path).resolve() if cache_path else None) + logger.debug("no_cache: %s", no_cache) + logger.debug("offline: %s", offline) + + +def _warn_if_remote_offline(console: Console, rocrate_uri: Union[str, Path], offline: bool) -> None: + """Warn when a remote RO-Crate is validated in offline mode (the cached copy is used).""" + if offline and isinstance(rocrate_uri, str) and rocrate_uri.split(":", 1)[0].lower() in ("http", "https", "ftp"): + console.print( + Padding( + Rule( + "[bold yellow]WARNING:[/bold yellow] " + "[bold]The target RO-Crate is remote and offline mode is enabled.[/bold]\n" + "The cached version of the RO-Crate will be used if available.\n" + "The cached copy may be out of sync with the version currently published remotely.", + align="center", + style="bold yellow", + ), + (1, 2, 0, 2), + ) + ) + + +def _parse_skip_checks(skip_checks: Optional[list[str]]) -> list[str]: + 
"""Parse the comma-separated ``--skip-checks`` option into a flat list of check IDs.""" + logger.debug("skip_checks: %s", skip_checks) + skip_checks_list: list[str] = [] + if skip_checks: + try: + for s in skip_checks: + skip_checks_list.extend(_.strip() for _ in s.split(",") if _.strip()) + except Exception as e: + logger.error("Error parsing skip_checks: %s", e) + if logger.isEnabledFor(logging.DEBUG): + logger.exception("Error parsing skip_checks: %s", e) + raise ValueError( + f"Invalid skip_checks value: {s}. " + "It must be a comma-separated list of Fully Qualified Check IDs." + ) from e + logger.debug("Skip checks: %s", skip_checks_list) + return skip_checks_list + + +def _resolve_profile_identifiers( + console: Console, + interactive: bool, + no_auto_profile: bool, + available_profiles: list, + profile_identifiers: list[str], + validation_settings: dict, +) -> tuple[list[str], bool]: + """ + Resolve the concrete list of profile identifiers to validate against. + + Applies auto-detection and interactive selection when no profile is given, + and falls back to the base ``ro-crate`` profile when nothing can be resolved. + Returns the identifiers and whether they were auto-detected. 
+ """ + autodetection = False + if not profile_identifiers: + # Auto-detect the profile to use for validation (if not disabled) + candidate_profiles = None + if not no_auto_profile: + candidate_profiles = services.detect_profiles(settings=validation_settings) + logger.debug("Candidate profiles: %s", candidate_profiles) + else: + logger.info("Auto-detection of the profiles to use for validation is disabled") + + # Prompt the user when interactive and no single profile could be auto-detected + if interactive and ( + not candidate_profiles + or len(candidate_profiles) == 0 + or len(candidate_profiles) == len(available_profiles) + ): + console.print( + Padding( + Rule( + "[bold yellow]WARNING: [/bold yellow]" + "[bold]Unable to automatically detect the profile to use for validation[/bold]\n", + align="center", + style="bold yellow" + ), + (2, 2, 0, 2) + ) + ) + selected_options = multiple_choice(console, available_profiles) + profile_identifiers = [available_profiles[int(o)].identifier for o in selected_options] + logger.debug("Profile selected: %s", selected_options) + console.print(Padding(Rule(style="bold yellow"), (1, 2))) + elif candidate_profiles and len(candidate_profiles) < len(available_profiles): + logger.debug("Profile identifier autodetected: %s", candidate_profiles[0].identifier) + autodetection = True + profile_identifiers = [_.identifier for _ in candidate_profiles] + + # Fall back to the base profile when nothing could be resolved + if not profile_identifiers: + console.print(f"\n{' '*2}[bold yellow]WARNING: [/bold yellow]", end="") + if no_auto_profile: + console.print("[bold]Auto-detection of the profiles to use for validation is disabled[/bold]") + else: + console.print("[bold]Unable to automatically detect the profile to use for validation[/bold]") + console.print(f"{' '*11}[bold]The base `ro-crate` profile will be used for validation[/bold]") + profile_identifiers = ["ro-crate"] + + return profile_identifiers, autodetection + + +def 
_render_console_result( + validation_settings: dict, + *, + console: Console, + pager, + interactive: bool, + enable_pager: bool, + verbose: bool, +) -> ValidationResult: + """Validate and render the result to the interactive/text console (no output file).""" + if interactive: + command_view = ValidationCommandView( + validation_settings=ValidationSettings.parse(validation_settings), + console=console, + interactive=interactive, + no_paging=not enable_pager, + pager=pager + ) + result = command_view.show_validation_progress(services.validate) + if not result.passed(): + verbose_choice = "n" + if interactive and not verbose: + verbose_choice = get_single_char(console, choices=['y', 'n'], message=( + "[bold] > Do you want to see the validation details? " + "([magenta]y/n[/magenta]): [/bold]" + )) + if verbose_choice == "y" or verbose: + command_view.display_validation_result(result) + return result + + result = services.validate(validation_settings) + console.register_formatter(TextOutputFormatter()) + console.print(result.statistics) + if not result.passed() and verbose: + out = Console(no_color=console.no_color, width=console.width, height=console.height) + out.register_formatter(TextOutputFormatter()) + out.print(result) + return result + + +def _render_file_or_collected_result( + profile: str, + validation_settings: dict, + *, + console: Console, + interactive: bool, + verbose: bool, + output_format: str, + output_file: Optional[Path], + output_line_width: Optional[int], +) -> ValidationResult: + """Validate for the file/JSON-input path, optionally writing a text report to file.""" + if interactive: + with LiveTextProgressLayout( + console=console, + profile_identifier=profile, + validation_settings=validation_settings, + callable_service=services.validate, + transient=True + ) as result: + logger.debug("Validation result obtained") + else: + result = services.validate(validation_settings) + + # Output processing for text format to file + if output_file and 
output_format == "text": + if interactive: + console.print(f"\n{' '*2}📝 [bold]Writing validation results to file[/bold]{'.'*4} ", end="") + with output_file.open("w", encoding="utf-8") if output_file else sys.stdout as f: + out = Console(color_system=None, width=output_line_width, height=31, file=f) + out.register_formatter(TextOutputFormatter()) + out.print(result.statistics) # Output the statistics first + if not result.passed() and verbose: + out.print(result) + if interactive: + console.print(f"[bold green]{output_file}[/bold green]", end="\n") + return result + + +def _emit_json_report( + results: dict, + profile_identifiers: list[str], + is_valid: bool, + *, + console: Console, + interactive: bool, + output_file: Optional[Path], + output_line_width: Optional[int], +) -> None: + """Write the aggregated validation results as JSON to a file or stdout.""" + if interactive: + if is_valid: + console.print( + f"\n{' '*2}✅ [bold]Validation [green]PASSED![/green]. " + f"\n{' '*5}RO-Crate is valid according to the profile(s): " + f"[cyan]{', '.join(profile_identifiers)}[/cyan][/bold]" + ) + else: + console.print(f"\n{' '*2}❌ [bold]Validation [red]FAILED![/red][/bold]") + if output_file: + console.print( + f"\n{' '*2}📝 [bold]Writing validation results in JSON format " + f"to the file \"{output_file}\"[/bold]{'.'*4} ", + end="" + ) + else: + console.print(f"\n{' '*2}📋 [bold]The validation report in JSON format: [/bold]\n") + + # Generate the JSON output and write it to the specified output file or to stdout + with output_file.open("w", encoding="utf-8") if output_file else nullcontext(sys.stdout) as f: + out = Console(width=output_line_width, file=f) + out.register_formatter(JSONOutputFormatter()) + out.print(results) + + if interactive and output_file: + console.print("[bold]DONE![/bold]", end="\n\n") diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 86509de89..04bd7873f 100644 --- a/rocrate_validator/models.py +++ 
b/rocrate_validator/models.py @@ -1866,6 +1866,39 @@ def duration(self) -> Optional[float]: return (finished_at - started_at).total_seconds() return None + @staticmethod + def __collect_requirement_checks__( + requirement, severity_validation, validation_settings, + target_profile_identifier, checks, checks_by_severity, + ) -> int: + """Count and register a requirement's checks across severities >= the requested one.""" + requirement_checks_count = 0 + for severity in ( + Severity.REQUIRED, + Severity.RECOMMENDED, + Severity.OPTIONAL, + ): + logger.debug( + f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}" + ) + # skip requirements with lower severity + if severity < severity_validation: + continue + # count the checks + requirement_checks = [ + _ + for _ in requirement.get_checks_by_level(LevelCollection.get(severity.name)) + if (not validation_settings.skip_checks or _.identifier not in validation_settings.skip_checks) + and (not _.overridden or _.requirement.profile.identifier == target_profile_identifier) + ] + num_checks = len(requirement_checks) + requirement_checks_count += num_checks + if num_checks > 0: + logger.debug(f"Requirement: {requirement} has {num_checks} checks of severity: {severity}") + checks.update(requirement_checks) + checks_by_severity[severity].update(requirement_checks) + return requirement_checks_count + @classmethod def __initialise__(cls, validation_settings: ValidationSettings): """ @@ -1912,31 +1945,14 @@ def __initialise__(cls, validation_settings: ValidationSettings): if requirement.hidden: continue - requirement_checks_count = 0 - for severity in ( - Severity.REQUIRED, - Severity.RECOMMENDED, - Severity.OPTIONAL, - ): - logger.debug( - f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}" - ) - # skip requirements with lower severity - if severity < severity_validation: - continue - # count the checks - requirement_checks = [ - _ - for _ in 
requirement.get_checks_by_level(LevelCollection.get(severity.name)) - if (not validation_settings.skip_checks or _.identifier not in validation_settings.skip_checks) - and (not _.overridden or _.requirement.profile.identifier == target_profile_identifier) - ] - num_checks = len(requirement_checks) - requirement_checks_count += num_checks - if num_checks > 0: - logger.debug(f"Requirement: {requirement} has {num_checks} checks of severity: {severity}") - checks.update(requirement_checks) - checks_by_severity[severity].update(requirement_checks) + requirement_checks_count = cls.__collect_requirement_checks__( + requirement, + severity_validation, + validation_settings, + target_profile_identifier, + checks, + checks_by_severity, + ) # count the requirements and checks if requirement_checks_count == 0: diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 527a6b3f4..cea965d96 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -211,89 +211,74 @@ def check_context(self, context: ValidationContext) -> bool: @check(name="File Descriptor JSON-LD must be flattened") def check_flattened(self, context: ValidationContext) -> bool: """ Check if the file descriptor is flattened """ + return self._check_flattened_graph( + context, + lambda entity, fail_fast: self._is_entity_flat(context, entity, fail_fast=fail_fast), + ) + + def _is_entity_flat(self, context: ValidationContext, entity: Any, + is_first: bool = True, fail_fast: bool = False) -> bool: + """Recursively check that an entity (and its children) is flattened.""" + if isinstance(entity, dict): + if is_first: + return self._all_children_flat(context, entity.values(), fail_fast) + return self._validate_non_root_entity(context, entity, fail_fast) + if isinstance(entity, list): + return 
self._all_children_flat(context, entity, fail_fast) + return True - def _validate_non_root_entity(entity: Any, fail_fast: bool) -> bool: - result = True - if "@id" in entity and "@value" in entity: - context.result.add_issue( - ( - f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' - 'an object with an @value represents a value object, which is a literal value such as ' - 'a string, number, date, or language-tagged string. This object is not an identifiable ' - 'resource, but a simple literal value.' - ), - self - ) + def _all_children_flat(self, context: ValidationContext, children: Any, fail_fast: bool) -> bool: + """Check that every child entity is flattened, honoring fail-fast.""" + result = True + for child in children: + if not self._is_entity_flat(context, child, is_first=False, fail_fast=fail_fast): result = False if fail_fast: return False - - if "@value" in entity: - if not isinstance(entity, dict): - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'it MUST be a dictionary.', - self - ) - result = False - if fail_fast: - return False - - has_language = "@language" in entity - has_type = "@type" in entity - - if has_language and has_type: - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - '@language and @type cannot coexist.', - self - ) - result = False - if fail_fast: - return False - - if has_language and not isinstance(entity["@value"], str): - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'if @language is present, @value must be a string.', - self - ) - result = False - if fail_fast: - return False - elif "@id" not in entity or len(entity) > 1: - context.result.add_issue( - f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' - 'it MUST have only @id, but no other properties.', - self + return result + + def _validate_non_root_entity(self, context: 
ValidationContext, entity: Any, fail_fast: bool) -> bool: + """Validate a non-root entity, collecting any flattening issues then emitting them.""" + issues: list[str] = [] + if "@id" in entity and "@value" in entity: + issues.append( + f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' + 'an object with an @value represents a value object, which is a literal value such as ' + 'a string, number, date, or language-tagged string. This object is not an identifiable ' + 'resource, but a simple literal value.' + ) + if "@value" in entity: + if not isinstance(entity, dict): + issues.append( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'it MUST be a dictionary.' ) - result = False - if fail_fast: - return False - return result - - def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool: - result = True - if isinstance(entity, dict): - if is_first: - for elem in entity.values(): - if not is_entity_flat_recursive(elem, is_first=False, fail_fast=fail_fast): - result = False - if fail_fast: - return False - elif not _validate_non_root_entity(entity, fail_fast): - result = False - if fail_fast: - return False - if isinstance(entity, list): - for element in entity: - if not is_entity_flat_recursive(element, is_first=False, fail_fast=fail_fast): - result = False - if fail_fast: - return False - return result + has_language = "@language" in entity + has_type = "@type" in entity + if has_language and has_type: + issues.append( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + '@language and @type cannot coexist.' + ) + if has_language and not isinstance(entity["@value"], str): + issues.append( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'if @language is present, @value must be a string.' 
+ ) + elif "@id" not in entity or len(entity) > 1: + issues.append( + f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' + 'it MUST have only @id, but no other properties.' + ) + return self._emit_entity_issues(context, issues, fail_fast) - return self._check_flattened_graph(context, is_entity_flat_recursive) + def _emit_entity_issues(self, context: ValidationContext, issues: list[str], fail_fast: bool) -> bool: + """Emit collected entity issues; in fail-fast mode stop after the first.""" + for message in issues: + context.result.add_issue(message, self) + if fail_fast: + return False + return not issues def _check_flattened_graph(self, context, is_flat): try: @@ -379,45 +364,21 @@ def __get_remote_context_keys__(self, context_uri: str) -> set: raise TypeError("The context is not a dictionary") return set(jsonLD_ctx.keys()) + # Reserved JSON-LD keywords that are always allowed as entity keys. + __RESERVED_JSONLD_KEYS__ = frozenset({"@id", "@type", "@context", "@value", "@language"}) + def __check_entity_keys__(self, entity: Any, context_keys: set, unexpected_keys: Optional[dict[str, int]] = None) -> dict[str, int]: """ Check if the entity is in the correct format """ - - def add_unexpected_key(k: str, u_keys: dict) -> None: - """ Add a key to the unexpected keys dictionary """ - u_keys[k] = u_keys.get(k, 0) + 1 - - # Keys that should be skipped - SKIP_KEYS = {"@id", "@type", "@context", "@value", "@language"} - # Ensure unexpected_keys is initialized if unexpected_keys is None: unexpected_keys = {} - # If the entity is a dictionary, check each key + # If the entity is a dictionary, classify each key and recurse into values if isinstance(entity, dict): for k, v in entity.items(): - # If the key is in the skip keys, skip it - if k in SKIP_KEYS: - logger.debug(f"Key {k} is a reserved JSON-LD keyword, skipping") - - # If the key is not in the context keys, - # it can be used in compacted format only if it is a valid compact IRI - # with a 
prefix that is in the context - elif k not in context_keys: - logger.debug(f"Key {k} not in context keys") - - # Try to get the prefix of the compact IRI, if it has one - prefix = k.split(":", 1)[0] if ":" in k else None - logger.debug(f"Checking prefix {prefix} of key {k}") - # If the key does not have a prefix (no colon) or the prefix is not in the context keys, - # it cannot be used as a key in compacted format - if prefix is None or prefix not in context_keys: - logger.debug( - f"Key {k} does not have a valid prefix in context keys, adding to unexpected keys") - add_unexpected_key(k, unexpected_keys) - + self.__record_unexpected_key__(k, context_keys, unexpected_keys) # If the value is a dictionary or a list, check its keys recursively if isinstance(v, (dict, list)): self.__check_entity_keys__(v, context_keys, unexpected_keys) @@ -429,6 +390,23 @@ def add_unexpected_key(k: str, u_keys: dict) -> None: return unexpected_keys + def __record_unexpected_key__(self, k: str, context_keys: set, unexpected_keys: dict[str, int]) -> None: + """ Record ``k`` as unexpected unless it is reserved or a valid compact IRI prefix """ + # If the key is a reserved JSON-LD keyword, skip it + if k in self.__RESERVED_JSONLD_KEYS__: + logger.debug(f"Key {k} is a reserved JSON-LD keyword, skipping") + return + + # A key not in the context can still be valid in compacted format if it is + # a compact IRI whose prefix is in the context. 
+ if k not in context_keys: + logger.debug(f"Key {k} not in context keys") + prefix = k.split(":", 1)[0] if ":" in k else None + logger.debug(f"Checking prefix {prefix} of key {k}") + if prefix is None or prefix not in context_keys: + logger.debug(f"Key {k} does not have a valid prefix in context keys, adding to unexpected keys") + unexpected_keys[k] = unexpected_keys.get(k, 0) + 1 + @check(name="Validation of the compaction format of the file descriptor") def check_compaction(self, context: ValidationContext) -> bool: """ Check if the file descriptor is in the **compacted** JSON-LD format """ diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index ce0430642..af6aae833 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -87,7 +87,66 @@ def validate( return result -def __initialise_validator__( # noqa: PLR0915 +def _build_validator(settings: ValidationSettings, subscribers: Optional[list[Subscriber]]) -> Validator: + """Create a validator for the given settings and register any subscribers.""" + validator = Validator(settings) + logger.debug("Validator created. 
Starting validation...") + if subscribers: + for subscriber in subscribers: + validator.add_subscriber(subscriber) + return validator + + +def _extract_and_validate( + settings: ValidationSettings, subscribers: Optional[list[Subscriber]], rocrate_path: Path +) -> Validator: + """Extract a (local or downloaded) zipped RO-Crate to a temp dir and validate it.""" + original_data_path = settings.rocrate_uri + with tempfile.TemporaryDirectory() as tmp_dir: + try: + with zipfile.ZipFile(rocrate_path, "r") as zip_ref: + zip_ref.extractall(tmp_dir) + logger.debug("RO-Crate extracted to temporary directory: %s", tmp_dir) + settings.rocrate_uri = URI(str(tmp_dir)) + return _build_validator(settings, subscribers) + finally: + settings.rocrate_uri = original_data_path + logger.debug("Original data path restored: %s", original_data_path) + + +def _download_remote_rocrate( + settings: ValidationSettings, subscribers: Optional[list[Subscriber]], rocrate_path: URI +) -> Validator: + """Download a remote (http/https/ftp) RO-Crate to a temp file, then extract and validate it.""" + logger.debug("RO-Crate is a remote RO-Crate") + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + requester = HttpRequester() + offline = bool(getattr(settings, "offline", False)) + # In offline mode, the cache is the only source of truth. Otherwise, + # bypass the cache to refresh the stored copy so that subsequent + # offline runs validate against the latest known remote state. + if offline: + response = requester.get(rocrate_path.uri, stream=True, allow_redirects=True) + else: + response = requester.fetch_fresh(rocrate_path.uri, stream=True, allow_redirects=True) + with response as r: + if r.status_code >= HTTP_STATUS_BAD_REQUEST: + if offline and r.status_code == HTTP_STATUS_GATEWAY_TIMEOUT: + raise FileNotFoundError( + f"Remote RO-Crate '{rocrate_path.uri}' is not available in the HTTP cache. 
" + f"Validate it online first, or run " + f"`rocrate-validator cache warm --crate '{rocrate_path.uri}'`." + ) + raise FileNotFoundError( + f"Failed to download remote RO-Crate '{rocrate_path.uri}' (status {r.status_code})." + ) + with Path(tmp_file.name).open("wb") as f: + shutil.copyfileobj(r.raw, f) + logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) + return _extract_and_validate(settings, subscribers, Path(tmp_file.name)) + + +def __initialise_validator__( settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None ) -> Validator: """ @@ -113,91 +172,22 @@ def __initialise_validator__( # noqa: PLR0915 disable_remote_crate_download = settings.disable_remote_crate_download logger.debug("Remote validation: %s", disable_remote_crate_download) if disable_remote_crate_download: - # create a validator - validator = Validator(settings) - logger.debug("Validator created. Starting validation...") - if subscribers: - for subscriber in subscribers: - validator.add_subscriber(subscriber) - return validator - - def __init_validator__(settings: ValidationSettings) -> Validator: - # create a validator - validator = Validator(settings) - logger.debug("Validator created. 
Starting validation...") - if subscribers: - for subscriber in subscribers: - validator.add_subscriber(subscriber) - return validator - - def __extract_and_validate_rocrate__(rocrate_path: Path): - # store the original data path - original_data_path = settings.rocrate_uri - - with tempfile.TemporaryDirectory() as tmp_dir: - try: - # extract the RO-Crate to the temporary directory - with zipfile.ZipFile(rocrate_path, "r") as zip_ref: - zip_ref.extractall(tmp_dir) - logger.debug("RO-Crate extracted to temporary directory: %s", tmp_dir) - # update the data path to point to the temporary directory - settings.rocrate_uri = URI(str(tmp_dir)) - # continue with the validation process - return __init_validator__(settings) - finally: - # restore the original data path - settings.rocrate_uri = original_data_path - logger.debug("Original data path restored: %s", original_data_path) - - # check if the RO-Crate is a remote RO-Crate, - # i.e., if the RO-Crate is a URL. If so, download the RO-Crate - # and extract it to a temporary directory. We support either http or https - # or ftp protocols to download the remote RO-Crate. + return _build_validator(settings, subscribers) + + # Resolve the RO-Crate source: remote URL, local ZIP, or local directory. + # We support http/https/ftp protocols to download a remote RO-Crate. if rocrate_path.scheme in ("http", "https", "ftp"): - logger.debug("RO-Crate is a remote RO-Crate") - # create a temp folder to store the downloaded RO-Crate - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - requester = HttpRequester() - offline = bool(getattr(settings, "offline", False)) - # In offline mode, the cache is the only source of truth. Otherwise, - # bypass the cache to refresh the stored copy so that subsequent - # offline runs validate against the latest known remote state. 
- if offline: - response = requester.get(rocrate_path.uri, stream=True, allow_redirects=True) - else: - response = requester.fetch_fresh(rocrate_path.uri, stream=True, allow_redirects=True) - with response as r: - if r.status_code >= HTTP_STATUS_BAD_REQUEST: - if offline and r.status_code == HTTP_STATUS_GATEWAY_TIMEOUT: - raise FileNotFoundError( - f"Remote RO-Crate '{rocrate_path.uri}' is not available in the HTTP cache. " - f"Validate it online first, or run " - f"`rocrate-validator cache warm --crate '{rocrate_path.uri}'`." - ) - raise FileNotFoundError( - f"Failed to download remote RO-Crate '{rocrate_path.uri}' (status {r.status_code})." - ) - with Path(tmp_file.name).open("wb") as f: - shutil.copyfileobj(r.raw, f) - logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) - # continue with the validation process by extracting the RO-Crate and validating it - return __extract_and_validate_rocrate__(Path(tmp_file.name)) - - # check if the RO-Crate is a ZIP file - elif rocrate_path.as_path().suffix == ".zip": + return _download_remote_rocrate(settings, subscribers, rocrate_path) + if rocrate_path.as_path().suffix == ".zip": logger.debug("RO-Crate is a local ZIP file") - # continue with the validation process by extracting the RO-Crate and validating it - return __extract_and_validate_rocrate__(rocrate_path.as_path()) - - # if the RO-Crate is not a ZIP file, directly validate the RO-Crate - elif rocrate_path.is_local_directory(): + return _extract_and_validate(settings, subscribers, rocrate_path.as_path()) + if rocrate_path.is_local_directory(): logger.debug("RO-Crate is a local directory") settings.rocrate_uri = URI(str(rocrate_path.as_path())) - return __init_validator__(settings) - else: - raise ValueError( - f"Invalid RO-Crate URI: {rocrate_path}. It MUST be a local directory or a ZIP file (local or remote)." - ) + return _build_validator(settings, subscribers) + raise ValueError( + f"Invalid RO-Crate URI: {rocrate_path}. 
It MUST be a local directory or a ZIP file (local or remote)." + ) def get_profiles( diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 91d65d9ec..123d2af5e 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -95,21 +95,27 @@ def format_validation_results(data: dict[str, ValidationResult], json_output["issues"].extend(result_dict.get("issues", [])) # Add overall statistics - stats = AggregatedValidationStatistics([r.statistics for r in results if r.statistics]) - if stats: - stats_dict = stats.to_dict() - # If not verbose, remove detailed lists from statistics - if not verbose: - for key in ["passed_requirements", "failed_requirements", - "passed_checks", "failed_checks", "checks", "requirements"]: - if key in stats_dict: - stats_dict.pop(key, None) + stats_dict = _compute_overall_statistics(results, verbose=verbose) + if stats_dict is not None: json_output["statistics"] = stats_dict # Return the formatted JSON output return json.dumps(json_output, indent=4, cls=CustomEncoder) +def _compute_overall_statistics(results: list[ValidationResult], verbose: bool) -> Optional[dict[str, Any]]: + """Aggregate per-result statistics, dropping detailed lists unless verbose.""" + stats = AggregatedValidationStatistics([r.statistics for r in results if r.statistics]) + if not stats: + return None + stats_dict = stats.to_dict() + if not verbose: + for key in ("passed_requirements", "failed_requirements", + "passed_checks", "failed_checks", "checks", "requirements"): + stats_dict.pop(key, None) + return stats_dict + + def format_validation_statistics(data: ValidationStatistics) -> str: return json.dumps(data.to_dict(), indent=4, cls=CustomEncoder) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 
2fb702ba4..23639091c 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -97,15 +97,7 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: - assert isinstance(event, RequirementCheckValidationEvent) - assert ctx is not None, "Validation context must be provided" - target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and \ - (not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier): - self.progress.update(task_id=self.requirement_check_validation, advance=1) - else: - logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) + self.__on_requirement_check_end__(event, ctx) elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: assert isinstance(event, RequirementValidationEvent) if not event.requirement.hidden: @@ -115,3 +107,15 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): elif event.event_type == EventType.VALIDATION_END: assert isinstance(event, ValidationEvent) logger.debug("Validation ended with result: %s", event.validation_result) + + def __on_requirement_check_end__(self, event: Event, ctx: Optional[ValidationContext]) -> None: + """Advance the requirement-check progress bar, unless the check is hidden or overridden.""" + assert isinstance(event, RequirementCheckValidationEvent) + assert ctx is not None, "Validation context must be provided" + target_profile = ctx.target_validation_profile + if not event.requirement_check.requirement.hidden and \ + (not event.requirement_check.overridden + or target_profile.identifier == 
event.requirement_check.requirement.profile.identifier): + self.progress.update(task_id=self.requirement_check_validation, advance=1) + else: + logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) From 8de1aa6858c34a111fe81debf477f000566e95e9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 13:42:14 +0200 Subject: [PATCH 268/352] =?UTF-8?q?fix(rocrate):=20=F0=9F=90=9B=20read=5Ft?= =?UTF-8?q?ext=20with=20explicit=20utf-8=20encoding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index ce2f2d435..4dca5d941 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -827,7 +827,7 @@ def get_file_content( path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") - return path.read_bytes() if binary_mode else path.read_text() + return path.read_bytes() if binary_mode else path.read_text(encoding="utf-8") class ROCrateLocalZip(ROCrate): From e74c9b27a349293e117fd0d6d08bcbad18c04885 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 13:42:56 +0200 Subject: [PATCH 269/352] =?UTF-8?q?style(rocrate):=20=F0=9F=8E=A8=20simpli?= =?UTF-8?q?fy=20zip=20suffix=20check=20to=20use=20!=3D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 4dca5d941..a2aeafbbe 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -871,7 +871,7 @@ def __init_zip_reference__(self): if not self.uri.as_path().is_file(): raise ROCrateInvalidURIError(uri=path) # check if the file is a zip file - if not 
self.uri.as_path().suffix == ".zip": + if self.uri.as_path().suffix != ".zip": raise ROCrateInvalidURIError(uri=path) self._zipref = zipfile.ZipFile(path) logger.debug("Initialized zip reference: %s", self._zipref) From 1623f5551a211746269e03309d3cdd4de93d7e57 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 13:45:41 +0200 Subject: [PATCH 270/352] =?UTF-8?q?fix(models):=20=F0=9F=90=9B=20call=20su?= =?UTF-8?q?per().=5F=5Finit=5F=5F=20in=20ValidationStatistics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 04bd7873f..dc8224d34 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1685,6 +1685,7 @@ def __init__( context: Optional[ValidationContext] = None, skip_initialization: bool = False, ): + super().__init__(name=self.__class__.__name__) if isinstance(settings, dict): settings = ValidationSettings.parse(settings) self._settings = settings From f53d0ea6c028b0f8d272371ba7b768e43b78f634 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 15:07:57 +0200 Subject: [PATCH 271/352] =?UTF-8?q?chore(lint):=20=F0=9F=94=A7=20silence?= =?UTF-8?q?=20pylint=20false=20positives=20in=20cli=20and=20report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/main.py | 2 +- rocrate_validator/utils/io_helpers/output/text/layout/report.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index c9186f71d..521b4252a 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -97,7 +97,7 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ if __name__ == "__main__": try: - cli() + cli() # pylint: disable=no-value-for-parameter # click 
injects the parameters except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(f"An unexpected error occurred: {e}") diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index cc3b64136..c570fcccb 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -403,6 +403,7 @@ def run_validation(): validation_thread.join() # Check for exceptions if exception_container[0]: + # pylint: disable-next=raising-bad-type # populated with an exception by the worker thread raise exception_container[0] message.append(" DONE!", style="bold") # Final update to indicate completion From aec50bcc66127f4b83af7360b03c42fc979e9173 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 15:15:18 +0200 Subject: [PATCH 272/352] =?UTF-8?q?style(naming):=20=F0=9F=8E=A8=20rename?= =?UTF-8?q?=20identifiers=20to=20snake=5Fcase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 4 ++-- .../utils/io_helpers/output/json/formatters.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index dc8224d34..311b45374 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -322,11 +322,11 @@ def __get_specification_property__( property: str, namespace: Namespace, pop_first: bool = True, - as_Python_object: bool = True, + as_python_object: bool = True, ) -> Union[str, list[Union[str, URIRef]], None]: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) - values: list = [cast("Any", v).toPython() for v in nodes] if (nodes and as_Python_object) else list(nodes) + values: list = 
[cast("Any", v).toPython() for v in nodes] if (nodes and as_python_object) else list(nodes) if pop_first: return values[0] if values else None return values diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 123d2af5e..478073092 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -72,9 +72,9 @@ def format_validation_results(data: dict[str, ValidationResult], json_output["passed"] = True # Initialize the profile results dictionary - _RESULTS_KEY = "validation_results_by_profile" + results_key = "validation_results_by_profile" if verbose: - json_output[_RESULTS_KEY] = {} + json_output[results_key] = {} # Iterate over each validation result for profile_identifier, result in data.items(): @@ -86,7 +86,7 @@ def format_validation_results(data: dict[str, ValidationResult], result_dict["statistics"] = result.statistics.to_dict() # Add the result to the profiles dictionary in verbose mode if verbose: - json_output[_RESULTS_KEY][profile_identifier] = result_dict + json_output[results_key][profile_identifier] = result_dict # Update the overall passed status json_output["passed"] = json_output["passed"] and result.passed() # Update the overall list of issues From 25f5cc1c2958877ef3ca2d312f5cc03cc8639424 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 15:37:03 +0200 Subject: [PATCH 273/352] =?UTF-8?q?fix(types):=20=F0=9F=A9=B9=20add=20type?= =?UTF-8?q?=20annotations=20and=20casts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 97 +++++++++++-------- .../requirements/python/__init__.py | 9 +- .../requirements/shacl/models.py | 2 +- rocrate_validator/requirements/shacl/utils.py | 2 +- .../utils/io_helpers/output/console.py | 2 +- .../io_helpers/output/text/layout/report.py | 10 +- 
rocrate_validator/utils/log.py | 2 +- 7 files changed, 68 insertions(+), 56 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 311b45374..fa99a1c0d 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -270,7 +270,7 @@ def __init__( self._overridden_by: list[Profile] = [] # init property to store the RDF node which is the root of the profile specification graph - self._profile_node = None + self._profile_node: Any = None # init property to store the RDF graph of the profile specification self._profile_specification_graph: Optional[Graph] = None @@ -414,7 +414,7 @@ def uri(self): @property def label(self): - return self.__get_specification_property__("label", RDFS) + return self.__get_specification_property__("label", RDFS) # type: ignore[arg-type] @property def comment(self): @@ -422,7 +422,7 @@ def comment(self): The comment added to the profile in the profile specification file (i.e., the value of the rdfs: comment property in the `profile.ttl` file). 
""" - return self.__get_specification_property__("comment", RDFS) + return self.__get_specification_property__("comment", RDFS) # type: ignore[arg-type] @property def version(self): @@ -2321,62 +2321,73 @@ def __compute_averall_stats__(self): """ Compute the overall aggregated statistics """ - # Initialize the overall statistics - result = { - "profiles": set(), - "requirements": set(), - "checks": set(), - "checks_by_severity": {}, - "failed_requirements": set(), - "failed_checks": set(), - "passed_requirements": set(), - "passed_checks": set(), - "started_at": None, - "finished_at": None, - "duration": 0.0, - } + profiles: set[Profile] = set() + requirements: set[Requirement] = set() + checks: set[RequirementCheck] = set() + checks_by_severity: dict[Severity, set[RequirementCheck]] = {} + failed_requirements: set[Requirement] = set() + failed_checks: set[RequirementCheck] = set() + passed_requirements: set[Requirement] = set() + passed_checks: set[RequirementCheck] = set() + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + duration: float = 0.0 # Aggregate statistics from each ValidationStatistics instance for stats in self._statistics_list: # Aggregate profiles for profile in stats.profiles: - result["profiles"].add(profile) + profiles.add(profile) # Aggregate total requirements and checks - result["requirements"].update(stats.requirements) - result["checks"].update(stats.checks) - result["checks_by_severity"].update(stats.checks_by_severity) + requirements.update(stats.requirements) + checks.update(stats.checks) + checks_by_severity.update(stats.checks_by_severity) # Aggregate failed and passed requirements and checks - result["failed_requirements"].update(stats.failed_requirements) - result["failed_checks"].update(stats.failed_checks) - result["passed_requirements"].update(stats.passed_requirements) - result["passed_checks"].update(stats.passed_checks) + failed_requirements.update(stats.failed_requirements) + 
failed_checks.update(stats.failed_checks) + passed_requirements.update(stats.passed_requirements) + passed_checks.update(stats.passed_checks) # Aggregate started_at and finished_at - result["started_at"] = ( - min(result["started_at"], stats.started_at) if result["started_at"] else stats.started_at - ) - result["finished_at"] = ( - max(result["finished_at"], stats.finished_at) if result["finished_at"] else stats.finished_at - ) + if started_at is not None and stats.started_at is not None: + started_at = min(started_at, stats.started_at) + elif stats.started_at is not None: + started_at = stats.started_at + if finished_at is not None and stats.finished_at is not None: + finished_at = max(finished_at, stats.finished_at) + elif stats.finished_at is not None: + finished_at = stats.finished_at # Aggregate duration - result["duration"] += stats.duration or 0.0 + duration += stats.duration or 0.0 # Sort the sets to have consistent order - result["profiles"] = sorted(result["profiles"], key=lambda p: p.identifier) - result["requirements"] = sorted(result["requirements"], key=lambda r: r.identifier) - result["checks"] = sorted(result["checks"], key=lambda c: c.identifier) - result["checks_by_severity"] = { - k: sorted(v, key=lambda c: c.identifier) for k, v in result["checks_by_severity"].items() + sorted_profiles = sorted(profiles, key=lambda p: p.identifier) + sorted_requirements = sorted(requirements, key=lambda r: r.identifier) + sorted_checks = sorted(checks, key=lambda c: c.identifier) + sorted_checks_by_severity = { + k: sorted(v, key=lambda c: c.identifier) for k, v in checks_by_severity.items() } - result["failed_requirements"] = sorted(result["failed_requirements"], key=lambda r: r.identifier) - result["failed_checks"] = sorted(result["failed_checks"], key=lambda c: c.identifier) - result["passed_requirements"] = sorted(result["passed_requirements"], key=lambda r: r.identifier) - result["passed_checks"] = sorted(result["passed_checks"], key=lambda c: c.identifier) 
+ sorted_failed_requirements = sorted(failed_requirements, key=lambda r: r.identifier) + sorted_failed_checks = sorted(failed_checks, key=lambda c: c.identifier) + sorted_passed_requirements = sorted(passed_requirements, key=lambda r: r.identifier) + sorted_passed_checks = sorted(passed_checks, key=lambda c: c.identifier) # return the aggregated statistics - return result + return { + "profiles": sorted_profiles, + "requirements": sorted_requirements, + "checks": sorted_checks, + "checks_by_severity": sorted_checks_by_severity, + "failed_requirements": sorted_failed_requirements, + "failed_checks": sorted_failed_checks, + "passed_requirements": sorted_passed_requirements, + "passed_checks": sorted_passed_checks, + "started_at": started_at, + "finished_at": finished_at, + "duration": duration, + } class ValidationResult: @@ -2701,7 +2712,7 @@ class ValidationSettings: #: Verbose output verbose: bool = False #: Cache max age in seconds (negative values mean "never expire") - cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE + cache_max_age: int = DEFAULT_HTTP_CACHE_MAX_AGE #: Cache path cache_path: Optional[Path] = None #: Flag to enable offline mode: HTTP requests are served only from the cache diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 60284b437..caef82fb3 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -16,7 +16,7 @@ import re from collections.abc import Callable from pathlib import Path -from typing import Optional +from typing import Any, Optional, cast from rocrate_validator.constants import EXPECTED_CHECK_PARAM_COUNT from rocrate_validator.models import ( @@ -116,20 +116,21 @@ def __init__(self, def __init_checks__(self): # initialize the list of checks - checks = [] + checks: list = [] for name, member in inspect.getmembers(self.requirement_check_class, inspect.isfunction): # verify that the attribute 
set by the check decorator is present if hasattr(member, "check") and member.check is True: check_name = None try: - check_name = member.name.strip() + # `name`/`severity` are attributes attached dynamically by the @check decorator + check_name = cast("Any", member).name.strip() except Exception: check_name = name.strip() check_description = member.__doc__.strip() if member.__doc__ else "" # init the check with the requirement level severity = None try: - severity = member.severity + severity = cast("Any", member).severity logger.debug("Severity set for check '%r' from decorator: %r", check_name, severity) except Exception: pass diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 79347df96..a2fa55ddd 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -48,7 +48,7 @@ def __init__(self, node: Node, graph: Graph, parent: Optional[SHACLNode] = None) # store the shapes graph self._graph = graph # cache the hash - self._hash = None + self._hash: Optional[int] = None # store the parent shape self._parent = parent diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 57183c42c..9b88f8ca1 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -234,7 +234,7 @@ def __extract_related_triples__(graph, subject_node, processed_nodes=None): Recursively extract all triples related to a given shape. 
""" - related_triples = [] + related_triples: list = [] processed_nodes = processed_nodes if processed_nodes is not None else set() diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index f443c5d18..59087f455 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -61,7 +61,7 @@ def __format_data__(self, obj, *args, **kwargs): return formatter(obj) return obj - def print(self, obj, *args, **kwargs): + def print(self, obj, *args, **kwargs): # type: ignore[override] # intentional formatting wrapper if not self.disabled: out = self.__format_data__(obj, *args, **kwargs) super().print(out, *args, **kwargs) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index c570fcccb..5133bb40f 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -376,8 +376,8 @@ def __enter__(self): with Live(message, console=self.console, refresh_per_second=4) as live: # Start validation in background while updating dots - result_container = [None] - exception_container = [None] + result_container: list[Optional[ValidationResult]] = [None] + exception_container: list[Optional[BaseException]] = [None] def run_validation(): try: @@ -402,9 +402,9 @@ def run_validation(): # Wait for the validation thread to finish validation_thread.join() # Check for exceptions - if exception_container[0]: - # pylint: disable-next=raising-bad-type # populated with an exception by the worker thread - raise exception_container[0] + error = exception_container[0] + if error is not None: + raise error message.append(" DONE!", style="bold") # Final update to indicate completion return result_container[0] diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 
6ec54e445..15be23b8b 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -210,7 +210,7 @@ class LoggerProxy: def __init__(self, name: str): self.name = name - self._instance = None + self._instance: Optional[Logger] = None def _initialize(self): _acquireLock() From ec90e3a12743743d2ccf413dd4ea8de81c8a8f87 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:13:34 +0200 Subject: [PATCH 274/352] =?UTF-8?q?refactor(models):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20split=20aggregated=20statistics=20into=20raw=20+=20sort=20he?= =?UTF-8?q?lpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 69 ++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index fa99a1c0d..bc2f1259d 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -389,7 +389,7 @@ def profile_specification_graph(self) -> Graph: """ The RDF graph of the profile specification. """ - return self._profile_specification_graph # type: ignore + return self._profile_specification_graph # type: ignore[return-value] @property def profile_node(self): @@ -2321,6 +2321,17 @@ def __compute_averall_stats__(self): """ Compute the overall aggregated statistics """ + raw_stats = self.__aggregate_raw_stats__(self._statistics_list) + return self.__build_sorted_stats_dict__(raw_stats) + + @classmethod + def __aggregate_raw_stats__( + cls, + statistics_list: list[ValidationStatistics], + ): + """ + Aggregate raw (unsorted) statistics from a list of ValidationStatistics instances. 
+ """ profiles: set[Profile] = set() requirements: set[Requirement] = set() checks: set[RequirementCheck] = set() @@ -2334,7 +2345,7 @@ def __compute_averall_stats__(self): duration: float = 0.0 # Aggregate statistics from each ValidationStatistics instance - for stats in self._statistics_list: + for stats in statistics_list: # Aggregate profiles for profile in stats.profiles: profiles.add(profile) @@ -2362,33 +2373,45 @@ def __compute_averall_stats__(self): # Aggregate duration duration += stats.duration or 0.0 - # Sort the sets to have consistent order - sorted_profiles = sorted(profiles, key=lambda p: p.identifier) - sorted_requirements = sorted(requirements, key=lambda r: r.identifier) - sorted_checks = sorted(checks, key=lambda c: c.identifier) - sorted_checks_by_severity = { - k: sorted(v, key=lambda c: c.identifier) for k, v in checks_by_severity.items() - } - sorted_failed_requirements = sorted(failed_requirements, key=lambda r: r.identifier) - sorted_failed_checks = sorted(failed_checks, key=lambda c: c.identifier) - sorted_passed_requirements = sorted(passed_requirements, key=lambda r: r.identifier) - sorted_passed_checks = sorted(passed_checks, key=lambda c: c.identifier) - - # return the aggregated statistics return { - "profiles": sorted_profiles, - "requirements": sorted_requirements, - "checks": sorted_checks, - "checks_by_severity": sorted_checks_by_severity, - "failed_requirements": sorted_failed_requirements, - "failed_checks": sorted_failed_checks, - "passed_requirements": sorted_passed_requirements, - "passed_checks": sorted_passed_checks, + "profiles": profiles, + "requirements": requirements, + "checks": checks, + "checks_by_severity": checks_by_severity, + "failed_requirements": failed_requirements, + "failed_checks": failed_checks, + "passed_requirements": passed_requirements, + "passed_checks": passed_checks, "started_at": started_at, "finished_at": finished_at, "duration": duration, } + @classmethod + def __build_sorted_stats_dict__(cls, 
raw_stats): + """ + Sort the raw aggregated sets and build the final sorted statistics dict. + """ + sorted_checks_by_severity = {} + for severity_key, severity_checks in raw_stats["checks_by_severity"].items(): + sorted_checks_by_severity[severity_key] = sorted( + severity_checks, key=lambda c: c.identifier + ) + + return { + "profiles": sorted(raw_stats["profiles"], key=lambda p: p.identifier), + "requirements": sorted(raw_stats["requirements"], key=lambda r: r.identifier), + "checks": sorted(raw_stats["checks"], key=lambda c: c.identifier), + "checks_by_severity": sorted_checks_by_severity, + "failed_requirements": sorted(raw_stats["failed_requirements"], key=lambda r: r.identifier), + "failed_checks": sorted(raw_stats["failed_checks"], key=lambda c: c.identifier), + "passed_requirements": sorted(raw_stats["passed_requirements"], key=lambda r: r.identifier), + "passed_checks": sorted(raw_stats["passed_checks"], key=lambda c: c.identifier), + "started_at": raw_stats["started_at"], + "finished_at": raw_stats["finished_at"], + "duration": raw_stats["duration"], + } + class ValidationResult: """ From f8df3fd3c127d959bb16a8a8c7ae4afbc0123e4d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:32:46 +0200 Subject: [PATCH 275/352] =?UTF-8?q?style(models):=20=F0=9F=8E=A8=20fix=20i?= =?UTF-8?q?mport=20order=20of=20TYPE=5FCHECKING=20block?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index bc2f1259d..fd573880c 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -28,9 +28,6 @@ from typing import TYPE_CHECKING, Any, Optional, Protocol, Union, cast from urllib.error import HTTPError -if TYPE_CHECKING: - from collections.abc import Collection - import enum_tools from rdflib import RDF, RDFS, Graph, Namespace, URIRef @@ 
-72,6 +69,9 @@ ) from rocrate_validator.utils.uri import URI +if TYPE_CHECKING: + from collections.abc import Collection + # set the default profiles path DEFAULT_PROFILES_PATH = get_profiles_path() From fb0a45119465e87b92593438950d4b2947bec440 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:38:36 +0200 Subject: [PATCH 276/352] =?UTF-8?q?chore(rocrate):=20=F0=9F=94=A7=20disabl?= =?UTF-8?q?e=20consider-using-with=20for=20retained=20ZipFile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index a2aeafbbe..af2f48cec 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -873,7 +873,7 @@ def __init_zip_reference__(self): # check if the file is a zip file if self.uri.as_path().suffix != ".zip": raise ROCrateInvalidURIError(uri=path) - self._zipref = zipfile.ZipFile(path) + self._zipref = zipfile.ZipFile(path) # pylint: disable=consider-using-with logger.debug("Initialized zip reference: %s", self._zipref) def __get_file_info__(self, path: Union[str, Path]) -> zipfile.ZipInfo: @@ -972,7 +972,7 @@ def __init_zip_reference__(self): central_directory_data = self.__fetch_range__(url, central_directory_offset, central_directory_offset + central_directory_size - 1) # Step 5: Parse the central directory and return the zip file - self._zipref = zipfile.ZipFile(io.BytesIO(central_directory_data)) + self._zipref = zipfile.ZipFile(io.BytesIO(central_directory_data)) # pylint: disable=consider-using-with @property def size(self) -> int: From 2d7984e893fde99f05e5ec7d08011416b6ae8c7c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:39:25 +0200 Subject: [PATCH 277/352] =?UTF-8?q?refactor(log):=20=E2=99=BB=EF=B8=8F=20u?= =?UTF-8?q?se=20context-managed=20lock=20instead=20of=20manual=20acquire/r?= 
=?UTF-8?q?elease?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/log.py | 41 +++++----------------------------- 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 15be23b8b..231dc31da 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -61,24 +61,6 @@ def get_log_format(level: int): _lock = threading.RLock() -def _acquireLock(): - """ - Acquire the module-level lock for serializing access to shared data. - - This should be released with _releaseLock(). - """ - if _lock: - _lock.acquire() - - -def _releaseLock(): - """ - Release the module-level lock acquired by calling _acquireLock(). - """ - if _lock: - _lock.release() - - # reference to the list of create loggers __loggers__: dict[str, Logger] = {} @@ -145,28 +127,23 @@ def __setup_logger__(logger: Logger): def __create_logger__(name: str) -> Logger: if not isinstance(name, str): raise TypeError('A logger name must be a string') - _acquireLock() - try: + with _lock: # Return the cached logger if it already exists, otherwise create it. 
logger = __loggers__.get(name) if logger is None: logger = colorlog.getLogger(name) __setup_logger__(logger) __loggers__[name] = logger - finally: - _releaseLock() return logger def basicConfig(level: int, modules_config: Optional[dict] = None): """Set the log level and format for the logger""" - _acquireLock() - - # set the default log level to ERROR for loggers of other modules - logging_basicConfig(level=ERROR) + with _lock: + # set the default log level to ERROR for loggers of other modules + logging_basicConfig(level=ERROR) - # set the default log level and format - try: + # set the default log level and format if not isinstance(level, int): level = getattr(__module__, level.upper(), None) @@ -196,9 +173,6 @@ def basicConfig(level: int, modules_config: Optional[dict] = None): for logger in __loggers__.values(): __setup_logger__(logger) - finally: - _releaseLock() - def getLogger(name: str) -> "LoggerProxy": return LoggerProxy(name) @@ -213,12 +187,9 @@ def __init__(self, name: str): self._instance: Optional[Logger] = None def _initialize(self): - _acquireLock() - try: + with _lock: if self._instance is None: self._instance = __create_logger__(self.name) - finally: - _releaseLock() def __getattr__(self, name): self._initialize() From 885581e0f80e40868cbc37cdfb4747652e95dc4e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:50:28 +0200 Subject: [PATCH 278/352] =?UTF-8?q?refactor(utils):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20move=20local=20imports=20to=20top=20level=20(PLC0415)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/config.py | 4 +--- rocrate_validator/utils/versioning.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/config.py b/rocrate_validator/utils/config.py index 0c96cb2d8..5923382da 100644 --- a/rocrate_validator/utils/config.py +++ b/rocrate_validator/utils/config.py @@ -15,6 +15,7 @@ import toml from 
rocrate_validator.utils import log as logging +from rocrate_validator.utils.paths import get_config_path # set up logging logger = logging.getLogger(__name__) @@ -31,9 +32,6 @@ def get_config() -> dict: """ global _config # noqa: PLW0603 if _config is None: - from .paths import get_config_path # noqa: PLC0415 - - # Read the pyproject.toml file _config = toml.load(get_config_path()) return _config diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index 0221a750d..fe7e244b6 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -13,6 +13,7 @@ # limitations under the License. import re +import subprocess import sys from typing import Optional @@ -30,7 +31,6 @@ def run_git_command(command: list[str]) -> Optional[str]: :param command: The git command :return: The output of the command """ - import subprocess # noqa: PLC0415 try: return subprocess.check_output(command, stderr=subprocess.DEVNULL).decode().strip() From bfaad4dbd4e93e5941474a4fd32bab397734cd30 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 16:51:11 +0200 Subject: [PATCH 279/352] =?UTF-8?q?style(versioning):=20=F0=9F=8E=A8=20use?= =?UTF-8?q?=20double=20quotes=20for=20strings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/versioning.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index fe7e244b6..2d83879b6 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -46,7 +46,7 @@ def get_git_commit() -> str: :return: The git commit hash """ - return run_git_command(['git', 'rev-parse', '--short', 'HEAD']) or "" + return run_git_command(["git", "rev-parse", "--short", "HEAD"]) or "" def is_release_tag(git_sha: str) -> bool: @@ -56,7 +56,7 @@ def is_release_tag(git_sha: str) -> bool: 
:param git_sha: The git sha :return: True if the sha corresponds to a release tag, False otherwise """ - tags = run_git_command(['git', 'tag', '--points-at', git_sha]) + tags = run_git_command(["git", "tag", "--points-at", git_sha]) return bool(tags) @@ -66,7 +66,7 @@ def get_last_tag() -> str: :return: The last tag """ - return run_git_command(['git', 'describe', '--tags', '--abbrev=0']) or "" + return run_git_command(["git", "describe", "--tags", "--abbrev=0"]) or "" def get_commit_distance(tag: Optional[str] = None) -> int: @@ -78,7 +78,7 @@ def get_commit_distance(tag: Optional[str] = None) -> int: if not tag: tag = get_last_tag() try: - count = run_git_command(['git', 'rev-list', '--count', f"{tag}..HEAD"]) + count = run_git_command(["git", "rev-list", "--count", f"{tag}..HEAD"]) return int(count) if count else 0 except Exception as e: if logger.isEnabledFor(logging.DEBUG): @@ -93,7 +93,7 @@ def has_uncommitted_changes() -> bool: :return: True if there are uncommitted changes, False otherwise """ - return bool(run_git_command(['git', 'status', '--porcelain'])) + return bool(run_git_command(["git", "status", "--porcelain"])) def get_version() -> str: @@ -129,7 +129,7 @@ def get_min_python_version() -> tuple[int, ...]: min_version_str = config["tool"]["poetry"]["dependencies"]["python"] assert min_version_str, "The minimum Python version is required" # remove any non-digit characters - min_version_str = re.sub(r'[^\d.]+', '', min_version_str) + min_version_str = re.sub(r"[^\d.]+", "", min_version_str) # convert the version string to a tuple min_version = tuple(map(int, min_version_str.split("."))) logger.debug(f"Minimum Python version: {min_version}") From ab29b17def8f71780ca5e440fc9deeeb90f42196 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 17:08:08 +0200 Subject: [PATCH 280/352] =?UTF-8?q?refactor(lint):=20=E2=99=BB=EF=B8=8F=20?= =?UTF-8?q?fix=20shadowing/unused-arg=20naming=20(A002/ARG002)?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/__init__.py | 4 ++-- rocrate_validator/errors.py | 4 ++-- rocrate_validator/models.py | 14 +++++++------- rocrate_validator/rocrate.py | 2 +- rocrate_validator/utils/cache_warmup.py | 2 +- .../utils/io_helpers/output/console.py | 2 +- .../io_helpers/output/json/formatters.py | 19 +++++++++---------- 7 files changed, 23 insertions(+), 24 deletions(-) diff --git a/rocrate_validator/__init__.py b/rocrate_validator/__init__.py index d1d271ab6..ef190af33 100644 --- a/rocrate_validator/__init__.py +++ b/rocrate_validator/__init__.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -def get_version(): +def _get_version(): from rocrate_validator.utils.versioning import get_version # noqa: PLC0415 return get_version() -__version__ = get_version() +__version__ = _get_version() diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index 83e18b004..bd615b749 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -154,8 +154,8 @@ def __repr__(self): class InvalidSerializationFormat(ROCValidatorError): """Raised when an invalid serialization format is provided.""" - def __init__(self, format: Optional[str] = None): - self._format = format + def __init__(self, fmt: Optional[str] = None): + self._format = fmt @property def serialization_format(self) -> Optional[str]: diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index fd573880c..4fe2f02f5 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -124,7 +124,7 @@ def __eq__(self, other: object) -> bool: return self.name == other.name and self.severity == other.severity def __lt__(self, other: object) -> bool: - # TODO: this ordering is not totally coherent, since for two objects a and b + # NOTE: this ordering is not totally coherent, since for two objects a and b # with equal Severity but different 
names you would have # not a < b, which implies a >= b # and also a != b and not a > b, which is incoherent with a >= b @@ -319,13 +319,13 @@ def __init__( def __get_specification_property__( self, - property: str, + prop: str, namespace: Namespace, pop_first: bool = True, as_python_object: bool = True, ) -> Union[str, list[Union[str, URIRef]], None]: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" - nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) + nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[prop])) values: list = [cast("Any", v).toPython() for v in nodes] if (nodes and as_python_object) else list(nodes) if pop_first: return values[0] if values else None @@ -1996,7 +1996,7 @@ def __initialise__(cls, validation_settings: ValidationSettings): def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: self.__event_handlers__.get(event.event_type, lambda e, c: None)(event, ctx) - def __handle_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_validation_start__(self, _event: Event, _ctx: Optional[ValidationContext]) -> None: logger.debug("Validation started") self._stats["started_at"] = datetime.now(timezone.utc) @@ -2004,10 +2004,10 @@ def __handle_profile_validation_start__(self, event: Event, _ctx: Optional[Valid assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation start: %s", event.profile.identifier) - def __handle_requirement_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_requirement_validation_start__(self, _event: Event, _ctx: Optional[ValidationContext]) -> None: logger.debug("Requirement validation start") - def __handle_requirement_check_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_requirement_check_validation_start__(self, 
_event: Event, _ctx: Optional[ValidationContext]) -> None: logger.debug("Requirement check validation start") def __handle_requirement_check_validation_end__(self, event: Event, ctx: Optional[ValidationContext]) -> None: @@ -2668,7 +2668,7 @@ def to_json(self, path: Optional[Path] = None) -> str: class CustomEncoder(json.JSONEncoder): - def default(self, obj): + def default(self, obj): # pylint: disable=arguments-renamed if isinstance(obj, CheckIssue): return obj.__dict__ if isinstance(obj, Path): diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index af2f48cec..51b3dd5cb 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -194,7 +194,7 @@ def has_types(self, entity_types: list[str], all_types: bool = False) -> bool: return all(t in e_types for t in entity_types) return any(t in e_types for t in entity_types) - def __process_property__(self, name: str, data: object) -> object: + def __process_property__(self, _name: str, data: object) -> object: if isinstance(data, dict) and "@id" in data: entity = self.metadata.get_entity(data["@id"]) if entity is None: diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 57d902d10..330d922a3 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -81,7 +81,7 @@ def discover_profile_cacheable_urls(profile: Profile) -> list[str]: urls: list[str] = [] try: for row in graph.query(_CACHEABLE_URLS_SPARQL): - artifact = cast("Any", row).artifact + artifact = cast(Any, row).artifact if artifact is None: continue value = str(artifact) diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index 59087f455..011ac5d7a 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -55,7 +55,7 @@ def register_formatter(self, formatter: OutputFormatter, type_: 
Optional[type] = assert type_ is not None # guaranteed by the check above self._formatters[type_] = formatter - def __format_data__(self, obj, *args, **kwargs): + def __format_data__(self, obj, *_, **__): # pylint: disable=unused-argument formatter = self._formatters.get(type(obj)) if formatter: return formatter(obj) diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 478073092..569da8c83 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -32,16 +32,17 @@ logger = logging.getLogger(__name__) -def format_validation_result(data: ValidationResult, - console: Console, - console_options: ConsoleOptions) -> str: - return format_validation_results({data.context.profile_identifier: data}, - console=console, console_options=console_options) +def format_validation_result(data: ValidationResult, console: Console, console_options: ConsoleOptions) -> str: + return format_validation_results( + {data.context.profile_identifier: data}, console=console, console_options=console_options + ) -def format_validation_results(data: dict[str, ValidationResult], - console: Optional[Console] = None, - console_options: Optional[ConsoleOptions] = None) -> str: +def format_validation_results( + data: dict[str, ValidationResult], + console: Optional[Console] = None, # pylint: disable=unused-argument + console_options: Optional[ConsoleOptions] = None, +) -> str: # pylint: disable=unused-argument # Initialize an empty JSON output json_output: dict[str, Any] = { @@ -130,7 +131,6 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR class ValidationStatisticsJSONOutputFormatter(OutputFormatter): - def __init__(self, statistics: ValidationStatistics): self._statistics = statistics @@ -139,7 +139,6 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR class 
ValidationResultsJSONOutputFormatter(OutputFormatter): - def __init__(self, results: dict[str, ValidationResult]): self._results = results From 0998cb7a76fb0776a51a4eac67ab74c4e4d095b5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 17:11:50 +0200 Subject: [PATCH 281/352] =?UTF-8?q?style:=20=F0=9F=8E=A8=20reformat?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../utils/io_helpers/output/json/formatters.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 569da8c83..38c7b2927 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -111,8 +111,14 @@ def _compute_overall_statistics(results: list[ValidationResult], verbose: bool) return None stats_dict = stats.to_dict() if not verbose: - for key in ("passed_requirements", "failed_requirements", - "passed_checks", "failed_checks", "checks", "requirements"): + for key in ( + "passed_requirements", + "failed_requirements", + "passed_checks", + "failed_checks", + "checks", + "requirements", + ): stats_dict.pop(key, None) return stats_dict @@ -122,7 +128,6 @@ def format_validation_statistics(data: ValidationStatistics) -> str: class ValidationResultJSONOutputFormatter(OutputFormatter): - def __init__(self, result: ValidationResult): self._result = result From 5d00a4fc4b31e7bbe02e573b4edc21fd9d7b456e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 17:12:50 +0200 Subject: [PATCH 282/352] =?UTF-8?q?fix(rocrate):=20=F0=9F=90=9B=20use=20Va?= =?UTF-8?q?lueError=20instead=20of=20generic=20Exception?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 51b3dd5cb..15faab500 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -981,7 +981,7 @@ def size(self) -> int: file_size = response.headers.get('Content-Length') if file_size is not None: return int(file_size) - raise Exception("Could not determine the file size from the headers") + raise ValueError("Could not determine the file size from the headers") @staticmethod def __fetch_range__(uri: str, start, end): @@ -995,7 +995,7 @@ def __find_eocd__(data): eocd_signature = b'PK\x05\x06' eocd_offset = data.rfind(eocd_signature) if eocd_offset == -1: - raise Exception("EOCD not found") + raise ValueError("EOCD not found") return eocd_offset @staticmethod From 98f84d433063fbaabd585dc6e54fd99362112b5d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 17:14:44 +0200 Subject: [PATCH 283/352] =?UTF-8?q?fix(report):=20=F0=9F=90=9B=20declare?= =?UTF-8?q?=20layout=20attributes=20in=20=5F=5Finit=5F=5F=20(W0201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/io_helpers/output/text/layout/report.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 5133bb40f..1a81455b1 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -74,6 +74,8 @@ def __init__(self, console: Console, self.failed_checks: Optional[Layout] = None self.report_details_container: Optional[Layout] = None self.overall_result: Optional[Layout] = None + self.requirement_checks_by_severity_container_layout: Any = None + self.checks_stats_layout: Any = None @property def layout(self): From cfe6ec4f1581580148be1c01c85a322d50392a0d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 
17:19:35 +0200 Subject: [PATCH 284/352] =?UTF-8?q?chore(lint):=20=F0=9F=94=A7=20enable=20?= =?UTF-8?q?mccabe,=20eradicate,=20and=20pylint-parity=20preview=20rules=20?= =?UTF-8?q?in=20ruff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ruff.toml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ruff.toml b/ruff.toml index 2f03e98b8..44c574d89 100644 --- a/ruff.toml +++ b/ruff.toml @@ -43,8 +43,22 @@ extend-select = [ "FURB", # refurb – more idiomatic Python patterns "RUF", # Ruff-specific rules "PL", # pylint – PLC/PLE/PLR/PLW + "C90", # mccabe – cyclomatic complexity + "ERA", # eradicate – commented-out code + "PLR0914", # too-many-locals (preview) – pylint R0914 parity + "PLR1702", # too-many-nested-blocks (preview) – pylint R1702 parity ] +# Activate the preview-stage rules selected above (PLR0914 / PLR1702 are +# not yet stabilised in Ruff). `explicit-preview-rules` keeps preview-stage +# rules from being pulled in via the category selectors above (PL, E, B, ...), +# so only PLR0914 / PLR1702 are added from the preview set. NOTE: enabling +# `preview` also widens category membership to a handful of *stable* rules +# (BLE/TRY/D/PIE/...); those are explicitly suppressed in `ignore` below to +# keep the net effect limited to the two pylint-parity rules requested. +preview = true +explicit-preview-rules = true + # Rules intentionally disabled. 
ignore = [ "UP045", # non-pep604-annotation-optional: keep `Optional[X]` instead of `X | None` (project supports 3.9) @@ -53,6 +67,20 @@ ignore = [ "PLR0913", # too-many-arguments: pre-existing project style "PLR0917", # too-many-positional-arguments: pre-existing project style "PLW3201", # bad-dunder-method-name: project uses `__method__` convention; renaming is not viable + # --- Stable rules pulled in only as a side effect of `preview = true` --- + # (not part of the original selection; suppressed to keep the preview flag + # scoped to PLR0914 / PLR1702. Enable individually later if desired.) + "BLE001", # blind-except + "TRY401", # verbose-log-message + "TRY201", # verbose-raise + "TRY002", # raise-vanilla-class + "D419", # empty-docstring + "PIE804", # unnecessary-dict-kwargs + "PIE810", # multiple-starts-ends-with + "DTZ901", # datetime-min-max + "PYI034", # non-self-return-type + "PYI061", # redundant-none-literal + "S110", # try-except-pass ] [lint.per-file-ignores] @@ -60,4 +88,9 @@ ignore = [ "tests/**" = [ "PLR2004", # magic-value-comparison: tests use literal values in assertions "PLC0415", # import-outside-top-level: tests use local imports for mocking + "C901", # complex-structure: test helpers/scenarios are not held to the prod complexity gate ] + +# Cyclomatic complexity threshold for the mccabe (C90) check. 
+[lint.mccabe] +max-complexity = 10 From 6271b29a43cf1b0c26e5f04f20f3db7934e9eec2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 10 Jun 2026 22:43:32 +0200 Subject: [PATCH 285/352] =?UTF-8?q?style(cache-warmup):=20=F0=9F=8E=A8=20q?= =?UTF-8?q?uote=20the=20cast=20type=20(TC006)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/cache_warmup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 330d922a3..57d902d10 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -81,7 +81,7 @@ def discover_profile_cacheable_urls(profile: Profile) -> list[str]: urls: list[str] = [] try: for row in graph.query(_CACHEABLE_URLS_SPARQL): - artifact = cast(Any, row).artifact + artifact = cast("Any", row).artifact if artifact is None: continue value = str(artifact) From 49ce684f67f145c322d8b1c90aacbf620db6283b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 08:31:35 +0200 Subject: [PATCH 286/352] =?UTF-8?q?fix(cache):=20=F0=9F=90=9B=20add=20time?= =?UTF-8?q?zone=20to=20datetime.min=20(DTZ901)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 3e2d2e856..400cd9180 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -21,7 +21,7 @@ import copy as _copy import json -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Optional @@ -547,7 +547,7 @@ def _collect_cache_entries( if sort_by == "url": entries.sort(key=lambda e: e["url"].lower(), reverse=reverse) elif sort_by == "created": - 
entries.sort(key=lambda e: e["created_at"] or datetime.min, reverse=reverse) + entries.sort(key=lambda e: e["created_at"] or datetime.min.replace(tzinfo=timezone.utc), reverse=reverse) else: # "size" entries.sort(key=lambda e: e["size"], reverse=reverse) return entries From 69912ab37037f7bfd8738e9d2dc01c281f6ae149 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 08:44:15 +0200 Subject: [PATCH 287/352] =?UTF-8?q?fix(http):=20=F0=9F=90=9B=20use=20Self?= =?UTF-8?q?=20return=20type=20in=20=5F=5Fnew=5F=5F=20(PYI034)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/http.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 65b700d1d..58eac0ba9 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -20,7 +20,10 @@ import string import threading from pathlib import Path -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from typing_extensions import Self import requests @@ -99,7 +102,7 @@ class HttpRequester: _instance = None _lock = threading.Lock() - def __new__(cls, *args, **kwargs) -> HttpRequester: + def __new__(cls, *args, **kwargs) -> Self: if cls._instance is None: logger.debug(f"Creating instance of {cls.__name__} with args: {args}, kwargs: {kwargs}") with cls._lock: From c6a1badaf528faf38819445320f470f986ba92e0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 08:46:45 +0200 Subject: [PATCH 288/352] =?UTF-8?q?refactor(test):=20=E2=99=BB=EF=B8=8F=20?= =?UTF-8?q?merge=20startswith=20calls=20into=20tuple=20(PIE810)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/shared.py b/tests/shared.py index 859e48e8d..dd4b0384a 100644 --- 
a/tests/shared.py +++ b/tests/shared.py @@ -50,7 +50,7 @@ def collect_ids(obj): if isinstance(obj, dict): if "@id" in obj: idv = obj["@id"] - if isinstance(idv, str) and (idv.startswith("./") or idv.startswith("../") or idv.startswith("#")): + if isinstance(idv, str) and idv.startswith(("./", "../", "#")): rel_ids.add(idv) for v in obj.values(): collect_ids(v) From 9380fb8c5a33a095743d8b72d9520b31917d29e8 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 08:50:10 +0200 Subject: [PATCH 289/352] =?UTF-8?q?refactor(test):=20=E2=99=BB=EF=B8=8F=20?= =?UTF-8?q?replace=20unnecessary=20dict=20kwargs=20with=20explicit=20keywo?= =?UTF-8?q?rds=20(PIE804)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/shared.py | 18 +++++------ tests/unit/requirements/test_profiles.py | 38 ++++++++++-------------- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/tests/shared.py b/tests/shared.py index dd4b0384a..e289a1d91 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -170,16 +170,14 @@ def do_entity_test( # validate RO-Crate result: models.ValidationResult = services.validate( models.ValidationSettings( - **{ - "rocrate_uri": rocrate_path, - "requirement_severity": requirement_severity, - "abort_on_first": abort_on_first, - "profile_identifier": profile_identifier, - "skip_checks": skip_checks, - "rocrate_relative_root_path": rocrate_relative_root_path, - "metadata_only": metadata_only, - "metadata_dict": metadata_dict, - }, + rocrate_uri=rocrate_path, + requirement_severity=requirement_severity, + abort_on_first=abort_on_first, + profile_identifier=profile_identifier, + skip_checks=skip_checks, + rocrate_relative_root_path=rocrate_relative_root_path, + metadata_only=metadata_only, + metadata_dict=metadata_dict, **kwargs, ) ) diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 17383f421..f4693f7c9 100644 --- 
a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -304,14 +304,12 @@ def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str) no SHACLCheck would be recorded as executed for the wrapper target.""" settings = ValidationSettings( - **{ - "profiles_path": fake_profiles_path, - "profile_identifier": "c-wrapper", - "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - "disable_check_for_duplicates": True, - } + profiles_path=fake_profiles_path, + profile_identifier="c-wrapper", + rocrate_uri=ValidROC().wrroc_paper, + enable_profile_inheritance=True, + allow_requirement_check_override=True, + disable_check_for_duplicates=True, ) result = Validator(settings).validate() @@ -526,13 +524,11 @@ def test_shacl_check_deactivated_via_cross_profile_triple(fake_profiles_path: st SHACLCheck.deactivated and the pre-load pass in Validator.""" settings = ValidationSettings( - **{ - "profiles_path": fake_profiles_path, - "profile_identifier": "c-deactivated-direct", - "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - } + profiles_path=fake_profiles_path, + profile_identifier="c-deactivated-direct", + rocrate_uri=ValidROC().wrroc_paper, + enable_profile_inheritance=True, + allow_requirement_check_override=True, ) validator = Validator(settings) context = ValidationContext(validator, validator.validation_settings) @@ -570,13 +566,11 @@ def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str) one another's checks.""" settings = ValidationSettings( - **{ - "profiles_path": fake_profiles_path, - "profile_identifier": "c", - "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - } + profiles_path=fake_profiles_path, + profile_identifier="c", + rocrate_uri=ValidROC().wrroc_paper, + 
enable_profile_inheritance=True, + allow_requirement_check_override=True, ) validator = Validator(settings) context = ValidationContext(validator, validator.validation_settings) From feb3e07f83a94457f89336983443bf9fad5f4017 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 08:58:01 +0200 Subject: [PATCH 290/352] =?UTF-8?q?fix(constants):=20=F0=9F=90=9B=20fix=20?= =?UTF-8?q?inference=20options=20type=20hint=20for=20None=20(PYI061)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index 9e8311b91..affc8b6d6 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -74,7 +74,7 @@ RDF_SERIALIZATION_FORMATS = typing.get_args(RDF_SERIALIZATION_FORMATS_TYPES) # Define allowed inference options -VALID_INFERENCE_OPTIONS_TYPES = typing.Literal["owlrl", "rdfs", "both", None] +VALID_INFERENCE_OPTIONS_TYPES = typing.Literal["owlrl", "rdfs", "both"] | None VALID_INFERENCE_OPTIONS = typing.get_args(VALID_INFERENCE_OPTIONS_TYPES) # Define allowed requirement levels From 83d3392bed65df4c157be24071b0aa39eeaffb8f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:03:58 +0200 Subject: [PATCH 291/352] =?UTF-8?q?fix(python):=20=F0=9F=94=87=20log=20mis?= =?UTF-8?q?sing=20decorator=20severity=20instead=20of=20silent=20pass=20(S?= =?UTF-8?q?110)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/python/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index caef82fb3..5165b3024 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -133,7 +133,7 
@@ def __init_checks__(self): severity = cast("Any", member).severity logger.debug("Severity set for check '%r' from decorator: %r", check_name, severity) except Exception: - pass + logger.debug(f"No severity set for check '{check_name}' from decorator.") if not severity: logger.debug(f"No explicit severity set for check '{check_name}' from decorator." f"Getting severity from path: {self.severity_from_path}") From 98426c09936ecc94b3acc3a73d395a4358b257a2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:14:49 +0200 Subject: [PATCH 292/352] =?UTF-8?q?style(typing):=20=F0=9F=8E=A8=20use=20P?= =?UTF-8?q?EP=20604=20union=20syntax=20(X=20|=20Y)=20instead=20of=20Union?= =?UTF-8?q?=20(UP007)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 6 ++-- rocrate_validator/errors.py | 8 ++--- rocrate_validator/events.py | 4 +-- rocrate_validator/models.py | 28 +++++++-------- .../requirements/shacl/models.py | 4 +-- rocrate_validator/requirements/shacl/utils.py | 4 +-- .../requirements/shacl/validator.py | 18 +++++----- rocrate_validator/rocrate.py | 36 +++++++++---------- rocrate_validator/services.py | 10 +++--- rocrate_validator/utils/io_helpers/colors.py | 3 +- .../io_helpers/output/text/layout/progress.py | 4 +-- rocrate_validator/utils/uri.py | 6 ++-- tests/shared.py | 4 +-- 13 files changed, 67 insertions(+), 68 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 6ddd97b33..9fd0ff2ac 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -17,7 +17,7 @@ import sys from contextlib import nullcontext from pathlib import Path -from typing import Optional, Union +from typing import Optional import rich_click as click from rich.padding import Padding @@ -245,7 +245,7 @@ def validate(ctx, requirement_severity: str = Severity.REQUIRED.name, 
requirement_severity_only: bool = False, skip_checks: Optional[list[str]] = None, - rocrate_uri: Union[str, Path] = ".", + rocrate_uri: str | Path = ".", relative_root_path: Optional[Path] = None, fail_fast: bool = False, no_paging: bool = False, @@ -431,7 +431,7 @@ def _log_validation_inputs( logger.debug("offline: %s", offline) -def _warn_if_remote_offline(console: Console, rocrate_uri: Union[str, Path], offline: bool) -> None: +def _warn_if_remote_offline(console: Console, rocrate_uri: str | Path, offline: bool) -> None: """Warn when a remote RO-Crate is validated in offline mode (the cached copy is used).""" if offline and isinstance(rocrate_uri, str) and rocrate_uri.split(":", 1)[0].lower() in ("http", "https", "ftp"): console.print( diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index bd615b749..f3b06309c 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: # Imported only for type-checking to avoid a circular import: @@ -252,12 +252,12 @@ def __repr__(self): class ROCrateInvalidURIError(ROCValidatorError): """Raised when an invalid URI is provided.""" - def __init__(self, uri: Union[str, Path, URI], message: Optional[str] = None): + def __init__(self, uri: str | Path | URI, message: Optional[str] = None): self._uri = uri self._message = message or self.default_error_message(uri) @property - def uri(self) -> Union[str, Path, URI]: + def uri(self) -> str | Path | URI: """The invalid URI, as originally provided (str, Path, or URI).""" return self._uri @@ -278,7 +278,7 @@ def __repr__(self) -> str: return f"ROCrateInvalidURIError({self._uri!r})" @classmethod - def default_error_message(cls, uri: Union[str, Path, URI]) -> str: + def default_error_message(cls, uri: str | Path | URI) -> str: return ( f"\"{uri!s}\" is not a valid RO-Crate URI. 
" "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index 6d83b57d0..04ef200cc 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -15,7 +15,7 @@ import enum from abc import ABC, abstractmethod from functools import total_ordering -from typing import Any, Optional, Union +from typing import Any, Optional import enum_tools @@ -166,7 +166,7 @@ def add_subscriber(self, subscriber): def remove_subscriber(self, subscriber): self.subscribers.remove(subscriber) - def notify(self, event: Union[Event, EventType], ctx: Optional[Any] = None): + def notify(self, event: Event | EventType, ctx: Optional[Any] = None): if isinstance(event, EventType): event = Event(event) # Check if the event has already been notified diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 4fe2f02f5..3734dd7a7 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -77,7 +77,7 @@ logger = logging.getLogger(__name__) -BaseTypes = Union[str, Path, bool, int, None] +BaseTypes = str | Path | bool | int | None @enum.unique @@ -323,7 +323,7 @@ def __get_specification_property__( namespace: Namespace, pop_first: bool = True, as_python_object: bool = True, - ) -> Union[str, list[Union[str, URIRef]], None]: + ) -> str | list[str | URIRef] | None: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[prop])) values: list = [cast("Any", v).toPython() for v in nodes] if (nodes and as_python_object) else list(nodes) @@ -685,8 +685,8 @@ def __init_token_version__(self) -> tuple[str, Optional[str]]: @classmethod def __load_profile_path__( cls, - profiles_base_path: Union[str, Path], - profile_path: Union[str, Path], + profiles_base_path: str | Path, + profile_path: str | Path, publicID: Optional[str] 
= None, severity: Severity = Severity.REQUIRED, ) -> Profile: @@ -709,8 +709,8 @@ def __load_profile_path__( @classmethod def __load_profiles_paths__( cls, - profiles_path: Optional[Union[str, Path]] = None, - extra_profiles_path: Optional[Union[str, Path]] = None, + profiles_path: Optional[str | Path] = None, + extra_profiles_path: Optional[str | Path] = None, ) -> list[tuple[Path, Path]]: """ Load the paths of the profiles from the given profiles path and extra profiles path. @@ -755,8 +755,8 @@ def __load_profiles_paths__( @classmethod def load_profiles( cls, - profiles_path: Union[str, Path], - extra_profiles_path: Optional[Union[str, Path]] = None, + profiles_path: str | Path, + extra_profiles_path: Optional[str | Path] = None, publicID: Optional[str] = None, severity: Severity = Severity.REQUIRED, allow_requirement_check_override: bool = True, @@ -1681,7 +1681,7 @@ class ValidationStatistics(Subscriber): def __init__( self, - settings: Union[dict, ValidationSettings], + settings: dict | ValidationSettings, context: Optional[ValidationContext] = None, skip_initialization: bool = False, ): @@ -2718,7 +2718,7 @@ class ValidationSettings: disable_remote_crate_download: bool = True # Requirement settings #: The requirement severity - requirement_severity: Union[str, Severity] = Severity.REQUIRED + requirement_severity: str | Severity = Severity.REQUIRED #: Flag to validate requirement severity only skipping check with lower or higher severity requirement_severity_only: bool = False # Requirement check settings @@ -2816,7 +2816,7 @@ def rocrate_uri(self) -> Optional[URI]: return self._rocrate_uri @rocrate_uri.setter - def rocrate_uri(self, value: Union[str, Path, URI]): + def rocrate_uri(self, value: str | Path | URI): """ Set the RO-Crate URI. 
@@ -2828,7 +2828,7 @@ def rocrate_uri(self, value: Union[str, Path, URI]): self._rocrate_uri: URI = URI(str(value)) @classmethod - def parse(cls, settings: Union[dict, ValidationSettings]) -> ValidationSettings: + def parse(cls, settings: dict | ValidationSettings) -> ValidationSettings: """ Parse the settings to a ValidationSettings object. @@ -3005,7 +3005,7 @@ class Validator(Publisher): Validates the RO-Crate against the specified subset of the profile requirements. """ - def __init__(self, settings: Union[dict, ValidationSettings]): + def __init__(self, settings: dict | ValidationSettings): self._validation_settings = ValidationSettings.parse(settings) super().__init__() # initialize the current context @@ -3192,7 +3192,7 @@ def __invoke_post_validation_hooks__(self, context: ValidationContext): requirement_type.finalize(context) logger.debug("Finalizing requirement types: completed") - def notify(self, event: Union[Event, EventType], ctx: Optional[Any] = None): + def notify(self, event: Event | EventType, ctx: Optional[Any] = None): """Override notify to update statistics""" assert self.__current_context__ is not None, "No current validation context" result: ValidationResult = self.__current_context__.result diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index a2fa55ddd..6420bdbfb 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Union, cast +from typing import TYPE_CHECKING, Optional, cast from rdflib import Graph, Literal, Namespace, URIRef @@ -356,7 +356,7 @@ def is_node_deactivated(self, node: Node) -> bool: return True return False - def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = None) -> list[Shape]: + def load_shapes(self, shapes_path: str | Path, publicID: Optional[str] = None) -> list[Shape]: """ 
Load the shapes from the graph """ diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 9b88f8ca1..357c77483 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -15,7 +15,7 @@ from __future__ import annotations import hashlib -from typing import TYPE_CHECKING, Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, cast if TYPE_CHECKING: from pathlib import Path @@ -68,7 +68,7 @@ def map_severity(shacl_severity: str) -> Severity: raise RuntimeError(f"Unrecognized SHACL severity term {shacl_severity}") -def make_uris_relative(text: str, ro_crate_path: Union[Path, str]) -> str: +def make_uris_relative(text: str, ro_crate_path: Path | str) -> str: # globally replace the string "file://" with "./ return text.replace(str(ro_crate_path), "./") diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 749c939ec..34c151a85 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -15,7 +15,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, cast import pyshacl from rdflib import BNode, Graph @@ -310,7 +310,7 @@ def sourceConstraintComponent(self): f"Unable to get source constraint component from violation node {self._violation_node}" return self._source_constraint_component - def get_result_message(self, ro_crate_path: Union[Path, str]) -> str: + def get_result_message(self, ro_crate_path: Path | str) -> str: if not self._result_message: message = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}resultMessage")) assert message is not None, f"Unable to get result message from violation node {self._violation_node}" @@ -318,12 +318,12 @@ def get_result_message(self, 
ro_crate_path: Union[Path, str]) -> str: return self._result_message @property - def sourceShape(self) -> Union[URIRef, BNode]: + def sourceShape(self) -> URIRef | BNode: if not self._source_shape_node: self._source_shape_node = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}sourceShape")) assert self._source_shape_node is not None, \ f"Unable to get source shape node from violation node {self._violation_node}" - return cast("Union[URIRef, BNode]", self._source_shape_node) + return cast("URIRef | BNode", self._source_shape_node) class SHACLValidationResult: @@ -373,8 +373,8 @@ class SHACLValidator: def __init__( self, - shapes_graph: Optional[Union[GraphLike, str, bytes]], - ont_graph: Optional[Union[GraphLike, str, bytes]] = None, + shapes_graph: Optional[GraphLike | str | bytes], + ont_graph: Optional[GraphLike | str | bytes] = None, ) -> None: """ Create a new SHACLValidator instance. @@ -391,17 +391,17 @@ def __init__( self._ont_graph = ont_graph @property - def shapes_graph(self) -> Optional[Union[GraphLike, str, bytes]]: + def shapes_graph(self) -> Optional[GraphLike | str | bytes]: return self._shapes_graph @property - def ont_graph(self) -> Optional[Union[GraphLike, str, bytes]]: + def ont_graph(self) -> Optional[GraphLike | str | bytes]: return self._ont_graph def validate( self, # data to validate - data_graph: Union[GraphLike, str, bytes], + data_graph: GraphLike | str | bytes, # validation settings abort_on_first: Optional[bool] = True, advanced: Optional[bool] = True, diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 15faab500..983155a27 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -21,7 +21,7 @@ import zipfile from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Optional, Union, cast +from typing import Any, Optional, cast from urllib.parse import unquote from rdflib import Graph @@ -47,8 +47,8 @@ def id(self) -> str: return cast('str', 
self._raw_data.get('@id')) @property - def type(self) -> Union[str, list[str]]: - return cast('Union[str, list[str]]', self._raw_data.get('@type')) + def type(self) -> str | list[str]: + return cast('str | list[str]', self._raw_data.get('@type')) def is_dataset(self) -> bool: return self.has_type('Dataset') @@ -83,7 +83,7 @@ def get_id_as_path(cls, entity_id: str, ro_crate: Optional[ROCrate] = None) -> P @staticmethod def get_path_from_identifier( identifier: str, - rocrate_path: Optional[Union[str, Path]] = None, + rocrate_path: Optional[str | Path] = None, decode: bool = False, ) -> Path: """ @@ -362,7 +362,7 @@ def get_entities(self) -> list[ROCrateEntity]: return [ROCrateEntity(self, entity) for entity in self.as_dict().get("@graph", [])] def get_entities_by_type( - self, entity_type: Union[str, list[str]] + self, entity_type: str | list[str] ) -> list[ROCrateEntity]: entity_types = [entity_type] if isinstance(entity_type, str) else entity_type return [e for e in self.get_entities() if e.has_types(entity_types)] @@ -444,7 +444,7 @@ class ROCrate(ABC): Base class for representing and interacting with a Research Object Crate (RO-Crate). """ - def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None): + def __new__(cls, uri: str | Path | URI, relative_root_path: Optional[Path] = None): """ Factory method to create the appropriate ROCrate subclass instance. @@ -469,7 +469,7 @@ def __new__(cls, uri: Union[str, Path, URI], relative_root_path: Optional[Path] instance.relative_root_path = relative_root_path return instance - def __init__(self, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None) -> None: + def __init__(self, uri: str | Path | URI, relative_root_path: Optional[Path] = None) -> None: """ Initialize the RO-Crate. 
@@ -668,7 +668,7 @@ def get_file_size(self, path: Path) -> int: @abstractmethod def get_file_content( self, path: Path, binary_mode: bool = True - ) -> Union[str, bytes]: + ) -> str | bytes: """ Get the content of a file in the RO-Crate. @@ -685,7 +685,7 @@ def get_file_content( @staticmethod def get_external_file_content( uri: str, binary_mode: bool = True - ) -> Union[str, bytes]: + ) -> str | bytes: """ Get the content of an external file. @@ -741,7 +741,7 @@ def from_metadata_dict( @staticmethod def new_instance( - uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None + uri: str | Path | URI, relative_root_path: Optional[Path] = None ) -> ROCrate: """ Create a new instance of the RO-Crate based on the URI. @@ -792,7 +792,7 @@ class ROCrateLocalFolder(ROCrate): Class representing an RO-Crate stored in a local folder. """ - def __init__(self, path: Union[str, Path, URI], relative_root_path: Optional[Path] = None): + def __init__(self, path: str | Path | URI, relative_root_path: Optional[Path] = None): super().__init__(path, relative_root_path=relative_root_path) # cache the list of files @@ -823,7 +823,7 @@ def get_file_size(self, path: Path) -> int: def get_file_content( self, path: Path, binary_mode: bool = True - ) -> Union[str, bytes]: + ) -> str | bytes: path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") @@ -833,7 +833,7 @@ def get_file_content( class ROCrateLocalZip(ROCrate): def __init__( self, - path: Union[str, Path, URI], + path: str | Path | URI, relative_root_path: Optional[Path] = None, init_zip: bool = True, ): @@ -876,7 +876,7 @@ def __init_zip_reference__(self): self._zipref = zipfile.ZipFile(path) # pylint: disable=consider-using-with logger.debug("Initialized zip reference: %s", self._zipref) - def __get_file_info__(self, path: Union[str, Path]) -> zipfile.ZipInfo: + def __get_file_info__(self, path: str | Path) -> zipfile.ZipInfo: assert self._zipref is not None, "Zip 
reference not initialized" try: return self._zipref.getinfo(str(path)) @@ -934,7 +934,7 @@ def get_file_size(self, path: Path) -> int: def get_file_content( self, path: Path, binary_mode: bool = True - ) -> Union[str, bytes]: + ) -> str | bytes: path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") @@ -945,7 +945,7 @@ def get_file_content( class ROCrateRemoteZip(ROCrateLocalZip): - def __init__(self, path: Union[str, Path, URI], relative_root_path: Optional[Path] = None): + def __init__(self, path: str | Path | URI, relative_root_path: Optional[Path] = None): super().__init__(path, relative_root_path=relative_root_path, init_zip=False) # # initialize the zip reference @@ -1016,7 +1016,7 @@ def __init__(self, uri, relative_root_path=None): assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" @staticmethod - def is_bagit_wrapping_crate(uri: Union[str, Path, URI]) -> bool: + def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: """ Check if the RO-Crate is a BagIt-wrapped crate. 
@@ -1097,7 +1097,7 @@ def __check_search_path__(self, path): class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): - def __init__(self, uri: Union[str, Path, URI], relative_root_path: Optional[Path] = None): + def __init__(self, uri: str | Path | URI, relative_root_path: Optional[Path] = None): # initialize the parent classes super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) # check if the path is a BagIt-wrapped crate diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index af6aae833..8c9680680 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -17,7 +17,7 @@ import tempfile import zipfile from pathlib import Path -from typing import Optional, Union +from typing import Optional from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST, HTTP_STATUS_GATEWAY_TIMEOUT from rocrate_validator.errors import ProfileNotFound @@ -35,7 +35,7 @@ logger = logging.getLogger(__name__) -def detect_profiles(settings: Union[dict, ValidationSettings]) -> list[Profile]: +def detect_profiles(settings: dict | ValidationSettings) -> list[Profile]: # initialize the validator validator = __initialise_validator__(settings) # detect the profiles @@ -45,7 +45,7 @@ def detect_profiles(settings: Union[dict, ValidationSettings]) -> list[Profile]: def validate_metadata_as_dict( - metadata_dict: dict, settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None + metadata_dict: dict, settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None ) -> ValidationResult: """ Validate the RO-Crate metadata only against a profile and return the validation result. 
@@ -64,7 +64,7 @@ def validate_metadata_as_dict( def validate( - settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None + settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None ) -> ValidationResult: """ Validate a RO-Crate against a profile and return the validation result @@ -147,7 +147,7 @@ def _download_remote_rocrate( def __initialise_validator__( - settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None + settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None ) -> Validator: """ Validate a RO-Crate against a profile diff --git a/rocrate_validator/utils/io_helpers/colors.py b/rocrate_validator/utils/io_helpers/colors.py index fa6e28424..f2896d489 100644 --- a/rocrate_validator/utils/io_helpers/colors.py +++ b/rocrate_validator/utils/io_helpers/colors.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Union from rocrate_validator.models import LevelCollection, Severity -def get_severity_color(severity: Union[str, Severity]) -> str: +def get_severity_color(severity: str | Severity) -> str: """ Get the color for the severity diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 23639091c..3728c0460 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -13,7 +13,7 @@ # limitations under the License. 
-from typing import Optional, Union +from typing import Optional from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn @@ -39,7 +39,7 @@ class ProgressMonitor(Subscriber): REQUIREMENT_VALIDATION = "Requirements" REQUIREMENT_CHECK_VALIDATION = "Requirements Checks" - def __init__(self, settings: Union[dict, ValidationSettings], + def __init__(self, settings: dict | ValidationSettings, stats: Optional[ValidationStatistics] = None): self.__progress = Progress( TextColumn("[progress.description]{task.description}"), diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 589f9f073..0906ad154 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -15,7 +15,7 @@ import enum import re from pathlib import Path -from typing import Optional, Union +from typing import Optional from urllib.parse import ParseResult, parse_qsl, urlparse, urlsplit from rocrate_validator import errors @@ -118,7 +118,7 @@ class URI: # Backwards-compatible alias kept for callers that still inspect it. 
REMOTE_SUPPORTED_SCHEMA = SUPPORTED_ROCRATE_SCHEMES[:-1] # http, https, ftp - def __init__(self, uri: Union[str, Path]): + def __init__(self, uri: str | Path): if uri is None or (isinstance(uri, str) and not uri.strip()): raise ValueError("Invalid URI: empty value") self._uri = uri = str(uri) @@ -282,7 +282,7 @@ def __hash__(self): return hash(self._uri) -def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bool: +def validate_rocrate_uri(uri: str | Path | URI, silent: bool = False) -> bool: """ Validate the RO-Crate URI diff --git a/tests/shared.py b/tests/shared.py index e289a1d91..b430ab1e4 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -22,7 +22,7 @@ import tempfile from collections.abc import Collection from pathlib import Path -from typing import Optional, TypeVar, Union +from typing import Optional, TypeVar from urllib.parse import urljoin import rdflib @@ -121,7 +121,7 @@ def _prepare_temp_rocrate( def do_entity_test( - rocrate_path: Union[Path, str], + rocrate_path: Path | str, requirement_severity: models.Severity, expected_validation_result: bool, expected_triggered_requirements: Optional[list[str]] = None, From 95e05ebb23f41a8e8f154087a3ab828c2f5ebe12 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:22:35 +0200 Subject: [PATCH 293/352] =?UTF-8?q?style(typing):=20=F0=9F=8E=A8=20use=20X?= =?UTF-8?q?=20|=20None=20instead=20of=20Optional[X]=20(UP045)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/cache.py | 37 ++-- rocrate_validator/cli/commands/profiles.py | 8 +- rocrate_validator/cli/commands/validate.py | 23 +- rocrate_validator/cli/ui/text/validate.py | 10 +- rocrate_validator/errors.py | 40 ++-- rocrate_validator/events.py | 8 +- rocrate_validator/models.py | 196 +++++++++--------- .../ro-crate/must/0_file_descriptor_format.py | 4 +- .../requirements/python/__init__.py | 20 +- .../requirements/shacl/checks.py | 10 
+- .../requirements/shacl/errors.py | 5 +- .../requirements/shacl/models.py | 52 ++--- .../requirements/shacl/requirements.py | 4 +- rocrate_validator/requirements/shacl/utils.py | 10 +- .../requirements/shacl/validator.py | 66 +++--- rocrate_validator/rocrate.py | 46 ++-- rocrate_validator/services.py | 17 +- rocrate_validator/utils/cache_warmup.py | 10 +- rocrate_validator/utils/collections.py | 4 +- rocrate_validator/utils/document_loader.py | 4 +- rocrate_validator/utils/http.py | 14 +- rocrate_validator/utils/io_helpers/input.py | 20 +- .../utils/io_helpers/output/__init__.py | 4 +- .../utils/io_helpers/output/console.py | 6 +- .../utils/io_helpers/output/json/__init__.py | 4 +- .../io_helpers/output/json/formatters.py | 8 +- .../utils/io_helpers/output/text/__init__.py | 4 +- .../io_helpers/output/text/layout/progress.py | 7 +- .../io_helpers/output/text/layout/report.py | 34 +-- rocrate_validator/utils/log.py | 4 +- rocrate_validator/utils/python_helpers.py | 5 +- rocrate_validator/utils/rdf.py | 3 +- rocrate_validator/utils/uri.py | 5 +- rocrate_validator/utils/versioning.py | 5 +- tests/shared.py | 20 +- tests/unit/requirements/test_shacl_checks.py | 3 +- 36 files changed, 354 insertions(+), 366 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 400cd9180..0d8ec9c2e 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -23,7 +23,6 @@ import json from datetime import datetime, timezone from pathlib import Path -from typing import Optional from rich.table import Table @@ -39,7 +38,7 @@ logger = logging.getLogger(__name__) -def _resolve_cache_path(cache_path: Optional[Path]) -> Path: +def _resolve_cache_path(cache_path: Path | None) -> Path: """Return the effective cache path, creating the parent directory.""" path = get_default_http_cache_path() if cache_path is None else Path(cache_path) path.parent.mkdir(parents=True, exist_ok=True) @@ -73,7 +72,7 
@@ def cache(ctx): help="Path to the HTTP cache directory (defaults to the user cache dir)", ) @click.pass_context -def cache_info(ctx, cache_path: Optional[Path] = None): +def cache_info(ctx, cache_path: Path | None = None): """ Display information about the HTTP cache. """ @@ -139,10 +138,10 @@ def cache_info(ctx, cache_path: Optional[Path] = None): @click.pass_context def cache_list( ctx, - cache_path: Optional[Path] = None, - url_filter: Optional[str] = None, + cache_path: Path | None = None, + url_filter: str | None = None, sort_by: str = "created", - sort_order: Optional[str] = None, + sort_order: str | None = None, as_json: bool = False, ): """ @@ -207,7 +206,7 @@ def cache_list( help="Do not prompt for confirmation before removing cache entries", ) @click.pass_context -def cache_reset(ctx, cache_path: Optional[Path] = None, yes: bool = False): +def cache_reset(ctx, cache_path: Path | None = None, yes: bool = False): """ Remove every entry from the HTTP cache. """ @@ -310,13 +309,13 @@ def cache_reset(ctx, cache_path: Optional[Path] = None, yes: bool = False): @click.pass_context def cache_warm( ctx, - cache_path: Optional[Path] = None, - profiles_path: Optional[Path] = None, - extra_profiles_path: Optional[Path] = None, - profile_identifier: Optional[list[str]] = None, + cache_path: Path | None = None, + profiles_path: Path | None = None, + extra_profiles_path: Path | None = None, + profile_identifier: list[str] | None = None, all_profiles: bool = False, - crate: Optional[list[str]] = None, - url: Optional[list[str]] = None, + crate: list[str] | None = None, + url: list[str] | None = None, ): """ Pre-populate the HTTP cache with resources declared by profiles and with @@ -340,7 +339,7 @@ def cache_warm( requested_ids = list(profile_identifier or []) urls: list[str] = [] - profile_scope: Optional[str] = None + profile_scope: str | None = None # Only fall back to "warm all profiles" when the user gave no other # source (no -p, no --crate, no --url, no 
--all-profiles). @@ -487,13 +486,13 @@ def _format_bytes(size: int) -> str: return f"{value:.2f} {units[idx]}" -def _format_dt(value: Optional[datetime]) -> str: +def _format_dt(value: datetime | None) -> str: if value is None: return "—" return value.strftime("%Y-%m-%d %H:%M:%SZ") if value.tzinfo else value.strftime("%Y-%m-%d %H:%M:%S") -def _format_expires(value: Optional[datetime], is_expired: bool) -> str: +def _format_expires(value: datetime | None, is_expired: bool) -> str: if value is None: return "never" formatted = _format_dt(value) @@ -504,9 +503,9 @@ def _format_expires(value: Optional[datetime], is_expired: bool) -> str: def _collect_cache_entries( - url_filter: Optional[str] = None, + url_filter: str | None = None, sort_by: str = "size", - sort_order: Optional[str] = None, + sort_order: str | None = None, ) -> list[dict]: """ Read every cached response and return a list of plain dicts. Filtering @@ -555,7 +554,7 @@ def _collect_cache_entries( def _entry_to_dict(entry: dict) -> dict: """JSON-safe view of an entry produced by ``_collect_cache_entries``.""" - def _iso(value: Optional[datetime]) -> Optional[str]: + def _iso(value: datetime | None) -> str | None: return value.isoformat() if value is not None else None return { "url": entry["url"], diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index 8fedb445a..8ed830893 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -15,7 +15,7 @@ import re import sys from pathlib import Path -from typing import Any, Optional +from typing import Any from rich.align import Align from rich.markdown import Markdown @@ -58,7 +58,7 @@ ) @click.pass_context def profiles(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None): + extra_profiles_path: Path | None = None): """ [magenta]rocrate-validator:[/magenta] Manage profiles """ @@ -177,9 +177,9 @@ def list_profiles(ctx, 
no_paging: bool = False): # , profiles_path: Path = DEFA @click.pass_context def describe_profile(ctx, profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, - check_identifier: Optional[str] = None, + check_identifier: str | None = None, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, + extra_profiles_path: Path | None = None, verbose: bool = False, no_paging: bool = False): """ Show a profile, or — when CHECK_IDENTIFIER is given — show a single requirement check. diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 9fd0ff2ac..596eb4549 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -17,7 +17,6 @@ import sys from contextlib import nullcontext from pathlib import Path -from typing import Optional import rich_click as click from rich.padding import Padding @@ -237,24 +236,24 @@ def validate_uri(ctx, param, value): @click.pass_context def validate(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, + extra_profiles_path: Path | None = None, profile_identifier: tuple[str, ...] 
= (), metadata_only: bool = False, no_auto_profile: bool = False, disable_profile_inheritance: bool = False, requirement_severity: str = Severity.REQUIRED.name, requirement_severity_only: bool = False, - skip_checks: Optional[list[str]] = None, + skip_checks: list[str] | None = None, rocrate_uri: str | Path = ".", - relative_root_path: Optional[Path] = None, + relative_root_path: Path | None = None, fail_fast: bool = False, no_paging: bool = False, verbose: bool = False, output_format: str = "text", - output_file: Optional[Path] = None, - output_line_width: Optional[int] = None, + output_file: Path | None = None, + output_line_width: int | None = None, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[Path] = None, + cache_path: Path | None = None, no_cache: bool = False, offline: bool = False): """ @@ -449,7 +448,7 @@ def _warn_if_remote_offline(console: Console, rocrate_uri: str | Path, offline: ) -def _parse_skip_checks(skip_checks: Optional[list[str]]) -> list[str]: +def _parse_skip_checks(skip_checks: list[str] | None) -> list[str]: """Parse the comma-separated ``--skip-checks`` option into a flat list of check IDs.""" logger.debug("skip_checks: %s", skip_checks) skip_checks_list: list[str] = [] @@ -581,8 +580,8 @@ def _render_file_or_collected_result( interactive: bool, verbose: bool, output_format: str, - output_file: Optional[Path], - output_line_width: Optional[int], + output_file: Path | None, + output_line_width: int | None, ) -> ValidationResult: """Validate for the file/JSON-input path, optionally writing a text report to file.""" if interactive: @@ -619,8 +618,8 @@ def _emit_json_report( *, console: Console, interactive: bool, - output_file: Optional[Path], - output_line_width: Optional[int], + output_file: Path | None, + output_line_width: int | None, ) -> None: """Write the aggregated validation results as JSON to a file or stdout.""" if interactive: diff --git a/rocrate_validator/cli/ui/text/validate.py 
b/rocrate_validator/cli/ui/text/validate.py index 8a607e770..f6eb16e96 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.output.console import Console @@ -37,18 +37,18 @@ class ValidationCommandView: """ def __init__(self, - validation_settings: Optional[ValidationSettings], + validation_settings: ValidationSettings | None, interactive: bool = True, no_paging: bool = False, - pager: Optional[SystemPager] = None, - console: Optional[Console] = None): + pager: SystemPager | None = None, + console: Console | None = None): self.console = console or Console() self.interactive = interactive self.pager = pager if not no_paging else None # reference to the validation settings self.validation_settings = validation_settings # reference to the report layout - self._report_layout: Optional[ValidationReportLayout] = None + self._report_layout: ValidationReportLayout | None = None # Register text output formatter self.console.register_formatter(TextOutputFormatter()) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index f3b06309c..d71cb4e6c 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING if TYPE_CHECKING: # Imported only for type-checking to avoid a circular import: @@ -31,11 +31,11 @@ class ROCValidatorError(Exception): class ProfilesDirectoryNotFound(ROCValidatorError): """Raised when the profiles directory is not found.""" - def __init__(self, profiles_path: Optional[str] = None): + def __init__(self, profiles_path: str | None = None): self._profiles_path = profiles_path @property - def profiles_path(self) -> 
Optional[str]: + def profiles_path(self) -> str | None: """The path to the profiles directory.""" return self._profiles_path @@ -49,11 +49,11 @@ def __repr__(self): class InvalidProfilePath(ROCValidatorError): """Raised when an invalid profile path is provided.""" - def __init__(self, profile_path: Optional[str] = None): + def __init__(self, profile_path: str | None = None): self._profile_path = profile_path @property - def profile_path(self) -> Optional[str]: + def profile_path(self) -> str | None: """The invalid profile path.""" return self._profile_path @@ -67,17 +67,17 @@ def __repr__(self): class ProfileNotFound(ROCValidatorError): """Raised when a profile is not found.""" - def __init__(self, profile_name: Optional[str] = None, message: Optional[str] = None): + def __init__(self, profile_name: str | None = None, message: str | None = None): self._profile_name = profile_name self._message = message @property - def profile_name(self) -> Optional[str]: + def profile_name(self) -> str | None: """The name of the profile.""" return self._profile_name @property - def message(self) -> Optional[str]: + def message(self) -> str | None: """The error message.""" return self._message @@ -91,11 +91,11 @@ def __repr__(self): class ProfileSpecificationNotFound(ROCValidatorError): """Raised when the profile specification is not found.""" - def __init__(self, spec_file: Optional[str] = None): + def __init__(self, spec_file: str | None = None): self._spec_file = spec_file @property - def spec_file(self) -> Optional[str]: + def spec_file(self) -> str | None: """The name of the profile specification file.""" return self._spec_file @@ -112,11 +112,11 @@ def __repr__(self): class ProfileSpecificationError(ROCValidatorError): """Raised when an error occurs in the profile specification.""" - def __init__(self, message: Optional[str] = None): + def __init__(self, message: str | None = None): self._message = message @property - def message(self) -> Optional[str]: + def message(self) -> 
str | None: """The error message.""" return self._message @@ -130,7 +130,7 @@ def __repr__(self): class DuplicateRequirementCheck(ROCValidatorError): """Raised when a duplicate requirement check is found.""" - def __init__(self, check_name: str, profile_name: Optional[str] = None): + def __init__(self, check_name: str, profile_name: str | None = None): self._check_name = check_name self._profile_name = profile_name @@ -140,7 +140,7 @@ def check_name(self) -> str: return self._check_name @property - def profile_name(self) -> Optional[str]: + def profile_name(self) -> str | None: """The name of the profile.""" return self._profile_name @@ -154,11 +154,11 @@ def __repr__(self): class InvalidSerializationFormat(ROCValidatorError): """Raised when an invalid serialization format is provided.""" - def __init__(self, fmt: Optional[str] = None): + def __init__(self, fmt: str | None = None): self._format = fmt @property - def serialization_format(self) -> Optional[str]: + def serialization_format(self) -> str | None: """The invalid serialization format.""" return self._format @@ -252,7 +252,7 @@ def __repr__(self): class ROCrateInvalidURIError(ROCValidatorError): """Raised when an invalid URI is provided.""" - def __init__(self, uri: str | Path | URI, message: Optional[str] = None): + def __init__(self, uri: str | Path | URI, message: str | None = None): self._uri = uri self._message = message or self.default_error_message(uri) @@ -288,17 +288,17 @@ def default_error_message(cls, uri: str | Path | URI) -> str: class ROCrateMetadataNotFoundError(ROCValidatorError): """Raised when the RO-Crate metadata is not found.""" - def __init__(self, message: Optional[str] = None, path: Optional[str] = None): + def __init__(self, message: str | None = None, path: str | None = None): self._message = message self._path = path @property - def message(self) -> Optional[str]: + def message(self) -> str | None: """The error message.""" return self._message @property - def path(self) -> 
Optional[str]: + def path(self) -> str | None: """The path where the error occurred.""" return self._path diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index 04ef200cc..3041751a7 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -15,7 +15,7 @@ import enum from abc import ABC, abstractmethod from functools import total_ordering -from typing import Any, Optional +from typing import Any import enum_tools @@ -59,7 +59,7 @@ class Event: Base class for representing events """ - def __init__(self, event_type: EventType, message: Optional[str] = None): + def __init__(self, event_type: EventType, message: str | None = None): """ Initialize the event. @@ -138,7 +138,7 @@ def __init__(self, name): self.name = name @abstractmethod - def update(self, event: Event, ctx: Optional[Any] = None): + def update(self, event: Event, ctx: Any | None = None): """ Update the subscriber with the event @@ -166,7 +166,7 @@ def add_subscriber(self, subscriber): def remove_subscriber(self, subscriber): self.subscribers.remove(subscriber) - def notify(self, event: Event | EventType, ctx: Optional[Any] = None): + def notify(self, event: Event | EventType, ctx: Any | None = None): if isinstance(event, EventType): event = Event(event) # Check if the event has already been notified diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 3734dd7a7..13be6ee8f 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -25,7 +25,7 @@ from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional, Protocol, Union, cast +from typing import TYPE_CHECKING, Any, Protocol, cast from urllib.error import HTTPError import enum_tools @@ -228,9 +228,9 @@ def __init__( self, profiles_base_path: Path, profile_path: Path, - requirements: Optional[list[Requirement]] = None, - identifier: Optional[str] = None, - publicID: Optional[str] = 
None, + requirements: list[Requirement] | None = None, + identifier: str | None = None, + publicID: str | None = None, severity: Severity = Severity.REQUIRED, ): """ @@ -258,11 +258,11 @@ def __init__( :meta private: """ - self._identifier: Optional[str] = identifier + self._identifier: str | None = identifier self._profiles_base_path = profiles_base_path self._profile_path = profile_path - self._name: Optional[str] = None - self._description: Optional[str] = None + self._name: str | None = None + self._description: str | None = None self._requirements: list[Requirement] = requirements if requirements is not None else [] self._publicID = publicID self._severity = severity @@ -273,7 +273,7 @@ def __init__( self._profile_node: Any = None # init property to store the RDF graph of the profile specification - self._profile_specification_graph: Optional[Graph] = None + self._profile_specification_graph: Graph | None = None # check if the profile specification file exists spec_file = self.profile_specification_file_path @@ -489,7 +489,7 @@ def profile_specification_file_path(self) -> Path: return self.path / PROFILE_SPECIFICATION_FILE @property - def publicID(self) -> Optional[str]: + def publicID(self) -> str | None: """ The public identifier of the RO-Crate which is validated by the profile. 
@@ -542,7 +542,7 @@ def get_requirements(self, severity: Severity = Severity.REQUIRED, exact_match: or (exact_match and requirement.severity_from_path == severity) ] - def get_requirement(self, name: str) -> Optional[Requirement]: + def get_requirement(self, name: str) -> Requirement | None: """ Get the requirement with the given name """ @@ -551,7 +551,7 @@ def get_requirement(self, name: str) -> Optional[Requirement]: return requirement return None - def get_requirement_check(self, check_name: str) -> Optional[RequirementCheck]: + def get_requirement_check(self, check_name: str) -> RequirementCheck | None: """ Get the requirement check with the given name """ @@ -628,7 +628,7 @@ def to_dict(self) -> dict: } @staticmethod - def __extract_version_from_token__(token: str) -> Optional[str]: + def __extract_version_from_token__(token: str) -> str | None: if not token: return None pattern = r"\Wv?(\d+(\.\d+(\.\d+)?)?)" @@ -637,11 +637,11 @@ def __extract_version_from_token__(token: str) -> Optional[str]: return matches[-1][0] return None - def __get_consistent_version__(self, candidate_token: str) -> Optional[str]: + def __get_consistent_version__(self, candidate_token: str) -> str | None: candidates = { _ for _ in [ - cast("Optional[str]", self.__get_specification_property__("version", SCHEMA_ORG_NS)), + cast("str | None", self.__get_specification_property__("version", SCHEMA_ORG_NS)), self.__extract_version_from_token__(candidate_token), self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), self.__extract_version_from_token__(str(self.uri)), @@ -664,9 +664,9 @@ def __extract_token_from_path__(self) -> str: # Replace slashes with hyphens return identifier.replace("/", "-") - def __init_token_version__(self) -> tuple[str, Optional[str]]: + def __init_token_version__(self) -> tuple[str, str | None]: # try to extract the token from the specs or the path - candidate_token = cast("Optional[str]", 
self.__get_specification_property__("hasToken", PROF_NS)) + candidate_token = cast("str | None", self.__get_specification_property__("hasToken", PROF_NS)) if not candidate_token: candidate_token = self.__extract_token_from_path__() logger.debug("Candidate token: %s", candidate_token) @@ -687,7 +687,7 @@ def __load_profile_path__( cls, profiles_base_path: str | Path, profile_path: str | Path, - publicID: Optional[str] = None, + publicID: str | None = None, severity: Severity = Severity.REQUIRED, ) -> Profile: # if the path is a string, convert it to a Path @@ -709,8 +709,8 @@ def __load_profile_path__( @classmethod def __load_profiles_paths__( cls, - profiles_path: Optional[str | Path] = None, - extra_profiles_path: Optional[str | Path] = None, + profiles_path: str | Path | None = None, + extra_profiles_path: str | Path | None = None, ) -> list[tuple[Path, Path]]: """ Load the paths of the profiles from the given profiles path and extra profiles path. @@ -756,8 +756,8 @@ def __load_profiles_paths__( def load_profiles( cls, profiles_path: str | Path, - extra_profiles_path: Optional[str | Path] = None, - publicID: Optional[str] = None, + extra_profiles_path: str | Path | None = None, + publicID: str | None = None, severity: Severity = Severity.REQUIRED, allow_requirement_check_override: bool = True, ) -> list[Profile]: @@ -913,7 +913,7 @@ def all(cls) -> list[Profile]: return cls.__profiles_map.values() @classmethod - def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Optional[Profile]: + def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Profile | None: """ Find a profile with the given identifier in the given list of profiles @@ -955,8 +955,8 @@ def __init__( self, profile: Profile, name: str = "", - description: Optional[str] = None, - path: Optional[Path] = None, + description: str | None = None, + path: Path | None = None, initialize_checks: bool = True, ): """ @@ -964,13 +964,13 @@ def __init__( :meta 
private: """ - self._order_number: Optional[int] = None + self._order_number: int | None = None self._profile = profile self._description = description self._path = path # path of code implementing the requirement - self._level_from_path: Optional[RequirementLevel] = None + self._level_from_path: RequirementLevel | None = None self._checks: list[RequirementCheck] = [] - self._overridden: Optional[bool] = None + self._overridden: bool | None = None if not name and path: self._name = get_requirement_name_from_file(path) @@ -1025,11 +1025,11 @@ def name(self) -> str: return self._name @property - def severity_from_path(self) -> Optional[Severity]: + def severity_from_path(self) -> Severity | None: return self.requirement_level_from_path.severity if self.requirement_level_from_path else None @property - def requirement_level_from_path(self) -> Optional[RequirementLevel]: + def requirement_level_from_path(self) -> RequirementLevel | None: if not self._level_from_path and self._path: try: self._level_from_path = LevelCollection.get(self._path.parent.name) @@ -1066,7 +1066,7 @@ def hidden(self) -> bool: pass @property - def path(self) -> Optional[Path]: + def path(self) -> Path | None: return self._path @abstractmethod @@ -1076,7 +1076,7 @@ def __init_checks__(self) -> list[RequirementCheck]: def get_checks(self) -> list[RequirementCheck]: return self._checks.copy() - def get_check(self, name: str) -> Optional[RequirementCheck]: + def get_check(self, name: str) -> RequirementCheck | None: for check in self._checks: if check.name == name: return check @@ -1394,7 +1394,7 @@ class SourceSnippet: """ language: str code: str - source_path: Optional[Path] = None + source_path: Path | None = None @total_ordering @@ -1403,10 +1403,10 @@ class RequirementCheck(ABC): def __init__( self, requirement: Requirement, - name: Optional[str], - level: Optional[RequirementLevel] = LevelCollection.REQUIRED, - description: Optional[str] = None, - hidden: Optional[bool] = None, + name: str | 
None, + level: RequirementLevel | None = LevelCollection.REQUIRED, + description: str | None = None, + hidden: bool | None = None, deactivated: bool = False, ): self._requirement: Requirement = requirement @@ -1495,7 +1495,7 @@ def hidden(self) -> bool: def execute_check(self, context: ValidationContext) -> bool: raise NotImplementedError() - def get_source_snippet(self) -> Optional[SourceSnippet]: + def get_source_snippet(self) -> SourceSnippet | None: """ Return the source code that implements this check, or ``None`` if the backing source cannot be extracted for this check kind. @@ -1546,10 +1546,10 @@ class CheckIssue: def __init__( self, check: RequirementCheck, - message: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingEntity: Optional[str] = None, - value: Optional[str] = None, + message: str | None = None, + violatingProperty: str | None = None, + violatingEntity: str | None = None, + value: str | None = None, ): self._message = message self._check: RequirementCheck = check @@ -1558,7 +1558,7 @@ def __init__( self._propertyValue = value @property - def message(self) -> Optional[str]: + def message(self) -> str | None: """The message associated with the issue""" return self._message @@ -1582,7 +1582,7 @@ def check(self) -> RequirementCheck: return self._check @property - def violatingEntity(self) -> Optional[str]: + def violatingEntity(self) -> str | None: """ It represents the specific element being evaluated that fails to meet the defined rules or constraints within a validation process. @@ -1594,7 +1594,7 @@ def violatingEntity(self) -> Optional[str]: return self._violatingEntity @property - def violatingProperty(self) -> Optional[str]: + def violatingProperty(self) -> str | None: """ It refers to the specific property or relationship within an item that leads to a validation failure. 
@@ -1607,7 +1607,7 @@ def violatingProperty(self) -> Optional[str]: return self._violatingProperty @property - def violatingPropertyValue(self) -> Optional[str]: + def violatingPropertyValue(self) -> str | None: """ It represents the value of the violatingProperty that leads to a validation failure. @@ -1682,7 +1682,7 @@ class ValidationStatistics(Subscriber): def __init__( self, settings: dict | ValidationSettings, - context: Optional[ValidationContext] = None, + context: ValidationContext | None = None, skip_initialization: bool = False, ): super().__init__(name=self.__class__.__name__) @@ -1691,7 +1691,7 @@ def __init__( self._settings = settings self._context = context self._stats = self.__initialise__(settings) if not skip_initialization else {} - self._result: Optional[ValidationResult] = None + self._result: ValidationResult | None = None self._listeners: list[ValidationStatisticsListener] = [] @property @@ -1702,7 +1702,7 @@ def validation_settings(self) -> ValidationSettings: return self._settings @property - def validation_result(self) -> Optional[ValidationResult]: + def validation_result(self) -> ValidationResult | None: """ Get the validation result """ @@ -1843,21 +1843,21 @@ def validated_checks(self) -> list[RequirementCheck]: return self._stats.get("validated_checks", []) @property - def started_at(self) -> Optional[datetime]: + def started_at(self) -> datetime | None: """ Get the timestamp when validation started """ return self._stats.get("started_at") @property - def finished_at(self) -> Optional[datetime]: + def finished_at(self) -> datetime | None: """ Get the timestamp when validation finished """ return self._stats.get("finished_at") @property - def duration(self) -> Optional[float]: + def duration(self) -> float | None: """ Get the duration of the validation process in seconds """ @@ -1993,24 +1993,24 @@ def __initialise__(cls, validation_settings: ValidationSettings): logger.debug(result) return result - def update(self, event: Event, ctx: 
Optional[ValidationContext] = None) -> None: + def update(self, event: Event, ctx: ValidationContext | None = None) -> None: self.__event_handlers__.get(event.event_type, lambda e, c: None)(event, ctx) - def __handle_validation_start__(self, _event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: logger.debug("Validation started") self._stats["started_at"] = datetime.now(timezone.utc) - def __handle_profile_validation_start__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_profile_validation_start__(self, event: Event, _ctx: ValidationContext | None) -> None: assert isinstance(event, ProfileValidationEvent) logger.debug("Profile validation start: %s", event.profile.identifier) - def __handle_requirement_validation_start__(self, _event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_requirement_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: logger.debug("Requirement validation start") - def __handle_requirement_check_validation_start__(self, _event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_requirement_check_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: logger.debug("Requirement check validation start") - def __handle_requirement_check_validation_end__(self, event: Event, ctx: Optional[ValidationContext]) -> None: + def __handle_requirement_check_validation_end__(self, event: Event, ctx: ValidationContext | None) -> None: assert isinstance(event, RequirementCheckValidationEvent) assert ctx is not None target_profile = ctx.target_validation_profile @@ -2036,7 +2036,7 @@ def __handle_requirement_check_validation_end__(self, event: Event, ctx: Optiona event.requirement_check.identifier, ) - def __handle_requirement_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def 
__handle_requirement_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: assert isinstance(event, RequirementValidationEvent) if not event.requirement.hidden: if event.validation_result: @@ -2046,12 +2046,12 @@ def __handle_requirement_validation_end__(self, event: Event, _ctx: Optional[Val self._stats["validated_requirements"].append(event.requirement) self.notify_listeners() - def __handle_profile_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_profile_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: assert isinstance(event, ProfileValidationEvent) self._stats["validated_profiles"].append(event.profile) logger.debug("Profile validation ended: %s", event.profile.identifier) - def __handle_validation_end__(self, event: Event, _ctx: Optional[ValidationContext]) -> None: + def __handle_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: assert isinstance(event, ValidationEvent) self._result = event.validation_result self._stats["finished_at"] = datetime.now(timezone.utc) @@ -2297,14 +2297,14 @@ def failed_checks(self) -> set[RequirementCheck]: return self._overall_stats.get("failed_checks", set()) @property - def started_at(self) -> Optional[datetime]: + def started_at(self) -> datetime | None: """ Get the timestamp when the aggregated validation started """ return self._overall_stats.get("started_at") @property - def finished_at(self) -> Optional[datetime]: + def finished_at(self) -> datetime | None: """ Get the timestamp when the aggregated validation finished """ @@ -2340,8 +2340,8 @@ def __aggregate_raw_stats__( failed_checks: set[RequirementCheck] = set() passed_requirements: set[Requirement] = set() passed_checks: set[RequirementCheck] = set() - started_at: Optional[datetime] = None - finished_at: Optional[datetime] = None + started_at: datetime | None = None + finished_at: datetime | None = None duration: float = 0.0 # Aggregate 
statistics from each ValidationStatistics instance @@ -2492,7 +2492,7 @@ def _add_executed_check(self, check: RequirementCheck, result: bool): self._skipped_checks.remove(check) logger.debug("Removing check '%s' from skipped checks", check.name) - def get_executed_check_result(self, check: RequirementCheck) -> Optional[bool]: + def get_executed_check_result(self, check: RequirementCheck) -> bool | None: """ Get the result of an executed check """ @@ -2525,14 +2525,14 @@ def issues(self) -> list[CheckIssue]: """ return self._issues.copy() - def get_issues(self, min_severity: Optional[Severity] = None) -> list[CheckIssue]: + def get_issues(self, min_severity: Severity | None = None) -> list[CheckIssue]: """ Get the issues found during the validation with a severity greater than or equal to `min_severity` """ min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.severity >= min_severity] - def get_issues_by_check(self, check: RequirementCheck, min_severity: Optional[Severity] = None) -> list[CheckIssue]: + def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity | None = None) -> list[CheckIssue]: """ Get the issues found during the validation for a specific check with a severity greater than or equal to `min_severity` @@ -2540,14 +2540,14 @@ def get_issues_by_check(self, check: RequirementCheck, min_severity: Optional[Se min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.check == check and issue.severity >= min_severity] - def has_issues(self, min_severity: Optional[Severity] = None) -> bool: + def has_issues(self, min_severity: Severity | None = None) -> bool: """ Check if there are issues with a severity greater than or equal to the given `severity` """ min_severity = min_severity or self.context.requirement_severity return any(issue.severity >= min_severity for issue in self._issues) - def passed(self, min_severity: 
Optional[Severity] = None) -> bool: + def passed(self, min_severity: Severity | None = None) -> bool: """ Check if all checks passed with a severity greater than or equal to the given `severity` """ @@ -2558,9 +2558,9 @@ def add_issue( self, message: str, check: RequirementCheck, - violatingEntity: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingPropertyValue: Optional[str] = None, + violatingEntity: str | None = None, + violatingProperty: str | None = None, + violatingPropertyValue: str | None = None, ) -> CheckIssue: """ Add an issue to the validation result @@ -2656,7 +2656,7 @@ def to_dict(self) -> dict: result["validation_settings"]["rocrate_validator_version"] = __version__ return result - def to_json(self, path: Optional[Path] = None) -> str: + def to_json(self, path: Path | None = None) -> str: """ Convert the ValidationResult to a JSON string """ @@ -2691,12 +2691,12 @@ class ValidationSettings: #: The URI of the RO-Crate rocrate_uri: URI #: The relative root path of the RO-Crate - rocrate_relative_root_path: Optional[Path] = None + rocrate_relative_root_path: Path | None = None # Profile settings #: The path to the profiles profiles_path: Path = DEFAULT_PROFILES_PATH #: The path to the extra profiles - extra_profiles_path: Optional[Path] = None + extra_profiles_path: Path | None = None #: The profile identifier to validate against profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER #: Flag to enable profile inheritance @@ -2711,7 +2711,7 @@ class ValidationSettings: enable_profile_inheritance: bool = True # Validation settings #: Flag to abort on first error - abort_on_first: Optional[bool] = False + abort_on_first: bool | None = False #: Flag to disable reporting of issues related to inherited profiles disable_inherited_profiles_issue_reporting: bool = False #: Flag to disable remote crate download @@ -2727,17 +2727,17 @@ class ValidationSettings: #: Flag to disable the check for duplicates disable_check_for_duplicates: bool 
= False #: Checks to skip - skip_checks: Optional[list[str]] = None + skip_checks: list[str] | None = None #: Flag to validate only the metadata of the RO-Crate metadata_only: bool = False #: RO-Crate metadata as dictionary - metadata_dict: Optional[dict] = None + metadata_dict: dict | None = None #: Verbose output verbose: bool = False #: Cache max age in seconds (negative values mean "never expire") cache_max_age: int = DEFAULT_HTTP_CACHE_MAX_AGE #: Cache path - cache_path: Optional[Path] = None + cache_path: Path | None = None #: Flag to enable offline mode: HTTP requests are served only from the cache offline: bool = False #: Flag to disable the HTTP cache entirely: every request hits the network @@ -2806,7 +2806,7 @@ def to_dict(self): return result @property # type: ignore[no-redef] - def rocrate_uri(self) -> Optional[URI]: + def rocrate_uri(self) -> URI | None: """ Get the RO-Crate URI @@ -2851,14 +2851,14 @@ class ValidationEvent(Event): def __init__( self, event_type: EventType, - validation_result: Optional[ValidationResult] = None, - message: Optional[str] = None, + validation_result: ValidationResult | None = None, + message: str | None = None, ): super().__init__(event_type, message) self._validation_result = validation_result @property - def validation_result(self) -> Optional[ValidationResult]: + def validation_result(self) -> ValidationResult | None: return self._validation_result @@ -2867,7 +2867,7 @@ def __init__( self, event_type: EventType, profile: Profile, - message: Optional[str] = None, + message: str | None = None, ): assert event_type in ( EventType.PROFILE_VALIDATION_START, @@ -2903,8 +2903,8 @@ def __init__( self, event_type: EventType, requirement: Requirement, - validation_result: Optional[bool] = None, - message: Optional[str] = None, + validation_result: bool | None = None, + message: str | None = None, ): assert event_type in ( EventType.REQUIREMENT_VALIDATION_START, @@ -2919,7 +2919,7 @@ def requirement(self) -> Requirement: return 
self._requirement @property - def validation_result(self) -> Optional[bool]: + def validation_result(self) -> bool | None: return self._validation_result def __str__(self) -> str: @@ -2945,8 +2945,8 @@ def __init__( self, event_type: EventType, requirement_check: RequirementCheck, - validation_result: Optional[bool] = None, - message: Optional[str] = None, + validation_result: bool | None = None, + message: str | None = None, ): assert event_type in ( EventType.REQUIREMENT_CHECK_VALIDATION_START, @@ -2961,7 +2961,7 @@ def requirement_check(self) -> RequirementCheck: return self._requirement_check @property - def validation_result(self) -> Optional[bool]: + def validation_result(self) -> bool | None: return self._validation_result def __str__(self) -> str: @@ -3009,7 +3009,7 @@ def __init__(self, settings: dict | ValidationSettings): self._validation_settings = ValidationSettings.parse(settings) super().__init__() # initialize the current context - self.__current_context__: Optional[ValidationContext] = None + self.__current_context__: ValidationContext | None = None @property def validation_settings(self) -> ValidationSettings: @@ -3087,7 +3087,7 @@ def validate_requirements(self, requirements: list[Requirement]) -> ValidationRe # perform the requirements validation return self.__do_validate__(requirements) - def __do_validate__(self, requirements: Optional[list[Requirement]] = None) -> ValidationResult: + def __do_validate__(self, requirements: list[Requirement] | None = None) -> ValidationResult: # initialize the validation context context = ValidationContext(self, self.validation_settings) @@ -3192,7 +3192,7 @@ def __invoke_post_validation_hooks__(self, context: ValidationContext): requirement_type.finalize(context) logger.debug("Finalizing requirement types: completed") - def notify(self, event: Event | EventType, ctx: Optional[Any] = None): + def notify(self, event: Event | EventType, ctx: Any | None = None): """Override notify to update statistics""" assert 
self.__current_context__ is not None, "No current validation context" result: ValidationResult = self.__current_context__.result @@ -3213,13 +3213,13 @@ def __init__(self, validator: Validator, settings: ValidationSettings): # reference to the settings self._settings = settings # reference to the data graph - self._data_graph: Optional[Graph] = None + self._data_graph: Graph | None = None # reference to the profiles - self._profiles: Optional[list[Profile]] = None + self._profiles: list[Profile] | None = None # reference to the target profile - self._target_validation_profile: Optional[Profile] = None + self._target_validation_profile: Profile | None = None # reference to the validation result - self._result: Optional[ValidationResult] = None + self._result: ValidationResult | None = None # additional properties for the context self._properties: dict = {} # URLs already reported as missing from the HTTP cache during this run @@ -3301,7 +3301,7 @@ def profiles_path(self) -> Path: return profiles_path @property - def extra_profiles_path(self) -> Optional[Path]: + def extra_profiles_path(self) -> Path | None: """ The path to the extra profiles diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index cea965d96..7c3ad96d8 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -13,7 +13,7 @@ # limitations under the License. 
import re -from typing import Any, Optional +from typing import Any from urllib.parse import urljoin from rocrate_validator.constants import HTTP_STATUS_OK @@ -369,7 +369,7 @@ def __get_remote_context_keys__(self, context_uri: str) -> set: def __check_entity_keys__(self, entity: Any, context_keys: set, - unexpected_keys: Optional[dict[str, int]] = None) -> dict[str, int]: + unexpected_keys: dict[str, int] | None = None) -> dict[str, int]: """ Check if the entity is in the correct format """ # Ensure unexpected_keys is initialized if unexpected_keys is None: diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 5165b3024..6b6d45aaa 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -16,7 +16,7 @@ import re from collections.abc import Callable from pathlib import Path -from typing import Any, Optional, cast +from typing import Any, cast from rocrate_validator.constants import EXPECTED_CHECK_PARAM_COUNT from rocrate_validator.models import ( @@ -46,8 +46,8 @@ def __init__(self, requirement: Requirement, name: str, check_function: Callable[[RequirementCheck, ValidationContext], bool], - description: Optional[str] = None, - level: Optional[RequirementLevel] = LevelCollection.REQUIRED, + description: str | None = None, + level: RequirementLevel | None = LevelCollection.REQUIRED, deactivated: bool = False): """ check_function: a function that accepts an instance of PyFunctionCheck and a ValidationContext. 
@@ -72,7 +72,7 @@ def execute_check(self, context: ValidationContext) -> bool: return True return self._check_function(self, context) - def get_source_snippet(self) -> Optional[SourceSnippet]: + def get_source_snippet(self) -> SourceSnippet | None: try: code = inspect.getsource(self._check_function) except (OSError, TypeError) as e: @@ -109,8 +109,8 @@ def __init__(self, profile: Profile, requirement_check_class: type[PyFunctionCheck], name: str = "", - description: Optional[str] = None, - path: Optional[Path] = None): + description: str | None = None, + path: Path | None = None): self.requirement_check_class = requirement_check_class super().__init__(profile, name, description, path, initialize_checks=True) @@ -156,7 +156,7 @@ def hidden(self) -> bool: return getattr(self.requirement_check_class, "hidden", False) -def requirement(name: str, description: Optional[str] = None, hidden: bool = False): +def requirement(name: str, description: str | None = None, hidden: bool = False): """ A decorator to mark a class as a requirement class. @@ -185,8 +185,8 @@ def decorator(cls): return decorator -def check(name: Optional[str] = None, - severity: Optional[Severity] = None, +def check(name: str | None = None, + severity: Severity | None = None, deactivated: bool = False): """ A decorator to mark a function as a check. 
@@ -237,7 +237,7 @@ class PyRequirementLoader(RequirementLoader): def load(self, profile: Profile, requirement_level: RequirementLevel, file_path: Path, - publicID: Optional[str] = None) -> list[Requirement]: + publicID: str | None = None) -> list[Requirement]: # instantiate a list to store the requirements requirements: list[Requirement] = [] diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 088f565e5..17120d910 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -62,10 +62,10 @@ def __init__( self, requirement: Requirement, shape: Shape, - name: Optional[str] = None, + name: str | None = None, root: bool = False, - hidden: Optional[bool] = None, - level: Optional[RequirementLevel] = None, + hidden: bool | None = None, + level: RequirementLevel | None = None, ) -> None: self._shape = shape self._root = root @@ -159,7 +159,7 @@ def __compute_requirement_level__(self) -> RequirementLevel: return derived return LevelCollection.REQUIRED - def __derive_level_from_properties__(self) -> Optional[RequirementLevel]: + def __derive_level_from_properties__(self) -> RequirementLevel | None: properties = getattr(self._shape, "properties", None) if not properties: return None @@ -178,7 +178,7 @@ def level(self) -> RequirementLevel: def severity(self) -> Severity: return self.level.severity - def get_source_snippet(self) -> Optional[SourceSnippet]: + def get_source_snippet(self) -> SourceSnippet | None: if self._shape is None: return None try: diff --git a/rocrate_validator/requirements/shacl/errors.py b/rocrate_validator/requirements/shacl/errors.py index 6465a4054..44ba40856 100644 --- a/rocrate_validator/requirements/shacl/errors.py +++ b/rocrate_validator/requirements/shacl/errors.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional from rocrate_validator.errors import ValidationError from rocrate_validator.requirements.shacl.validator import SHACLValidationResult @@ -22,7 +21,7 @@ class SHACLValidationError(ValidationError): def __init__( self, - result: Optional[SHACLValidationResult] = None, + result: SHACLValidationResult | None = None, message: str = "Document does not conform to SHACL shapes.", path: str = ".", code: int = 500, @@ -31,7 +30,7 @@ def __init__( self._result = result @property - def result(self) -> Optional[SHACLValidationResult]: + def result(self) -> SHACLValidationResult | None: return self._result def __repr__(self): diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 6420bdbfb..8cb5a37c3 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, cast +from typing import TYPE_CHECKING, cast from rdflib import Graph, Literal, Namespace, URIRef @@ -35,11 +35,11 @@ class SHACLNode: # define default values - _name: Optional[str] = None - _description: Optional[str] = None - severity: Optional[str] = None + _name: str | None = None + _description: str | None = None + severity: str | None = None - def __init__(self, node: Node, graph: Graph, parent: Optional[SHACLNode] = None): + def __init__(self, node: Node, graph: Graph, parent: SHACLNode | None = None): # store the shape key self._key = None @@ -48,7 +48,7 @@ def __init__(self, node: Node, graph: Graph, parent: Optional[SHACLNode] = None) # store the shapes graph self._graph = graph # cache the hash - self._hash: Optional[int] = None + self._hash: int | None = None # store the parent shape self._parent = parent @@ -67,7 +67,7 @@ def name(self, value: str): self._name = value @property - def description(self) -> Optional[str]: + def description(self) -> str | None: """Return the 
description of the shape""" return self._description @@ -99,7 +99,7 @@ def graph(self): return self._graph @property - def parent(self) -> Optional[SHACLNode]: + def parent(self) -> SHACLNode | None: """Return the parent shape of the shape""" return self._parent @@ -108,7 +108,7 @@ def level(self) -> RequirementLevel: """Return the requirement level of the shape""" return self.get_declared_level() or LevelCollection.REQUIRED - def get_declared_level(self) -> Optional[RequirementLevel]: + def get_declared_level(self) -> RequirementLevel | None: """Return the declared level of the shape""" severity = self.get_declared_severity() if severity: @@ -118,7 +118,7 @@ def get_declared_level(self) -> Optional[RequirementLevel]: pass return None - def get_declared_severity(self) -> Optional[Severity]: + def get_declared_severity(self) -> Severity | None: """Return the declared severity of the shape""" severity = getattr(self, "severity", None) if severity == f"{SHACL_NS}Violation": @@ -163,7 +163,7 @@ def compute_hash(graph: Graph, node: Node) -> int: class SHACLNodeCollection(SHACLNode): - def __init__(self, node: Node, graph: Graph, properties: Optional[list[PropertyShape]] = None): + def __init__(self, node: Node, graph: Graph, properties: list[PropertyShape] | None = None): super().__init__(node, graph) # store the properties self._properties = properties or [] @@ -173,7 +173,7 @@ def properties(self) -> list[PropertyShape]: """Return the properties of the shape""" return self._properties.copy() - def get_property(self, name) -> Optional[PropertyShape]: + def get_property(self, name) -> PropertyShape | None: """Return the property of the shape with the given name""" for prop in self._properties: if prop.name == name: @@ -207,19 +207,19 @@ class PropertyGroup(SHACLNodeCollection): class PropertyShape(Shape): # define default values - _name: Optional[str] = None - _short_name: Optional[str] = None - _description: Optional[str] = None - group: Optional[str] = None - 
defaultValue: Optional[str] = None + _name: str | None = None + _short_name: str | None = None + _description: str | None = None + group: str | None = None + defaultValue: str | None = None order: int = 0 # store the reference to the property group - _property_group: Optional[PropertyGroup] = None + _property_group: PropertyGroup | None = None def __init__(self, node: Node, graph: Graph, - parent: Optional[Shape] = None): + parent: Shape | None = None): # call the parent constructor super().__init__(node, graph) # store the parent shape @@ -272,12 +272,12 @@ def graph(self) -> Graph: return self._graph @property - def parent(self) -> Optional[Shape]: + def parent(self) -> Shape | None: """Return the parent shape of the shape property""" - return cast("Optional[Shape]", self._parent) + return cast("Shape | None", self._parent) @property - def propertyGroup(self) -> Optional[PropertyGroup]: + def propertyGroup(self) -> PropertyGroup | None: """Return the group of the shape property""" return self._property_group @@ -319,7 +319,7 @@ def remove_shape(self, shape: Shape): self._shapes.pop(shape.key, None) self._shapes_graph -= shape.graph - def get_shape(self, shape_key: str) -> Optional[Shape]: + def get_shape(self, shape_key: str) -> Shape | None: logger.debug("Searching for shape %s in the registry: %s", shape_key, self._shapes) result = self._shapes.get(shape_key, None) if not result: @@ -331,7 +331,7 @@ def extend(self, shapes: dict[str, Shape], graph: Graph) -> None: self._shapes.update(shapes) self._shapes_graph += graph - def get_shape_by_name(self, name: str) -> Optional[Shape]: + def get_shape_by_name(self, name: str) -> Shape | None: for shape in self._shapes.values(): if shape.name == name: return shape @@ -356,7 +356,7 @@ def is_node_deactivated(self, node: Node) -> bool: return True return False - def load_shapes(self, shapes_path: str | Path, publicID: Optional[str] = None) -> list[Shape]: + def load_shapes(self, shapes_path: str | Path, publicID: str | 
None = None) -> list[Shape]: """ Load the shapes from the graph """ @@ -442,7 +442,7 @@ def get_instance(cls, ctx: object): def __process_property_group__( groups: dict[str, PropertyGroup], property_shape: PropertyShape -) -> Optional[PropertyGroup]: +) -> PropertyGroup | None: group_name = property_shape.group if group_name: if group_name not in groups: diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index d0f5e3c82..d4a3f2b22 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -13,7 +13,7 @@ # limitations under the License. from pathlib import Path -from typing import Any, Optional, cast +from typing import Any, cast from rdflib import RDF @@ -166,7 +166,7 @@ def shapes_registry(self) -> ShapesRegistry: return self._shape_registry def load( - self, profile: Profile, requirement_level: RequirementLevel, file_path: Path, publicID: Optional[str] = None + self, profile: Profile, requirement_level: RequirementLevel, file_path: Path, publicID: str | None = None ) -> list[Requirement]: assert file_path is not None, "The file path cannot be None" shapes: list[Shape] = self.shapes_registry.load_shapes(file_path, publicID) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 357c77483..9ddb4c141 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -15,7 +15,7 @@ from __future__ import annotations import hashlib -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, cast if TYPE_CHECKING: from pathlib import Path @@ -73,7 +73,7 @@ def make_uris_relative(text: str, ro_crate_path: Path | str) -> str: return text.replace(str(ro_crate_path), "./") -def inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: Optional[list] = None) -> object: +def 
inject_attributes(obj: object, node_graph: Graph, node: Node, exclude: list | None = None) -> object: # inject attributes of the shape property skip_properties = ["node"] if exclude is None else [*exclude, "node"] triples = node_graph.triples((node, None, None)) @@ -205,7 +205,7 @@ def get_shape_property_graph(self, shape_node: Node, shape_property: Node) -> Gr return property_graph @classmethod - def load_from_file(cls, file_path: str, publicID: Optional[str] = None) -> ShapesList: + def load_from_file(cls, file_path: str, publicID: str | None = None) -> ShapesList: """ Load the shapes from the file @@ -256,7 +256,7 @@ def __extract_related_triples__(graph, subject_node, processed_nodes=None): return related_triples -def load_shapes_from_file(file_path: str, publicID: Optional[str] = None) -> ShapesList: +def load_shapes_from_file(file_path: str, publicID: str | None = None) -> ShapesList: try: # Check the file path is not None assert file_path is not None, "The file path cannot be None" @@ -294,7 +294,7 @@ def load_shapes_from_graph(g: Graph) -> ShapesList: return ShapesList(node_shapes, property_shapes, subgraphs, g) -def resolve_parent_shape(shapes_graph: Graph, source_shape_node: Node, shapes_registry) -> Optional[Shape]: +def resolve_parent_shape(shapes_graph: Graph, source_shape_node: Node, shapes_registry) -> Shape | None: """ Try to resolve the parent NodeShape/PropertyShape for a BNode constraint node. 
diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 34c151a85..97019ee89 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -15,7 +15,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, cast import pyshacl from rdflib import BNode, Graph @@ -48,7 +48,7 @@ class SHACLValidationSkip(Exception): class SHACLValidationAlreadyProcessed(Exception): - def __init__(self, profile_identifier: str, result: Optional[bool]) -> None: + def __init__(self, profile_identifier: str, result: bool | None) -> None: super().__init__(f"Profile {profile_identifier} has already been processed") self.result = result @@ -97,7 +97,7 @@ def __init__(self, context: ValidationContext): super().__init__(context.validator, context.settings) self._base_context: ValidationContext = context # reference to the ontology path - self._ontology_path: Optional[Path] = None + self._ontology_path: Path | None = None # reference to the contextual ShapeRegistry instance self._shapes_registry: ShapesRegistry = ShapesRegistry() @@ -106,10 +106,10 @@ def __init__(self, context: ValidationContext): self._processed_profiles: dict[str, bool] = {} # reference to the current validation profile - self._current_validation_profile: Optional[Profile] = None + self._current_validation_profile: Profile | None = None # store the validation result of the current profile (a pass/fail boolean) - self._validation_result: Optional[bool] = None + self._validation_result: bool | None = None # reference to the contextual ontology graph self._ontology_graph: Graph = Graph() @@ -152,11 +152,11 @@ def base_context(self) -> ValidationContext: return self._base_context @property - def current_validation_profile(self) -> Optional[Profile]: + def current_validation_profile(self) -> Profile | None: 
return self._current_validation_profile @property - def current_validation_result(self) -> Optional[bool]: + def current_validation_result(self) -> bool | None: return self._validation_result @current_validation_result.setter @@ -167,7 +167,7 @@ def current_validation_result(self, result: bool): # mark the profile as processed and store the result self._processed_profiles[self._current_validation_profile.identifier] = result - def get_validation_result(self, profile: Profile) -> Optional[bool]: + def get_validation_result(self, profile: Profile) -> bool | None: assert profile is not None, "Invalid profile" return self._processed_profiles.get(profile.identifier, None) @@ -188,7 +188,7 @@ def __get_ontology_path__(self, profile_path: Path, ontology_filename: str = DEF self._ontology_path = Path(f"{profile_path}/{ontology_filename}") return self._ontology_path - def __get_data_graph_base__(self) -> Optional[str]: + def __get_data_graph_base__(self) -> str | None: """ Get the @base from the RO-Crate metadata JSON-LD. 
@@ -205,9 +205,9 @@ def __get_data_graph_base__(self) -> Optional[str]: return None def __load_ontology_graph__(self, profile_path: Path, - ontology_filename: str = DEFAULT_ONTOLOGY_FILE) -> Optional[Graph]: + ontology_filename: str = DEFAULT_ONTOLOGY_FILE) -> Graph | None: # load the graph of ontologies - ontology_graph: Optional[Graph] = None + ontology_graph: Graph | None = None ontology_path = self.__get_ontology_path__(profile_path, ontology_filename) if ontology_path.exists(): logger.debug("Loading ontologies: %s", ontology_path) @@ -257,14 +257,14 @@ def __init__(self, result: SHACLValidationResult, violation_node: Node, graph: G self._graph = graph # initialize the properties for lazy loading - self._focus_node: Optional[Node] = None - self._result_message: Optional[str] = None - self._result_path: Optional[Node] = None - self._severity: Optional[Severity] = None - self._source_constraint_component: Optional[Node] = None - self._source_shape: Optional[Node] = None - self._source_shape_node: Optional[Node] = None - self._value: Optional[Node] = None + self._focus_node: Node | None = None + self._result_message: str | None = None + self._result_path: Node | None = None + self._severity: Severity | None = None + self._source_constraint_component: Node | None = None + self._source_shape: Node | None = None + self._source_shape_node: Node | None = None + self._value: Node | None = None @property def node(self) -> Node: @@ -329,7 +329,7 @@ def sourceShape(self) -> URIRef | BNode: class SHACLValidationResult: def __init__(self, results_graph: Graph, - results_text: Optional[str] = None) -> None: + results_text: str | None = None) -> None: # validate the results graph input assert results_graph is not None, "Invalid graph" assert isinstance(results_graph, Graph), "Invalid graph type" @@ -365,7 +365,7 @@ def violations(self) -> list[SHACLViolation]: return self._violations @property - def text(self) -> Optional[str]: + def text(self) -> str | None: return self._text 
@@ -373,8 +373,8 @@ class SHACLValidator: def __init__( self, - shapes_graph: Optional[GraphLike | str | bytes], - ont_graph: Optional[GraphLike | str | bytes] = None, + shapes_graph: GraphLike | str | bytes | None, + ont_graph: GraphLike | str | bytes | None = None, ) -> None: """ Create a new SHACLValidator instance. @@ -391,11 +391,11 @@ def __init__( self._ont_graph = ont_graph @property - def shapes_graph(self) -> Optional[GraphLike | str | bytes]: + def shapes_graph(self) -> GraphLike | str | bytes | None: return self._shapes_graph @property - def ont_graph(self) -> Optional[GraphLike | str | bytes]: + def ont_graph(self) -> GraphLike | str | bytes | None: return self._ont_graph def validate( @@ -403,19 +403,19 @@ def validate( # data to validate data_graph: GraphLike | str | bytes, # validation settings - abort_on_first: Optional[bool] = True, - advanced: Optional[bool] = True, - inference: Optional[VALID_INFERENCE_OPTIONS_TYPES] = None, - inplace: Optional[bool] = False, + abort_on_first: bool | None = True, + advanced: bool | None = True, + inference: VALID_INFERENCE_OPTIONS_TYPES | None = None, + inplace: bool | None = False, meta_shacl: bool = False, iterate_rules: bool = True, # SHACL validation severity - allow_infos: Optional[bool] = True, - allow_warnings: Optional[bool] = True, + allow_infos: bool | None = True, + allow_warnings: bool | None = True, # serialization settings - serialization_output_path: Optional[str] = None, + serialization_output_path: str | None = None, serialization_output_format: - Optional[RDF_SERIALIZATION_FORMATS_TYPES] = "turtle", + RDF_SERIALIZATION_FORMATS_TYPES | None = "turtle", **kwargs, ) -> SHACLValidationResult: """ diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 983155a27..940c3d435 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -21,7 +21,7 @@ import zipfile from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Optional, 
cast +from typing import Any, cast from urllib.parse import unquote from rdflib import Graph @@ -72,7 +72,7 @@ def is_remote(self) -> bool: return self.id_as_uri.is_remote_resource() @classmethod - def get_id_as_path(cls, entity_id: str, ro_crate: Optional[ROCrate] = None) -> Path: + def get_id_as_path(cls, entity_id: str, ro_crate: ROCrate | None = None) -> Path: return cls.get_path_from_identifier( entity_id, ro_crate.uri.as_path() @@ -83,7 +83,7 @@ def get_id_as_path(cls, entity_id: str, ro_crate: Optional[ROCrate] = None) -> P @staticmethod def get_path_from_identifier( identifier: str, - rocrate_path: Optional[str | Path] = None, + rocrate_path: str | Path | None = None, decode: bool = False, ) -> Path: """ @@ -299,11 +299,11 @@ def __hash__(self) -> int: class ROCrateMetadata: METADATA_FILE_DESCRIPTOR = "ro-crate-metadata.json" - def __init__(self, ro_crate: ROCrate, metadata_dict: Optional[dict] = None) -> None: + def __init__(self, ro_crate: ROCrate, metadata_dict: dict | None = None) -> None: self._ro_crate = ro_crate self._dict = metadata_dict - self._json: Optional[str] = json.dumps(metadata_dict) if metadata_dict else None - self._graph: Optional[Graph] = None + self._json: str | None = json.dumps(metadata_dict) if metadata_dict else None + self._graph: Graph | None = None @property def ro_crate(self) -> ROCrate: @@ -331,7 +331,7 @@ def get_root_data_entity(self) -> ROCrateEntity: raise ValueError("no main entity in metadata file descriptor") return main_entity - def get_root_data_entity_conforms_to(self) -> Optional[list[str]]: + def get_root_data_entity_conforms_to(self) -> list[str] | None: try: root_data_entity = self.get_root_data_entity() result = root_data_entity.get_property("conformsTo", []) @@ -352,7 +352,7 @@ def get_main_workflow(self) -> ROCrateEntity: raise ValueError("no main workflow in metadata file descriptor") return main_workflow - def get_entity(self, entity_id: str) -> Optional[ROCrateEntity]: + def get_entity(self, entity_id: str) 
-> ROCrateEntity | None: for entity in self.as_dict().get("@graph", []): if entity.get("@id") == entity_id: return ROCrateEntity(self, entity) @@ -390,7 +390,7 @@ def get_web_data_entities(self) -> list[ROCrateEntity]: if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote() ] - def get_conforms_to(self) -> Optional[list[str]]: + def get_conforms_to(self) -> list[str] | None: try: file_descriptor = self.get_file_descriptor_entity() result = file_descriptor.get_property("conformsTo", []) @@ -417,7 +417,7 @@ def as_dict(self) -> dict: self._dict = json.loads(self.as_json()) return self._dict - def as_graph(self, publicID: Optional[str] = None) -> Graph: + def as_graph(self, publicID: str | None = None) -> Graph: if not self._graph: # if the graph is not cached, load it self._graph = Graph(base=publicID or str(self.ro_crate.uri)) @@ -444,7 +444,7 @@ class ROCrate(ABC): Base class for representing and interacting with a Research Object Crate (RO-Crate). """ - def __new__(cls, uri: str | Path | URI, relative_root_path: Optional[Path] = None): + def __new__(cls, uri: str | Path | URI, relative_root_path: Path | None = None): """ Factory method to create the appropriate ROCrate subclass instance. @@ -469,7 +469,7 @@ def __new__(cls, uri: str | Path | URI, relative_root_path: Optional[Path] = Non instance.relative_root_path = relative_root_path return instance - def __init__(self, uri: str | Path | URI, relative_root_path: Optional[Path] = None) -> None: + def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None) -> None: """ Initialize the RO-Crate. 
@@ -486,13 +486,13 @@ def __init__(self, uri: str | Path | URI, relative_root_path: Optional[Path] = N self.relative_root_path = relative_root_path # cache the list of files - self._files: Optional[list[Path]] = None + self._files: list[Path] | None = None # initialize variables to cache the data - self._dict: Optional[dict] = None - self._graph: Optional[Graph] = None + self._dict: dict | None = None + self._graph: Graph | None = None - self._metadata: Optional[ROCrateMetadata] = None + self._metadata: ROCrateMetadata | None = None @property def uri(self) -> URI: @@ -555,7 +555,7 @@ def __get_search_path__(self, path: Path) -> tuple[Path, Path]: search_path = path return search_path, root_path - def __check_search_path__(self, path) -> tuple[Optional[Path], Optional[Path]]: + def __check_search_path__(self, path) -> tuple[Path | None, Path | None]: """ " Extract the search path if it does not contain the relative root path. @@ -741,7 +741,7 @@ def from_metadata_dict( @staticmethod def new_instance( - uri: str | Path | URI, relative_root_path: Optional[Path] = None + uri: str | Path | URI, relative_root_path: Path | None = None ) -> ROCrate: """ Create a new instance of the RO-Crate based on the URI. @@ -792,7 +792,7 @@ class ROCrateLocalFolder(ROCrate): Class representing an RO-Crate stored in a local folder. 
""" - def __init__(self, path: str | Path | URI, relative_root_path: Optional[Path] = None): + def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): super().__init__(path, relative_root_path=relative_root_path) # cache the list of files @@ -834,13 +834,13 @@ class ROCrateLocalZip(ROCrate): def __init__( self, path: str | Path | URI, - relative_root_path: Optional[Path] = None, + relative_root_path: Path | None = None, init_zip: bool = True, ): super().__init__(path, relative_root_path=relative_root_path) # initialize the zip reference - self._zipref: Optional[zipfile.ZipFile] = None + self._zipref: zipfile.ZipFile | None = None if init_zip: self.__init_zip_reference__() @@ -945,7 +945,7 @@ def get_file_content( class ROCrateRemoteZip(ROCrateLocalZip): - def __init__(self, path: str | Path | URI, relative_root_path: Optional[Path] = None): + def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): super().__init__(path, relative_root_path=relative_root_path, init_zip=False) # # initialize the zip reference @@ -1097,7 +1097,7 @@ def __check_search_path__(self, path): class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): - def __init__(self, uri: str | Path | URI, relative_root_path: Optional[Path] = None): + def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): # initialize the parent classes super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) # check if the path is a BagIt-wrapped crate diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 8c9680680..ec8eda8a5 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -17,7 +17,6 @@ import tempfile import zipfile from pathlib import Path -from typing import Optional from rocrate_validator.constants import HTTP_STATUS_BAD_REQUEST, HTTP_STATUS_GATEWAY_TIMEOUT from rocrate_validator.errors import ProfileNotFound @@ -45,7 +44,7 @@ def 
detect_profiles(settings: dict | ValidationSettings) -> list[Profile]: def validate_metadata_as_dict( - metadata_dict: dict, settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None + metadata_dict: dict, settings: dict | ValidationSettings, subscribers: list[Subscriber] | None = None ) -> ValidationResult: """ Validate the RO-Crate metadata only against a profile and return the validation result. @@ -64,7 +63,7 @@ def validate_metadata_as_dict( def validate( - settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None + settings: dict | ValidationSettings, subscribers: list[Subscriber] | None = None ) -> ValidationResult: """ Validate a RO-Crate against a profile and return the validation result @@ -87,7 +86,7 @@ def validate( return result -def _build_validator(settings: ValidationSettings, subscribers: Optional[list[Subscriber]]) -> Validator: +def _build_validator(settings: ValidationSettings, subscribers: list[Subscriber] | None) -> Validator: """Create a validator for the given settings and register any subscribers.""" validator = Validator(settings) logger.debug("Validator created. 
Starting validation...") @@ -98,7 +97,7 @@ def _build_validator(settings: ValidationSettings, subscribers: Optional[list[Su def _extract_and_validate( - settings: ValidationSettings, subscribers: Optional[list[Subscriber]], rocrate_path: Path + settings: ValidationSettings, subscribers: list[Subscriber] | None, rocrate_path: Path ) -> Validator: """Extract a (local or downloaded) zipped RO-Crate to a temp dir and validate it.""" original_data_path = settings.rocrate_uri @@ -115,7 +114,7 @@ def _extract_and_validate( def _download_remote_rocrate( - settings: ValidationSettings, subscribers: Optional[list[Subscriber]], rocrate_path: URI + settings: ValidationSettings, subscribers: list[Subscriber] | None, rocrate_path: URI ) -> Validator: """Download a remote (http/https/ftp) RO-Crate to a temp file, then extract and validate it.""" logger.debug("RO-Crate is a remote RO-Crate") @@ -147,7 +146,7 @@ def _download_remote_rocrate( def __initialise_validator__( - settings: dict | ValidationSettings, subscribers: Optional[list[Subscriber]] = None + settings: dict | ValidationSettings, subscribers: list[Subscriber] | None = None ) -> Validator: """ Validate a RO-Crate against a profile @@ -192,7 +191,7 @@ def __initialise_validator__( def get_profiles( profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, + extra_profiles_path: Path | None = None, severity=Severity.OPTIONAL, allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, ) -> list[Profile]: @@ -232,7 +231,7 @@ def get_profiles( def get_profile( profile_identifier: str, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, + extra_profiles_path: Path | None = None, severity=Severity.OPTIONAL, allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, ) -> Profile: diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 
57d902d10..bbf4cb174 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -27,7 +27,7 @@ import os from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, cast from rocrate_validator import constants from rocrate_validator.utils import log as logging @@ -64,7 +64,7 @@ class WarmUpResult: """Outcome of a warm-up operation.""" url: str status: str # "ok", "skipped", "failed" - detail: Optional[str] = None + detail: str | None = None def discover_profile_cacheable_urls(profile: Profile) -> list[str]: @@ -139,7 +139,7 @@ def warm_up_urls(urls: Sequence[str]) -> list[WarmUpResult]: return results -def _get_profile_for_warmup(settings) -> Optional[Profile]: +def _get_profile_for_warmup(settings) -> Profile | None: if getattr(settings, "offline", False): return None if getattr(settings, "cache_path", None) is None: @@ -154,7 +154,7 @@ def _get_profile_for_warmup(settings) -> Optional[Profile]: return _find_profile(profile_identifier, settings) -def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[WarmUpResult]]: +def auto_warm_up_for_settings(settings: ValidationSettings) -> list[WarmUpResult] | None: """ Perform a best-effort synchronous warm-up triggered by ``ValidationSettings.__post_init__``. @@ -186,7 +186,7 @@ def auto_warm_up_for_settings(settings: ValidationSettings) -> Optional[list[War return results -def _find_profile(identifier, settings) -> Optional[Profile]: +def _find_profile(identifier, settings) -> Profile | None: """ Look up a loaded profile by identifier. Accepts either a string or a list (the settings sometimes store a list of identifiers). 
diff --git a/rocrate_validator/utils/collections.py b/rocrate_validator/utils/collections.py index a8f2dc98f..878a2b281 100644 --- a/rocrate_validator/utils/collections.py +++ b/rocrate_validator/utils/collections.py @@ -14,8 +14,6 @@ from __future__ import annotations -from typing import Optional - class MapIndex: @@ -25,7 +23,7 @@ def __init__(self, name: str, unique: bool = False): class MultiIndexMap: - def __init__(self, key: str = "id", indexes: Optional[list[MapIndex]] = None): + def __init__(self, key: str = "id", indexes: list[MapIndex] | None = None): self._key = key # initialize an empty dictionary to store the indexes self._indices: dict[str, dict] = {} diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 8f9ef9d0e..18a979aec 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -26,7 +26,7 @@ import json import threading -from typing import Any, Optional +from typing import Any from rdflib.plugins.shared.jsonld import context as jsonld_context from rdflib.plugins.shared.jsonld import util as jsonld_util @@ -128,7 +128,7 @@ def _fetch_json_ld(url: str) -> Any: return json.loads(response.text) -def resolve_remote_document(url: str) -> tuple[Optional[dict], Optional[str]]: +def resolve_remote_document(url: str) -> tuple[dict | None, str | None]: """ Resolve a remote JSON-LD document, returning ``(json, content_type)``. 
diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 58eac0ba9..5af8ad541 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -20,7 +20,7 @@ import string import threading from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from typing_extensions import Self @@ -75,14 +75,14 @@ def __init__(self, url: str): self.url = url -def find_offline_cache_miss(exc: BaseException) -> Optional[OfflineCacheMissError]: +def find_offline_cache_miss(exc: BaseException) -> OfflineCacheMissError | None: """ Walk the chain of an exception (``__cause__``/``__context__``) looking for an :class:`OfflineCacheMissError`. Returns the first match, or ``None`` if the chain does not contain one. """ seen: set[int] = set() - current: Optional[BaseException] = exc + current: BaseException | None = exc while current is not None and id(current) not in seen: seen.add(id(current)) if isinstance(current, OfflineCacheMissError): @@ -115,7 +115,7 @@ def __new__(cls, *args, **kwargs) -> Self: def __init__(self, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None, + cache_path: str | None = None, offline: bool = False, no_cache: bool = False): logger.debug(f"Initializing instance of {self.__class__.__name__} {self}") @@ -144,7 +144,7 @@ def __init__(self, else: logger.debug(f"Instance of {self} already initialized") - def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = None): + def __initialize_session__(self, cache_max_age: int, cache_path: str | None = None): # initialize the session # The session can be a CachedSession, a plain requests.Session, or the # duck-typed _OfflineFallbackSession; HTTP methods are delegated dynamically @@ -353,7 +353,7 @@ def cache_info(self) -> dict[str, Any]: @classmethod def initialize_cache(cls, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - 
cache_path: Optional[str] = None, + cache_path: str | None = None, offline: bool = False, no_cache: bool = False) -> HttpRequester: """ @@ -394,7 +394,7 @@ def _close_session(self) -> None: def _reconfigure(self, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None, + cache_path: str | None = None, offline: bool = False, no_cache: bool = False) -> None: """ diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index a9cc759c1..246ec1dec 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -15,7 +15,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from InquirerPy import prompt from InquirerPy.base.control import Choice @@ -31,9 +31,9 @@ logger = logging.getLogger(__name__) -def __get_single_char_win32__(console: Optional[Console] = None, end: str = "\n", - message: Optional[str] = None, - choices: Optional[list[str]] = None) -> str: +def __get_single_char_win32__(console: Console | None = None, end: str = "\n", + message: str | None = None, + choices: list[str] | None = None) -> str: """ Get a single character from the console """ @@ -53,9 +53,9 @@ def __get_single_char_win32__(console: Optional[Console] = None, end: str = "\n" return char -def __get_single_char_unix__(console: Optional[Console] = None, end: str = "\n", - message: Optional[str] = None, - choices: Optional[list[str]] = None) -> str: +def __get_single_char_unix__(console: Console | None = None, end: str = "\n", + message: str | None = None, + choices: list[str] | None = None) -> str: """ Get a single character from the console """ @@ -82,9 +82,9 @@ def __get_single_char_unix__(console: Optional[Console] = None, end: str = "\n", return char -def get_single_char(console: Optional[Console] = None, end: str = "\n", - message: Optional[str] = None, - choices: Optional[list[str]] = 
None) -> str: +def get_single_char(console: Console | None = None, end: str = "\n", + message: str | None = None, + choices: list[str] | None = None) -> str: """ Get a single character from the console """ diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index db970fa61..1baa1129a 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Protocol, cast +from typing import TYPE_CHECKING, Any, Protocol, cast from rocrate_validator.utils import log as logging @@ -41,7 +41,7 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR class BaseOutputFormatter(OutputFormatter): - def __init__(self, data: Optional[Any] = None): + def __init__(self, data: Any | None = None): # Formatters are registered as classes (instantiated with the data to # render), so the map values are formatter types, not instances. self._fmap: dict[type, type[OutputFormatter]] = {} diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index 011ac5d7a..52417b2d5 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional +from typing import Any from rich.console import Console as BaseConsole @@ -27,7 +27,7 @@ class Console(BaseConsole): """Rich console that can be disabled.""" def __init__(self, *args, disabled: bool = False, interactive: bool = True, - formatters: Optional[dict[type, Any]] = None, **kwargs): + formatters: dict[type, Any] | None = None, **kwargs): force_jupyter = kwargs.pop("force_jupyter", None) if force_jupyter is None: force_jupyter = False if self.__jupyter_environment__() else None @@ -45,7 +45,7 @@ def __jupyter_environment__(self) -> bool: from rocrate_validator.cli.utils import running_in_jupyter # noqa: PLC0415 return running_in_jupyter() - def register_formatter(self, formatter: OutputFormatter, type_: Optional[type] = None): + def register_formatter(self, formatter: OutputFormatter, type_: type | None = None): if type_ is None and not isinstance(formatter, BaseOutputFormatter): raise ValueError("type_ must be provided if formatter is not a BaseOutputFormatter") if isinstance(formatter, BaseOutputFormatter): diff --git a/rocrate_validator/utils/io_helpers/output/json/__init__.py b/rocrate_validator/utils/io_helpers/output/json/__init__.py index 5318fb246..9aa1c29b5 100644 --- a/rocrate_validator/utils/io_helpers/output/json/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/json/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional +from typing import Any from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging @@ -29,7 +29,7 @@ class JSONOutputFormatter(BaseOutputFormatter): - def __init__(self, data: Optional[Any] = None): + def __init__(self, data: Any | None = None): super().__init__(data) self.add_type_formatter(ValidationResult, ValidationResultJSONOutputFormatter) self.add_type_formatter(dict, ValidationResultsJSONOutputFormatter) diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index 38c7b2927..c6d0fd629 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -13,7 +13,7 @@ # limitations under the License. import json -from typing import Any, Optional +from typing import Any from rich.console import ConsoleOptions, RenderResult @@ -40,8 +40,8 @@ def format_validation_result(data: ValidationResult, console: Console, console_o def format_validation_results( data: dict[str, ValidationResult], - console: Optional[Console] = None, # pylint: disable=unused-argument - console_options: Optional[ConsoleOptions] = None, + console: Console | None = None, # pylint: disable=unused-argument + console_options: ConsoleOptions | None = None, ) -> str: # pylint: disable=unused-argument # Initialize an empty JSON output @@ -104,7 +104,7 @@ def format_validation_results( return json.dumps(json_output, indent=4, cls=CustomEncoder) -def _compute_overall_statistics(results: list[ValidationResult], verbose: bool) -> Optional[dict[str, Any]]: +def _compute_overall_statistics(results: list[ValidationResult], verbose: bool) -> dict[str, Any] | None: """Aggregate per-result statistics, dropping detailed lists unless verbose.""" stats = AggregatedValidationStatistics([r.statistics for r in results if r.statistics]) if not stats: diff 
--git a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index b4233553d..7090f1be5 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from rocrate_validator.models import ValidationResult, ValidationStatistics from rocrate_validator.utils import log as logging @@ -33,7 +33,7 @@ class TextOutputFormatter(BaseOutputFormatter): - def __init__(self, data: Optional[Any] = None): + def __init__(self, data: Any | None = None): super().__init__(data) self.add_type_formatter(ValidationResult, ValidationResultTextOutputFormatter) self.add_type_formatter(ValidationStatistics, ValidationStatisticsTextOutputFormatter) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 3728c0460..f1576f061 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -13,7 +13,6 @@ # limitations under the License. 
-from typing import Optional from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn @@ -40,7 +39,7 @@ class ProgressMonitor(Subscriber): REQUIREMENT_CHECK_VALIDATION = "Requirements Checks" def __init__(self, settings: dict | ValidationSettings, - stats: Optional[ValidationStatistics] = None): + stats: ValidationStatistics | None = None): self.__progress = Progress( TextColumn("[progress.description]{task.description}"), BarColumn(), @@ -85,7 +84,7 @@ def stop(self): def progress(self) -> Progress: return self.__progress - def update(self, event: Event, ctx: Optional[ValidationContext] = None): + def update(self, event: Event, ctx: ValidationContext | None = None): logger.debug("Event: %s", event.event_type) if event.event_type == EventType.VALIDATION_START: logger.debug("Validation started") @@ -108,7 +107,7 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): assert isinstance(event, ValidationEvent) logger.debug("Validation ended with result: %s", event.validation_result) - def __on_requirement_check_end__(self, event: Event, ctx: Optional[ValidationContext]) -> None: + def __on_requirement_check_end__(self, event: Event, ctx: ValidationContext | None) -> None: """Advance the requirement-check progress bar, unless the check is hidden or overridden.""" assert isinstance(event, RequirementCheckValidationEvent) assert ctx is not None, "Validation context must be provided" diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 1a81455b1..23b30c0b8 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -16,7 +16,7 @@ import threading import time -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from rich.align import Align from rich.layout import Layout @@ -58,22 +58,22 @@ class 
ValidationReportLayout(Layout): def __init__(self, console: Console, settings: ValidationSettings, - statistics: Optional[ValidationStatistics] = None, + statistics: ValidationStatistics | None = None, profile_autodetected: bool = False): super().__init__() self.console = console self.validation_settings = settings self.statistics = statistics self.profile_autodetected = profile_autodetected - self.result: Optional[ValidationResult] = None - self.__layout: Optional[Padding] = None - self._validation_checks_progress: Optional[Layout] = None - self.__progress_monitor: Optional[ProgressMonitor] = None - self.requirement_checks_container_layout: Optional[Layout] = None - self.passed_checks: Optional[Layout] = None - self.failed_checks: Optional[Layout] = None - self.report_details_container: Optional[Layout] = None - self.overall_result: Optional[Layout] = None + self.result: ValidationResult | None = None + self.__layout: Padding | None = None + self._validation_checks_progress: Layout | None = None + self.__progress_monitor: ProgressMonitor | None = None + self.requirement_checks_container_layout: Layout | None = None + self.passed_checks: Layout | None = None + self.failed_checks: Layout | None = None + self.report_details_container: Layout | None = None + self.overall_result: Layout | None = None self.requirement_checks_by_severity_container_layout: Any = None self.checks_stats_layout: Any = None @@ -183,7 +183,7 @@ def __init_layout__(self): if result: self.__show_overall_result__(result) - def update(self, event: Event, ctx: Optional[ValidationContext] = None): # type: ignore[override] + def update(self, event: Event, ctx: ValidationContext | None = None): # type: ignore[override] logger.debug("Event: %s", event.event_type) if event.event_type == EventType.PROFILE_VALIDATION_START: assert isinstance(event, ProfileValidationEvent) @@ -213,7 +213,7 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None): # type 
self.__show_overall_result__(event.validation_result) logger.debug("Validation ended with result: %s", event.validation_result) - def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): + def update_stats(self, profile_stats: ValidationStatistics | None = None): assert profile_stats, "Profile stats must be provided" assert self.passed_checks is not None and self.failed_checks is not None, "Layout not initialized" self.requirement_checks_by_severity_container_layout["required"].update( @@ -279,7 +279,7 @@ def update_stats(self, profile_stats: Optional[ValidationStatistics] = None): ) ) - def __show_overall_result__(self, result: Optional[ValidationResult]): + def __show_overall_result__(self, result: ValidationResult | None): assert result, "Validation result must be provided" assert self.overall_result is not None, "Layout not initialized" self.result = result @@ -322,7 +322,7 @@ def __init__(self, console: Console, validation_settings: dict, super().__init__(console, validation_settings, result, profile_autodetected) # type: ignore[arg-type] self.refresh_per_second = refresh_per_second self.transient = transient - self._live: Optional[Live] = None + self._live: Live | None = None def __enter__(self): """Enter the context and start live rendering.""" @@ -378,8 +378,8 @@ def __enter__(self): with Live(message, console=self.console, refresh_per_second=4) as live: # Start validation in background while updating dots - result_container: list[Optional[ValidationResult]] = [None] - exception_container: list[Optional[BaseException]] = [None] + result_container: list[ValidationResult | None] = [None] + exception_container: list[BaseException | None] = [None] def run_validation(): try: diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 231dc31da..0c91ec1a4 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -137,7 +137,7 @@ def __create_logger__(name: str) -> Logger: return logger -def 
basicConfig(level: int, modules_config: Optional[dict] = None): +def basicConfig(level: int, modules_config: dict | None = None): """Set the log level and format for the logger""" with _lock: # set the default log level to ERROR for loggers of other modules @@ -184,7 +184,7 @@ class LoggerProxy: def __init__(self, name: str): self.name = name - self._instance: Optional[Logger] = None + self._instance: Logger | None = None def _initialize(self): with _lock: diff --git a/rocrate_validator/utils/python_helpers.py b/rocrate_validator/utils/python_helpers.py index d96ee0472..e3cdce4ab 100644 --- a/rocrate_validator/utils/python_helpers.py +++ b/rocrate_validator/utils/python_helpers.py @@ -17,7 +17,6 @@ import sys from importlib import import_module from pathlib import Path -from typing import Optional from rocrate_validator.utils import log as logging @@ -26,7 +25,7 @@ def get_classes_from_file( - file_path: Path, filter_class: Optional[type] = None, class_name_suffix: Optional[str] = None + file_path: Path, filter_class: type | None = None, class_name_suffix: str | None = None ) -> dict[str, type]: """Get all classes in a Python file""" # ensure the file path is a Path object @@ -74,7 +73,7 @@ def to_camel_case(snake_str: str) -> str: return components[0].capitalize() + "".join(x.title() for x in components[1:]) -def get_requirement_name_from_file(file: Path, check_name: Optional[str] = None) -> str: +def get_requirement_name_from_file(file: Path, check_name: str | None = None) -> str: """ Get the requirement name from the file diff --git a/rocrate_validator/utils/rdf.py b/rocrate_validator/utils/rdf.py index cfe32e90b..7b10ebb8c 100644 --- a/rocrate_validator/utils/rdf.py +++ b/rocrate_validator/utils/rdf.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional from rdflib import Graph @@ -43,7 +42,7 @@ def get_full_graph( return full_graph -def extract_base_from_jsonld(json_data: dict) -> Optional[str]: +def extract_base_from_jsonld(json_data: dict) -> str | None: """ Extract the @base from the @context of a JSON-LD document. diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 0906ad154..58160d02b 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -15,7 +15,6 @@ import enum import re from pathlib import Path -from typing import Optional from urllib.parse import ParseResult, parse_qsl, urlparse, urlsplit from rocrate_validator import errors @@ -164,7 +163,7 @@ def scheme(self) -> str: return self._parse_result.scheme @property - def fragment(self) -> Optional[str]: + def fragment(self) -> str | None: fragment = self._parse_result.fragment return fragment or None @@ -180,7 +179,7 @@ def get_path(self) -> str: def get_query_string(self) -> str: return self._parse_result.query - def get_query_param(self, param: str) -> Optional[str]: + def get_query_param(self, param: str) -> str | None: query_params = dict(parse_qsl(self._parse_result.query)) return query_params.get(param) diff --git a/rocrate_validator/utils/versioning.py b/rocrate_validator/utils/versioning.py index 2d83879b6..86e434e20 100644 --- a/rocrate_validator/utils/versioning.py +++ b/rocrate_validator/utils/versioning.py @@ -15,7 +15,6 @@ import re import subprocess import sys -from typing import Optional from rocrate_validator.utils import log as logging from rocrate_validator.utils.config import get_config @@ -24,7 +23,7 @@ logger = logging.getLogger(__name__) -def run_git_command(command: list[str]) -> Optional[str]: +def run_git_command(command: list[str]) -> str | None: """ Run a git command and return the output @@ -69,7 +68,7 @@ def get_last_tag() -> str: return run_git_command(["git", "describe", "--tags", "--abbrev=0"]) or "" -def get_commit_distance(tag: 
Optional[str] = None) -> int: +def get_commit_distance(tag: str | None = None) -> int: """ Get the distance in commits between the current commit and the last tag diff --git a/tests/shared.py b/tests/shared.py index b430ab1e4..8183880d5 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -22,7 +22,7 @@ import tempfile from collections.abc import Collection from pathlib import Path -from typing import Optional, TypeVar +from typing import TypeVar from urllib.parse import urljoin import rdflib @@ -91,8 +91,8 @@ def replace_uri_in_graph(graph, old_uri_str, new_uri_str): def _prepare_temp_rocrate( rocrate_path: Path, - rocrate_entity_patch: Optional[dict], - rocrate_entity_mod_sparql: Optional[str], + rocrate_entity_patch: dict | None, + rocrate_entity_mod_sparql: str | None, ) -> Path: temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) shutil.copytree(rocrate_path, temp_rocrate_path) @@ -124,16 +124,16 @@ def do_entity_test( rocrate_path: Path | str, requirement_severity: models.Severity, expected_validation_result: bool, - expected_triggered_requirements: Optional[list[str]] = None, - expected_triggered_issues: Optional[list[str]] = None, + expected_triggered_requirements: list[str] | None = None, + expected_triggered_issues: list[str] | None = None, abort_on_first: bool = False, profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, - rocrate_entity_patch: Optional[dict] = None, - rocrate_entity_mod_sparql: Optional[str] = None, - skip_checks: Optional[list[str]] = (), - rocrate_relative_root_path: Optional[str] = None, + rocrate_entity_patch: dict | None = None, + rocrate_entity_mod_sparql: str | None = None, + skip_checks: list[str] | None = (), + rocrate_relative_root_path: str | None = None, metadata_only: bool = False, - metadata_dict: Optional[dict] = None, + metadata_dict: dict | None = None, **kwargs, ): """ diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index 7079e51cd..59f8d0833 100644 
--- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -from typing import Optional from rdflib import BNode, Graph, Namespace, URIRef @@ -224,7 +223,7 @@ def test_resolve_parent_shape_with_property_bnode(): assert result.key == shape.key -def _make_property(graph: Graph, severity_term: Optional[str] = None) -> PropertyShape: +def _make_property(graph: Graph, severity_term: str | None = None) -> PropertyShape: """Build a PropertyShape on a fresh BNode, optionally setting sh:severity.""" prop = PropertyShape(BNode(), graph) if severity_term is not None: From 343916a878b6b3e98c6a55d578462b19d1a4eb02 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:27:01 +0200 Subject: [PATCH 294/352] =?UTF-8?q?refactor(python):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20simplify=20check=20attribute=20test=20with=20getattr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/python/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 6b6d45aaa..f5dfbf6f7 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -119,7 +119,7 @@ def __init_checks__(self): checks: list = [] for name, member in inspect.getmembers(self.requirement_check_class, inspect.isfunction): # verify that the attribute set by the check decorator is present - if hasattr(member, "check") and member.check is True: + if bool(getattr(member, "check", False)): check_name = None try: # `name`/`severity` are attributes attached dynamically by the @check decorator From 695c6a08d002ccb8d5c69033d59ed2643b451d9b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:27:50 +0200 Subject: [PATCH 295/352] 
=?UTF-8?q?style(output):=20=F0=9F=8E=A8=20use=20e?= =?UTF-8?q?llipsis=20instead=20of=20pass=20for=20protocol=20stub?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/io_helpers/output/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 1baa1129a..5488d5cdc 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -36,7 +36,7 @@ class OutputFormatter(Protocol): """Protocol for output formatters.""" def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: - pass + ... class BaseOutputFormatter(OutputFormatter): From 2371ca896b898183757a766c9d2c8a979ff84284 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:44:26 +0200 Subject: [PATCH 296/352] =?UTF-8?q?fix(logging):=20=F0=9F=94=87=20replace?= =?UTF-8?q?=20redundant=20exception=20args=20with=20meaningful=20messages?= =?UTF-8?q?=20(TRY401)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/validate.py | 4 +-- rocrate_validator/cli/main.py | 4 +-- rocrate_validator/models.py | 6 ++-- .../ro-crate/must/0_file_descriptor_format.py | 28 +++++++-------- .../workflow-ro-crate/may/1_main_workflow.py | 8 ++--- .../workflow-ro-crate/must/0_main_workflow.py | 4 +-- .../requirements/shacl/requirements.py | 2 +- rocrate_validator/rocrate.py | 36 +++++++++---------- rocrate_validator/utils/uri.py | 2 +- tests/shared.py | 3 +- 10 files changed, 48 insertions(+), 49 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 596eb4549..faa91cdac 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -53,7 +53,7 @@ 
def validate_uri(ctx, param, value): validate_rocrate_uri(value) except ROCrateInvalidURIError as e: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Invalid RO-Crate URI provided: %s", value) raise click.BadParameter(e.message, param=param) from e return value @@ -459,7 +459,7 @@ def _parse_skip_checks(skip_checks: list[str] | None) -> list[str]: except Exception as e: logger.error("Error parsing skip_checks: %s", e) if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error parsing skip_checks: %s", e) + logger.exception("Error parsing skip_checks") raise ValueError( f"Invalid skip_checks value: {s}. " "It must be a comma-separated list of Fully Qualified Check IDs." diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index 521b4252a..64a43b216 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -98,7 +98,7 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ if __name__ == "__main__": try: cli() # pylint: disable=no-value-for-parameter # click injects the parameters - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(f"An unexpected error occurred: {e}") + logger.exception("An unexpected error occurred") sys.exit(2) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 13be6ee8f..a1757d948 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -3068,9 +3068,9 @@ def detect_rocrate_profiles(self) -> list[Profile]: ) return candidate_profiles - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error detecting RO-Crate profiles") return [] def validate(self) -> ValidationResult: @@ -3479,7 +3479,7 @@ def __load_profiles__(self) -> list[Profile]: except AttributeError as e: # raised when the profile is not found if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + 
logger.exception("Profile not found: %s", self.profile_identifier) raise ProfileNotFound( self.profile_identifier, message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 7c3ad96d8..72f0d8a97 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -74,11 +74,11 @@ def check(self, context: ValidationContext) -> bool: logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) context.ro_crate.metadata.as_dict() return True - except Exception as e: + except Exception: context.result.add_issue( f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("RO-Crate file descriptor is not in the correct format") return False @@ -162,9 +162,9 @@ def __check_remote_context__(self, context_uri: str) -> bool: jsonLD, dict), f"The retrieved context from {context_uri} is not \ a valid JSON-LD context: it is not a dictionary" return True - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error validating JSON-LD context is a dictionary") return False def __check_contexts__(self, context: ValidationContext, jsonld_context: object) -> bool: @@ -203,9 +203,9 @@ def check_context(self, context: ValidationContext) -> bool: # Check if the context is valid return self.__check_contexts__(context, json_dict["@context"]) - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error extracting @context from file descriptor") return False @check(name="File Descriptor JSON-LD must be flattened") @@ -294,9 +294,9 @@ def 
_check_flattened_graph(self, context, is_flat): if fail_fast: return False return result - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error flattening JSON-LD file descriptor") return False @check(name="Validation of the @id property of the file descriptor entities") @@ -312,9 +312,9 @@ def check_identifiers(self, context: ValidationContext) -> bool: "file descriptor does not contain the @id attribute", self) return False return True - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error validating @id property of file descriptor entities") return False @check(name="Validation of the @type property of the file descriptor entities") @@ -330,9 +330,9 @@ def check_types(self, context: ValidationContext) -> bool: "file descriptor does not contain the @type attribute", self) return False return True - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error validating @type property of file descriptor entities") return False def __get_context_keys__(self, context: object) -> set: @@ -423,7 +423,7 @@ def check_compaction(self, context: ValidationContext) -> bool: logger.debug(f"{context_keys}") except Exception as e: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error getting context keys from JSON-LD") context.result.add_issue(str(e), self) return False @@ -450,7 +450,7 @@ def check_compaction(self, context: ValidationContext) -> bool: return True except Exception as e: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Unexpected error during file descriptor validation") context.result.add_issue( f'Unexpected error: {e}', self) return False diff --git a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py 
b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py index 9bc07b7f5..c64746900 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py @@ -42,9 +42,9 @@ def check_workflow_diagram(self, context: ValidationContext) -> bool: context.result.add_issue(f"Workflow diagram '{image.id}' not found in crate", self) return False return True - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(f"Unexpected error: {e}") + logger.exception("Unexpected error checking main workflow image existence") return False @check(name="Workflow description existence") @@ -62,7 +62,7 @@ def check_workflow_description(self, context: ValidationContext) -> bool: f"Workflow CWL description {main_workflow_subject.id} not found in crate", self) return False return True - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(f"Unexpected error: {e}") + logger.exception("Unexpected error checking workflow description existence") return False diff --git a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py index 6d4f147e9..375905f7d 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py @@ -36,9 +36,9 @@ def check_workflow(self, context: ValidationContext) -> bool: context.result.add_issue(f"Main Workflow {main_workflow.id} not found in crate", self) return False return True - except ValueError as e: + except ValueError: context.result.add_issue("Unable to check the existence of the main workflow file " "because the metadata file descriptor doesn't contain a `mainEntity`", self) if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Unable to check main workflow file existence") 
return False diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index d4a3f2b22..f63801261 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -146,7 +146,7 @@ def finalize(cls, context: ValidationContext) -> None: else: logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Forced SHACL run for zero-shape target profile failed") finally: shacl_context.__unset_current_validation_profile__() diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 940c3d435..0e530e830 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -265,9 +265,9 @@ def check_availability(self) -> AvailabilityStatus: return AvailabilityStatus.UNCHECKABLE return self._check_local_availability() - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error checking entity availability") return AvailabilityStatus.UNAVAILABLE def is_available(self) -> bool: @@ -276,9 +276,9 @@ def is_available(self) -> bool: def get_size(self) -> int: try: return self.metadata.ro_crate.get_file_size(Path(self.id)) - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error getting entity file size") return 0 def __str__(self) -> str: @@ -313,9 +313,9 @@ def ro_crate(self) -> ROCrate: def size(self) -> int: try: return len(self.as_json()) - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error computing entity JSON size") return 0 def get_file_descriptor_entity(self) -> ROCrateEntity: @@ -340,9 +340,9 @@ def get_root_data_entity_conforms_to(self) -> list[str] | None: if not 
isinstance(result, list): result = [result] return [_.id for _ in result] - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error getting entity image") return None def get_main_workflow(self) -> ROCrateEntity: @@ -399,9 +399,9 @@ def get_conforms_to(self) -> list[str] | None: if not isinstance(result, list): result = [result] return [_.id for _ in result] - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error getting entity identifiers by type") return None def as_json(self) -> str: @@ -631,9 +631,9 @@ def has_file(self, path: Path) -> bool: """ try: return self.__parse_path__(path).is_file() - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error checking if path is a file") return False def has_directory(self, path: Path) -> bool: @@ -648,9 +648,9 @@ def has_directory(self, path: Path) -> bool: """ try: return self.__parse_path__(path).is_dir() - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error checking if path is a directory") return False @abstractmethod @@ -852,9 +852,9 @@ def __del__(self): if self._zipref and self._zipref.fp is not None: self._zipref.close() del self._zipref - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error closing zip reference") def __parse_path__(self, path): assert path, "Path cannot be None" @@ -1070,9 +1070,9 @@ def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: logger.debug("Presence of 'data/ro-crate-metadata.json': %s", has_ro_crate_metadata) result = has_bagit_txt and has_ro_crate_metadata del temp_crate - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Error 
loading remote BagIt RO-Crate metadata") return result def __check_search_path__(self, path): diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 58160d02b..7587f166b 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -311,7 +311,7 @@ def validate_rocrate_uri(uri: str | Path | URI, silent: bool = False) -> bool: except ValueError as e: logger.error(e) if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Invalid RO-Crate URI: %s", uri) raise errors.ROCrateInvalidURIError(uri) from e except Exception as e: if not silent: diff --git a/tests/shared.py b/tests/shared.py index 8183880d5..35aa5c72c 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -213,8 +213,7 @@ def do_entity_test( ) except Exception as e: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) - logger.debug("Failed to validate RO-Crate @ path: %s", rocrate_path) + logger.exception("Failed to validate RO-Crate @ path: %s", rocrate_path) logger.debug("Requirement severity: %s", requirement_severity) logger.debug("Expected validation result: %s", expected_validation_result) logger.debug("Expected triggered requirements: %s", expected_triggered_requirements) From c502a9145585595a2339134c05f9cdb38daada50 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 09:51:55 +0200 Subject: [PATCH 297/352] =?UTF-8?q?refactor(test):=20=F0=9F=94=A5=20remove?= =?UTF-8?q?=20empty=20docstrings=20(D419)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../profiles/ro-crate/test_data_entity_metadata.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py index 85f83e00f..04b40e9b4 100644 --- a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py +++ 
b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py @@ -83,7 +83,6 @@ def test_directory_data_entity_wo_trailing_slash(): def test_missing_data_entity_encoding_format(): - """""" do_entity_test( paths.missing_data_entity_encoding_format, models.Severity.RECOMMENDED, @@ -94,7 +93,6 @@ def test_missing_data_entity_encoding_format(): def test_invalid_data_entity_encoding_format_pronom(): - """""" do_entity_test( paths.invalid_data_entity_encoding_format_pronom, models.Severity.RECOMMENDED, @@ -105,7 +103,6 @@ def test_invalid_data_entity_encoding_format_pronom(): def test_invalid_data_entity_encoding_format_ctx_website_type(): - """""" do_entity_test( paths.invalid_encoding_format_ctx_entity_missing_ws_type, models.Severity.RECOMMENDED, @@ -116,7 +113,6 @@ def test_invalid_data_entity_encoding_format_ctx_website_type(): def test_invalid_data_entity_encoding_format_ctx_website_name(): - """""" do_entity_test( paths.invalid_encoding_format_ctx_entity_missing_ws_name, models.Severity.RECOMMENDED, @@ -127,7 +123,6 @@ def test_invalid_data_entity_encoding_format_ctx_website_name(): def test_valid_data_entity_encoding_format_pronom(): - """""" do_entity_test( paths.valid_encoding_format_pronom, models.Severity.RECOMMENDED, @@ -137,7 +132,6 @@ def test_valid_data_entity_encoding_format_pronom(): def test_valid_data_entity_encoding_format_ctx_website(): - """""" do_entity_test( paths.valid_encoding_format_ctx_entity, models.Severity.RECOMMENDED, @@ -147,7 +141,6 @@ def test_valid_data_entity_encoding_format_ctx_website(): def test_missing_file_data_entity_with_quoted_name(): - """""" do_entity_test( paths.missing_file_data_entity_with_quoted_name, models.Severity.REQUIRED, @@ -158,7 +151,6 @@ def test_missing_file_data_entity_with_quoted_name(): def test_missing_file_data_entity_with_unquoted_name(): - """""" do_entity_test( paths.missing_file_data_entity_with_unquoted_name, models.Severity.REQUIRED, @@ -169,7 +161,6 @@ def 
test_missing_file_data_entity_with_unquoted_name(): def test_missing_dataset_entity_with_quoted_name(): - """""" do_entity_test( paths.missing_dataset_data_entity_with_quoted_name, models.Severity.REQUIRED, @@ -180,7 +171,6 @@ def test_missing_dataset_entity_with_quoted_name(): def test_missing_dataset_entity_with_unquoted_name(): - """""" do_entity_test( paths.missing_dataset_data_entity_with_unquoted_name, models.Severity.REQUIRED, @@ -191,7 +181,6 @@ def test_missing_dataset_entity_with_unquoted_name(): def test_missing_absolute_path_data_entity(): - """""" do_entity_test( paths.missing_file_data_entity_with_absolute_path, models.Severity.RECOMMENDED, @@ -202,7 +191,6 @@ def test_missing_absolute_path_data_entity(): def test_valid_rocrate_with_data_entities(): - """""" do_entity_test(ValidROC().rocrate_with_data_entities, models.Severity.REQUIRED, True, profile_identifier="ro-crate") From ba2ef28013cf35de866c7ea5215766f0871df912 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 10:03:26 +0200 Subject: [PATCH 298/352] =?UTF-8?q?fix(typing):=20=F0=9F=90=9B=20use=20bar?= =?UTF-8?q?e=20raise=20instead=20of=20raise=20e=20(TRY201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/uri.py | 4 ++-- tests/shared.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 7587f166b..e3d12e1a4 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -313,7 +313,7 @@ def validate_rocrate_uri(uri: str | Path | URI, silent: bool = False) -> bool: if logger.isEnabledFor(logging.DEBUG): logger.exception("Invalid RO-Crate URI: %s", uri) raise errors.ROCrateInvalidURIError(uri) from e - except Exception as e: + except Exception: if not silent: - raise e + raise return False diff --git a/tests/shared.py b/tests/shared.py index 35aa5c72c..c868afb6f 100644 --- a/tests/shared.py +++ 
b/tests/shared.py @@ -211,7 +211,7 @@ def do_entity_test( raise AssertionError( f'The expected issue "{expected_issue}" was not found in the detected issues' ) - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): logger.exception("Failed to validate RO-Crate @ path: %s", rocrate_path) logger.debug("Requirement severity: %s", requirement_severity) @@ -220,7 +220,7 @@ def do_entity_test( logger.debug("Expected triggered issues: %s", expected_triggered_issues) logger.debug("Failed requirements: %s", failed_requirements) logger.debug("Detected issues: %s", detected_issues) - raise e + raise finally: # cleanup if temp_rocrate_path is not None: From bcbbdc34cb470710555ae6396b3c042604675f41 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 10:09:55 +0200 Subject: [PATCH 299/352] =?UTF-8?q?fix(services):=20=F0=9F=90=9B=20guard?= =?UTF-8?q?=20original=5Fdata=5Fpath=20against=20None=20on=20restore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/services.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index ec8eda8a5..20d089b59 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -109,8 +109,9 @@ def _extract_and_validate( settings.rocrate_uri = URI(str(tmp_dir)) return _build_validator(settings, subscribers) finally: - settings.rocrate_uri = original_data_path - logger.debug("Original data path restored: %s", original_data_path) + if original_data_path is not None: + settings.rocrate_uri = original_data_path + logger.debug("Original data path restored: %s", original_data_path) def _download_remote_rocrate( From d7069c001d86fb675df0f4fd5aac893c60f98450 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 10:11:02 +0200 Subject: [PATCH 300/352] =?UTF-8?q?fix(rocrate):=20=F0=9F=90=9B=20narrow?= 
=?UTF-8?q?=20as=5Fdict=20return=20type=20and=20add=20None=20guard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 0e530e830..2de0e04cc 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -411,10 +411,11 @@ def as_json(self) -> str: )) return self._json - def as_dict(self) -> dict: + def as_dict(self) -> dict[Any, Any]: if not self._dict: # if the dictionary is not cached, load it self._dict = json.loads(self.as_json()) + assert self._dict is not None, "Metadata dictionary should not be None after loading" return self._dict def as_graph(self, publicID: str | None = None) -> Graph: From fa9b7b190fdfa306328fcdd18c7e709603987b82 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 10:13:00 +0200 Subject: [PATCH 301/352] =?UTF-8?q?refactor(events):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20import=20document=5Fenum=20directly=20and=20fix=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/events.py | 7 +++---- rocrate_validator/models.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/events.py b/rocrate_validator/events.py index 3041751a7..0004bbca4 100644 --- a/rocrate_validator/events.py +++ b/rocrate_validator/events.py @@ -17,7 +17,7 @@ from functools import total_ordering from typing import Any -import enum_tools +from enum_tools.documentation import document_enum from rocrate_validator.utils import log as logging @@ -26,10 +26,10 @@ @enum.unique -@enum_tools.documentation.document_enum +@document_enum @total_ordering class EventType(enum.Enum): - """ Event types """ + """Event types""" #: Validation start VALIDATION_START = 0 @@ -127,7 +127,6 @@ def __ne__(self, other): class Subscriber(ABC): - """ 
Subscriber interface. Objects that want to be notified of events generated during the validation process diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index a1757d948..e006fc3da 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Any, Protocol, cast from urllib.error import HTTPError -import enum_tools +from enum_tools.documentation import document_enum from rdflib import RDF, RDFS, Graph, Namespace, URIRef from rocrate_validator import __version__ @@ -81,7 +81,7 @@ @enum.unique -@enum_tools.documentation.document_enum +@document_enum @total_ordering class Severity(enum.Enum): """ From 4156d7d8f380ad436a584669eacc7d05b178c39c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 12:09:30 +0200 Subject: [PATCH 302/352] =?UTF-8?q?fix(typing):=20=F0=9F=9B=A1=EF=B8=8F=20?= =?UTF-8?q?add=20None=20guards,=20narrow=20types,=20and=20fix=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/cli/commands/cache.py | 2 +- rocrate_validator/cli/commands/profiles.py | 20 ++++- rocrate_validator/cli/commands/validate.py | 7 +- rocrate_validator/models.py | 78 ++++++++++++------- .../requirements/shacl/validator.py | 6 ++ rocrate_validator/utils/config.py | 2 +- rocrate_validator/utils/http.py | 2 +- rocrate_validator/utils/io_helpers/input.py | 2 +- .../io_helpers/output/text/formatters.py | 3 +- 9 files changed, 86 insertions(+), 36 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 0d8ec9c2e..3f67c2623 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -419,7 +419,7 @@ def _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, request if profile is None: candidates = Profile.get_by_token(ident) or [] if candidates: - profile = max(candidates, key=lambda p: p.version) 
+ profile = max(candidates, key=lambda p: p.version or "") if len(candidates) > 1: ambiguous_fallbacks.append((ident, profile, candidates)) if profile is None: diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index 8ed830893..28b80bf77 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -139,8 +139,14 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA [f"[{v['color']}]{k}[/{v['color']}]: {v['count']}" for k, v in checks_info.items()]) # Add the row to the table + profile_name = ( + "\n".join(map(str, profile.name)) + if isinstance(profile.name, list) + else str(profile.name or "") + ) + table.add_row(profile.identifier, profile.uri, profile.version, - profile.name, Markdown(profile.description.strip()), + profile_name, Markdown((profile.description or "").strip()), "\n".join([p.identifier for p in profile.inherited_profiles]), checks_summary) table.add_row() @@ -222,8 +228,16 @@ def describe_profile(ctx, # Set the subheader content subheader_content = f"[bold cyan]Version:[/bold cyan] [italic green]{profile.version}[/italic green]\n" subheader_content += f"[bold cyan]URI:[/bold cyan] [italic yellow]{profile.uri}[/italic yellow]\n\n" - subheader_content += f"[bold cyan]Name:[/bold cyan] [italic]{profile.name.strip()}[/italic]\n" - subheader_content += f"[bold cyan]Description:[/bold cyan] [italic]{profile.description.strip()}[/italic]" + profile_name = profile.name or "" + if isinstance(profile_name, list): + profile_name = ", ".join(str(name).strip() for name in profile_name if str(name).strip()) + else: + profile_name = str(profile_name).strip() + subheader_content += f"[bold cyan]Name:[/bold cyan] [italic]{profile_name}[/italic]\n" + subheader_content += ( + f"[bold cyan]Description:[/bold cyan] " + f"[italic]{(profile.description or '').strip()}[/italic]" + ) # Add path info to the subheader subheader_content += ( "\n\n" 
diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index faa91cdac..566daede7 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -461,7 +461,7 @@ def _parse_skip_checks(skip_checks: list[str] | None) -> list[str]: if logger.isEnabledFor(logging.DEBUG): logger.exception("Error parsing skip_checks") raise ValueError( - f"Invalid skip_checks value: {s}. " + f"Invalid skip_checks value: {skip_checks}. " "It must be a comma-separated list of Fully Qualified Check IDs." ) from e logger.debug("Skip checks: %s", skip_checks_list) @@ -511,6 +511,8 @@ def _resolve_profile_identifiers( ) ) selected_options = multiple_choice(console, available_profiles) + if selected_options is None or isinstance(selected_options, bool): + selected_options = [] profile_identifiers = [available_profiles[int(o)].identifier for o in selected_options] logger.debug("Profile selected: %s", selected_options) console.print(Padding(Rule(style="bold yellow"), (1, 2))) @@ -596,6 +598,9 @@ def _render_file_or_collected_result( else: result = services.validate(validation_settings) + if result is None: + raise RuntimeError("Validation did not produce a result") + # Output processing for text format to file if output_file and output_format == "text": if interactive: diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index e006fc3da..6ca9e11bf 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -456,7 +456,11 @@ def parents(self) -> list[Profile]: The list of profiles that this profile is a profile of as specified in the profile specification file. 
""" - return [self.__profiles_map.get_by_key(_) for _ in self.is_profile_of] + return [ + profile + for profile in (self.__profiles_map.get_by_key(_) for _ in self.is_profile_of) + if profile is not None + ] @property def siblings(self) -> list[Profile]: @@ -506,7 +510,7 @@ def severity(self) -> Severity: return self._severity @property - def description(self) -> str: + def description(self) -> str | None: """ The description of the profile as specified in the profile specification file (i.e., the value of the rdfs: comment property in the `profile.ttl` file). @@ -516,7 +520,8 @@ def description(self) -> str: with self.readme_file_path.open(encoding="utf-8") as f: self._description = f.read() else: - self._description = self.comment + comment = self.comment + self._description = str(comment) if comment else None return self._description @property @@ -571,6 +576,8 @@ def __get_nested_profiles__(cls, source: str) -> list[str]: if p not in visited: visited.append(p) profile = cls.__profiles_map.get_by_key(p) + if profile is None: + continue inherited_profiles = profile.is_profile_of if inherited_profiles: for p in sorted(inherited_profiles, reverse=True): @@ -586,7 +593,13 @@ def inherited_profiles(self) -> list[Profile]: if not inherited_profiles or len(inherited_profiles) == 0: inherited_profiles = Profile.__get_nested_profiles__(self.uri) profile_keys = self.__profiles_map.keys - return [self.__profiles_map.get_by_key(_) for _ in inherited_profiles if _ in profile_keys] + return [ + profile + for key in inherited_profiles + if key in profile_keys + for profile in [self.__profiles_map.get_by_key(key)] + if isinstance(profile, Profile) + ] def add_requirement(self, requirement: Requirement): self._requirements.append(requirement) @@ -824,7 +837,7 @@ def load_profiles( ) @classmethod - def get_by_identifier(cls, identifier: str) -> Profile: + def get_by_identifier(cls, identifier: str) -> Profile | None: """ Get the profile with the given identifier @@ -834,10 +847,13 
@@ def get_by_identifier(cls, identifier: str) -> Profile: :return: the profile :rtype: Profile """ - return cls.__profiles_map.get_by_index("identifier", identifier) + profile = cls.__profiles_map.get_by_index("identifier", identifier) + if isinstance(profile, list): + return cast("Profile | None", profile[0] if profile else None) + return cast("Profile | None", profile) @classmethod - def get_by_uri(cls, uri: str) -> Profile: + def get_by_uri(cls, uri: str) -> Profile | None: """ Get the profile with the given URI @@ -847,7 +863,7 @@ def get_by_uri(cls, uri: str) -> Profile: :return: the profile :rtype: Profile """ - return cls.__profiles_map.get_by_key(uri) + return cast("Profile | None", cls.__profiles_map.get_by_key(uri)) @classmethod def get_by_name(cls, name: str) -> list[Profile]: @@ -860,7 +876,7 @@ def get_by_name(cls, name: str) -> list[Profile]: :return: the profile :rtype: Profile """ - return cls.__profiles_map.get_by_index("name", name) + return cast("list[Profile]", cls.__profiles_map.get_by_index("name", name) or []) @classmethod def get_by_token(cls, token: str) -> list[Profile]: @@ -873,7 +889,7 @@ def get_by_token(cls, token: str) -> list[Profile]: :return: the profile :rtype: Profile """ - return cls.__profiles_map.get_by_index("token", token) + return cast("list[Profile]", cls.__profiles_map.get_by_index("token", token) or []) @classmethod def get_sibling_profiles(cls, profile: Profile) -> list[Profile]: @@ -910,7 +926,7 @@ def all(cls) -> list[Profile]: :return: the list of profiles :rtype: list[Profile] """ - return cls.__profiles_map.values() + return list(cls.__profiles_map.values()) @classmethod def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Profile | None: @@ -2668,16 +2684,16 @@ def to_json(self, path: Path | None = None) -> str: class CustomEncoder(json.JSONEncoder): - def default(self, obj): # pylint: disable=arguments-renamed - if isinstance(obj, CheckIssue): - return obj.__dict__ - if 
isinstance(obj, Path): - return str(obj) - if isinstance(obj, (RequirementCheck, Requirement)): - return obj.identifier - if isinstance(obj, (Severity, RequirementLevel)): - return obj.name - return super().default(obj) + def default(self, o): + if isinstance(o, CheckIssue): + return o.__dict__ + if isinstance(o, Path): + return str(o) + if isinstance(o, (RequirementCheck, Requirement)): + return o.identifier + if isinstance(o, (Severity, RequirementLevel)): + return o.name + return super().default(o) @dataclass @@ -2689,7 +2705,7 @@ class ValidationSettings: """ #: The URI of the RO-Crate - rocrate_uri: URI + rocrate_uri: URI # pyright: ignore[reportRedeclaration] #: The relative root path of the RO-Crate rocrate_relative_root_path: Path | None = None # Profile settings @@ -3229,8 +3245,10 @@ def __init__(self, validator: Validator, settings: ValidationSettings): if settings.metadata_dict: self._rocrate = ROCrate.from_metadata_dict(settings.metadata_dict) else: + rocrate_uri = settings.rocrate_uri + assert rocrate_uri is not None, "RO-Crate URI is required when metadata_dict is not provided" self._rocrate = ROCrate.new_instance( - settings.rocrate_uri, + rocrate_uri, relative_root_path=settings.rocrate_relative_root_path, ) assert isinstance(self._rocrate, ROCrate), "Invalid RO-Crate instance" @@ -3346,7 +3364,10 @@ def rocrate_uri(self) -> URI: :return: The URI of the RO-Crate :rtype: Path """ - return self.settings.rocrate_uri + rocrate_uri = self.settings.rocrate_uri + if rocrate_uri is None: + raise ValueError("RO-Crate URI is not set") + return rocrate_uri @property def fail_fast(self) -> bool: @@ -3471,11 +3492,9 @@ def __load_profiles__(self) -> list[Profile]: logger.debug("Candidate profiles found by token: %s", profile) if candidate_profiles: # Find the profile with the highest version number - profile = max(candidate_profiles, key=lambda p: p.version) + profile = max(candidate_profiles, key=lambda p: p.version or "") self.settings.profile_identifier = 
profile.identifier logger.debug("Profile with the highest version number: %s", profile) - # if the profile is found by token, set the profile name to the identifier - self.settings.profile_identifier = profile.identifier except AttributeError as e: # raised when the profile is not found if logger.isEnabledFor(logging.DEBUG): @@ -3484,6 +3503,11 @@ def __load_profiles__(self) -> list[Profile]: self.profile_identifier, message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", ) from e + if profile is None: + raise ProfileNotFound( + self.profile_identifier, + message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", + ) # if the inheritance is enabled, return only the target profile if not self.inheritance_enabled: diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 97019ee89..5eca51038 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -487,6 +487,12 @@ def validate( logger.debug("pyshacl.validate result: Results Graph: %r", results_graph) logger.debug("pyshacl.validate result: Results Text: %r", results_text) + if not isinstance(results_graph, Graph): + raise TypeError( + "pyshacl.validate returned a non-Graph results_graph: " + f"{type(results_graph).__name__}" + ) + # serialize the results graph if serialization_output_path: assert serialization_output_format in [ diff --git a/rocrate_validator/utils/config.py b/rocrate_validator/utils/config.py index 5923382da..a9bc5c8c3 100644 --- a/rocrate_validator/utils/config.py +++ b/rocrate_validator/utils/config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import toml +import toml # type: ignore[import-untyped] from rocrate_validator.utils import log as logging from rocrate_validator.utils.paths import get_config_path diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 5af8ad541..91256d3ce 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from typing_extensions import Self -import requests +import requests # type: ignore[import-untyped] from rocrate_validator import constants from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 246ec1dec..711303db4 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -17,8 +17,8 @@ import sys from typing import TYPE_CHECKING -from InquirerPy import prompt from InquirerPy.base.control import Choice +from InquirerPy.resolver import prompt from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index 8502453e2..fabe90196 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -95,5 +95,6 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR statistics=self._validation_statistics ) logger.debug(layout.layout) - yield layout.layout + if layout.layout is not None: + yield layout.layout yield Padding("\n", (0, 0)) From 21e6f47f1afc29c762093f3e66fe14c7194520bd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 12:10:39 +0200 Subject: [PATCH 303/352] =?UTF-8?q?test(typing):=20=F0=9F=A7=AA=20fix=20te?= =?UTF-8?q?sts=20for=20stricter=20type=20annotations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt test code to 
match the tightened types in the production code: - rocrate_uri now expects URI instead of str → wrap paths with URI() - log level now expects int/constant instead of string - Add type annotations where mypy requires them - Adjust assertions and fixtures for Profile | None returns --- tests/conftest.py | 2 +- tests/integration/test_offline_mode.py | 28 +++++------ tests/ro_crates.py | 6 +-- tests/shared.py | 12 +++-- tests/test_cli.py | 8 ++-- tests/test_cli_cache.py | 4 +- tests/test_models.py | 3 +- .../requirements/test_load_requirements.py | 43 ++++++++++++----- tests/unit/requirements/test_profiles.py | 47 ++++++++++--------- tests/unit/requirements/test_shacl_checks.py | 37 ++++++++------- tests/unit/requirements/test_shacl_utils.py | 2 +- tests/unit/test_cache_warmup.py | 11 +++-- tests/unit/test_http_requester_offline.py | 4 +- tests/unit/test_http_requester_reconfigure.py | 7 +-- tests/unit/test_remote_context_retrieval.py | 1 + tests/unit/test_requirement_lifecycle.py | 4 +- tests/unit/test_rocrate.py | 45 +++++++++--------- tests/unit/test_services.py | 22 ++++----- tests/unit/test_validation_settings.py | 38 +++++++-------- 19 files changed, 179 insertions(+), 145 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5d3d7b1b1..7d57a770e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ # set up logging logging.basicConfig( - level="warning", + level=logging.WARNING, modules_config={ # "rocrate_validator.models": {"level": logging.DEBUG} } diff --git a/tests/integration/test_offline_mode.py b/tests/integration/test_offline_mode.py index 10dd0ac54..3d90750a2 100644 --- a/tests/integration/test_offline_mode.py +++ b/tests/integration/test_offline_mode.py @@ -23,7 +23,7 @@ from click.testing import CliRunner from rocrate_validator.cli.main import cli -from rocrate_validator.models import ValidationSettings +from rocrate_validator.models import URI, ValidationSettings from rocrate_validator.utils.http import 
OFFLINE_CACHE_MISS_STATUS, HttpRequester from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC @@ -52,7 +52,7 @@ def network_interceptor(monkeypatch): """ from requests.adapters import HTTPAdapter - recorder = {"calls": []} + recorder: dict[str, list[str]] = {"calls": []} def fake_send(self, request, **kwargs): recorder["calls"].append(request.url) @@ -77,7 +77,7 @@ def cli_runner() -> CliRunner: def test_offline_flag_configures_cache(tmp_path): settings = ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, cache_path=tmp_path / "cache", ) @@ -90,7 +90,7 @@ def test_offline_flag_configures_cache(tmp_path): def test_offline_default_path_is_persistent(tmp_path, monkeypatch): monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, cache_path=None, ) @@ -102,7 +102,7 @@ def test_offline_default_path_is_persistent(tmp_path, monkeypatch): def test_offline_cache_miss_yields_504_response(tmp_path): ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, cache_path=tmp_path / "cache", ) @@ -120,7 +120,7 @@ def test_online_then_offline_share_default_cache(tmp_path, network_interceptor, url = "https://example.org/ctx" ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=False, cache_max_age=60, ) @@ -133,7 +133,7 @@ def test_online_then_offline_share_default_cache(tmp_path, network_interceptor, HttpRequester.reset() ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, ) offline_info = HttpRequester().cache_info() @@ -147,7 +147,7 @@ def 
test_offline_reuses_cached_response(tmp_path, network_interceptor): cache_path = tmp_path / "cache" # First: online run populates the cache. ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=False, cache_path=cache_path, cache_max_age=60, @@ -161,7 +161,7 @@ def test_offline_reuses_cached_response(tmp_path, network_interceptor): # Second: offline run must not hit the network but still get the cached doc. HttpRequester.reset() ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, cache_path=cache_path, ) @@ -175,7 +175,7 @@ def test_offline_reuses_cached_response(tmp_path, network_interceptor): def test_no_cache_disables_cache_backend(tmp_path, network_interceptor): """no_cache=True must skip the cache and hit the network every call.""" ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=False, no_cache=True, ) @@ -192,7 +192,7 @@ def test_no_cache_disables_cache_backend(tmp_path, network_interceptor): def test_negative_cache_max_age_means_never_expire(tmp_path, network_interceptor): """cache_max_age<0 must enable the cache with no expiration.""" ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=False, cache_max_age=-1, cache_path=tmp_path / "cache", @@ -210,7 +210,7 @@ def test_negative_cache_max_age_means_never_expire(tmp_path, network_interceptor def test_offline_with_disabled_cache_raises(): with pytest.raises(ValueError, match="Offline mode requires the HTTP cache"): ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, no_cache=True, ) @@ -389,7 +389,7 @@ def test_auto_warm_up_skipped_when_offline(tmp_path, network_interceptor, monkey """Auto 
warm-up must not run when offline mode is active.""" monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "1") ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=True, cache_path=tmp_path / "cache", ) @@ -399,7 +399,7 @@ def test_auto_warm_up_skipped_when_offline(tmp_path, network_interceptor, monkey def test_auto_warm_up_disabled_via_env(tmp_path, network_interceptor, monkeypatch): monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") ValidationSettings( - rocrate_uri=str(ValidROC().wrroc_paper_long_date), + rocrate_uri=URI(ValidROC().wrroc_paper_long_date), offline=False, cache_path=tmp_path / "cache", ) diff --git a/tests/ro_crates.py b/tests/ro_crates.py index 7d9407687..efed37470 100644 --- a/tests/ro_crates.py +++ b/tests/ro_crates.py @@ -13,7 +13,7 @@ # limitations under the License. from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import mkdtemp from pytest import fixture @@ -83,7 +83,7 @@ def workflow_roc_string_license(self) -> Path: return VALID_CRATES_DATA_PATH / "workflow-roc-string-license" @property - def sort_and_change_remote(self) -> Path: + def sort_and_change_remote(self) -> str: return "https://raw.githubusercontent.com/lifemonitor/validator-test-data/main/sortchangecase.crate.zip" @property @@ -129,7 +129,7 @@ class InvalidFileDescriptor: @property def missing_file_descriptor(self) -> Path: - return TemporaryDirectory() + return Path(mkdtemp()) @property def invalid_json_format(self) -> Path: diff --git a/tests/shared.py b/tests/shared.py index c868afb6f..83491a1b0 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -130,7 +130,7 @@ def do_entity_test( profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, rocrate_entity_patch: dict | None = None, rocrate_entity_mod_sparql: str | None = None, - skip_checks: list[str] | None = (), + skip_checks: list[str] | None = None, rocrate_relative_root_path: str | None = None, metadata_only: 
bool = False, metadata_dict: dict | None = None, @@ -153,7 +153,8 @@ def do_entity_test( rocrate_path = Path(rocrate_path) temp_rocrate_path = None - if any([rocrate_entity_patch, rocrate_entity_mod_sparql]) and rocrate_path.is_dir(): + if (any([rocrate_entity_patch, rocrate_entity_mod_sparql]) + and isinstance(rocrate_path, Path) and rocrate_path.is_dir()): temp_rocrate_path = _prepare_temp_rocrate(rocrate_path, rocrate_entity_patch, rocrate_entity_mod_sparql) rocrate_path = temp_rocrate_path @@ -168,14 +169,17 @@ def do_entity_test( logger.debug("Checks to skip: %s", skip_checks) # validate RO-Crate + relative_root_path = ( + Path(rocrate_relative_root_path) if rocrate_relative_root_path is not None else None + ) result: models.ValidationResult = services.validate( models.ValidationSettings( - rocrate_uri=rocrate_path, + rocrate_uri=models.URI(rocrate_path), requirement_severity=requirement_severity, abort_on_first=abort_on_first, profile_identifier=profile_identifier, skip_checks=skip_checks, - rocrate_relative_root_path=rocrate_relative_root_path, + rocrate_relative_root_path=relative_root_path, metadata_only=metadata_only, metadata_dict=metadata_dict, **kwargs, diff --git a/tests/test_cli.py b/tests/test_cli.py index d488f367f..4f38f0f86 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -79,8 +79,8 @@ def test_validate_subcmd_invalid_local_archive_rocrate(cli_runner: CliRunner): def test_validate_skip_checks_option(cli_runner: CliRunner): # Patch the validation service to capture the skip_checks argument - called_args = [] - called_kwargs = {} + called_args: list = [] + called_kwargs: dict = {} def mock_validate(*args, **kwargs): logger.warning(f"Mock validate called with args: {args}, kwargs: {kwargs}") @@ -121,7 +121,7 @@ def mock_validate(*args, **kwargs): # Check if the skip_checks value matches the expected value assert list(skip_checks_1 + skip_checks_2) == settings["skip_checks"], \ - f"Expected skip_checks to be {list(skip_checks_1 + 
skip_checks_2)}, but got {settings.skip_checks}" + f"Expected skip_checks to be {list(skip_checks_1 + skip_checks_2)}, but got {settings['skip_checks']}" def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): @@ -158,7 +158,7 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): """ Test the list of extra profiles. """ - result = cli_runner.invoke(cli, ["profiles", "--extra-profiles-path", fake_profiles_path, "list", "--no-paging"]) + result = cli_runner.invoke(cli, ["profiles", "--extra-profiles-path", str(fake_profiles_path), "list", "--no-paging"]) assert result.exit_code == 0 assert "Profile A" in result.output # Check for a known extra profile diff --git a/tests/test_cli_cache.py b/tests/test_cli_cache.py index 8be899558..7999adaea 100644 --- a/tests/test_cli_cache.py +++ b/tests/test_cli_cache.py @@ -76,7 +76,9 @@ def _make_profile_stub(identifier: str, version: str, token: str): """Lightweight stand-in for a Profile used only by token fallback tests.""" class _Stub: - pass + identifier: str + version: str + token: str stub = _Stub() stub.identifier = identifier diff --git a/tests/test_models.py b/tests/test_models.py index 4095ef1d6..a0e048a2b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,7 +15,7 @@ import pytest from rocrate_validator import models, services -from rocrate_validator.models import LevelCollection, RequirementLevel, Severity, ValidationSettings +from rocrate_validator.models import URI, LevelCollection, RequirementLevel, Severity, ValidationSettings from tests.ro_crates import InvalidRootDataEntity, WROCInvalidReadme @@ -72,6 +72,7 @@ def test_level_collection(): @pytest.fixture def validation_settings(): return ValidationSettings( + rocrate_uri=URI("file:///"), requirement_severity=Severity.OPTIONAL, abort_on_first=False ) diff --git a/tests/unit/requirements/test_load_requirements.py b/tests/unit/requirements/test_load_requirements.py index 2f0eb302d..bf0952ade 100644 --- 
a/tests/unit/requirements/test_load_requirements.py +++ b/tests/unit/requirements/test_load_requirements.py @@ -14,6 +14,7 @@ import logging from pathlib import Path +from typing import Any from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER from rocrate_validator.models import LevelCollection, Profile, Severity @@ -27,6 +28,13 @@ paths = InvalidFileDescriptorEntity() +def _require(profile: Profile, name: str): + """Return the named requirement, asserting it exists (it must for these tests).""" + requirement = profile.get_requirement(name) + assert requirement is not None, f"Requirement {name!r} not found" + return requirement + + def test_requirements_loading(profiles_requirement_loading: str): # The order of the requirement levels @@ -39,7 +47,7 @@ def test_requirements_loading(profiles_requirement_loading: str): number_of_checks_per_requirement = 4 # Define the settings - settings = { + settings: dict[str, Any] = { "profiles_path": profiles_requirement_loading, "severity": Severity.OPTIONAL } @@ -58,8 +66,13 @@ def test_requirements_loading(profiles_requirement_loading: str): # Sort requirements by their order sorted_requirements = sorted( - requirements, key=lambda x: (-x.severity_from_path.value, x.path.name, x.name) - if x.severity_from_path else (0, x.path.name, x.name)) + requirements, + key=lambda x: ( + -(x.severity_from_path.value if x.severity_from_path else 0), + x.path.name if x.path else "", + x.name, + ), + ) # Check the order of the requirements for i, requirement in enumerate(sorted_requirements): @@ -69,7 +82,7 @@ def test_requirements_loading(profiles_requirement_loading: str): # Check the requirements and their checks for requirement_name in requirements_names: logger.debug("The requirement: %r", requirement_name) - requirement = profile.get_requirement(requirement_name) + requirement = _require(profile, requirement_name) assert requirement.name == requirement_name, "The name of the requirement is incorrect" if requirement_name 
in ["A", "B"]: assert requirement.severity_from_path is None, "The severity of the requirement should be None" @@ -110,8 +123,14 @@ def test_order_of_loaded_profile_requirements(profiles_path: str): requirement.name, requirement.severity_from_path, requirement.path) # Sort requirements by their order - requirements = sorted(requirements, key=lambda x: (-x.severity_from_path.value, x.path.name, x.name) - if x.severity_from_path else (0, x.path.name, x.name)) + requirements = sorted( + requirements, + key=lambda x: ( + -(x.severity_from_path.value if x.severity_from_path else 0), + x.path.name if x.path else "", + x.name, + ), + ) # Check the order of the requirements for i, requirement in enumerate(requirements): @@ -122,7 +141,7 @@ def test_order_of_loaded_profile_requirements(profiles_path: str): for r in profile.get_requirements(severity=Severity.OPTIONAL): logger.debug("The requirement: %r -> severity: %r", r.name, r.severity_from_path) - r = profile.get_requirement("RO-Crate Root Data Entity RECOMMENDED value") + r = _require(profile, "RO-Crate Root Data Entity RECOMMENDED value") assert r.severity_from_path == Severity.RECOMMENDED, "The severity of the requirement should be RECOMMENDED" # Check the number of requirement checks @@ -145,7 +164,7 @@ def test_hidden_requirements(profiles_loading_hidden_requirements: str): requirements_names = ["A", "B", "A_MUST", "B_MUST"] # Define the settings - settings = { + settings: dict[str, Any] = { "profiles_path": profiles_loading_hidden_requirements, "severity": Severity.OPTIONAL } @@ -163,17 +182,17 @@ def test_hidden_requirements(profiles_loading_hidden_requirements: str): assert len(requirements) == len(requirements_names), "The number of requirements is incorrect" # Check if the requirement A is hidden - requirement_a = profile.get_requirement("A") + requirement_a = _require(profile, "A") assert requirement_a.hidden, "The requirement A should be hidden" # Check if the requirement B is hidden - requirement_b = 
profile.get_requirement("B") + requirement_b = _require(profile, "B") assert requirement_b.hidden, "The requirement B should be hidden" # Check if the requirement A_MUST is not hidden - requirement_a_must = profile.get_requirement("A_MUST") + requirement_a_must = _require(profile, "A_MUST") assert not requirement_a_must.hidden, "The requirement A_MUST should not be hidden" # Check if the requirement B_MUST is not hidden - requirement_b_must = profile.get_requirement("B_MUST") + requirement_b_must = _require(profile, "B_MUST") assert not requirement_b_must.hidden, "The requirement B_MUST should not be hidden" diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index f4693f7c9..4a5920950 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -14,13 +14,14 @@ import logging from pathlib import Path +from typing import Any import pytest from rdflib import Literal, Namespace from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER, SHACL_NS from rocrate_validator.errors import DuplicateRequirementCheck, InvalidProfilePath, ProfileSpecificationError -from rocrate_validator.models import Profile, ValidationContext, ValidationSettings, Validator +from rocrate_validator.models import URI, Profile, ValidationContext, ValidationSettings, Validator from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import ShapesRegistry from tests.ro_crates import InvalidFileDescriptorEntity, ValidROC @@ -53,14 +54,14 @@ def test_order_of_loaded_profiles(profiles_path: str): def test_load_invalid_profile_from_validation_context(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": "/tmp/random_path_xxx", "profile_identifier": DEFAULT_PROFILE_IDENTIFIER, "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": False, } 
- settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert not settings.enable_profile_inheritance, "The inheritance mode should be set to False" validator = Validator(settings) @@ -75,14 +76,14 @@ def test_load_invalid_profile_from_validation_context(fake_profiles_path: str): def test_load_valid_profile_without_inheritance_from_validation_context(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "c", "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": False, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert not settings.enable_profile_inheritance, "The inheritance mode should be set to False" validator = Validator(settings) @@ -99,7 +100,7 @@ def test_load_valid_profile_without_inheritance_from_validation_context(fake_pro def test_profile_spec_properties(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "c", "rocrate_uri": ValidROC().wrroc_paper, @@ -107,7 +108,7 @@ def test_profile_spec_properties(fake_profiles_path: str): "disable_check_for_duplicates": True, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert settings.enable_profile_inheritance, "The inheritance mode should be set to True" validator = Validator(settings) @@ -221,7 +222,7 @@ def __perform_test__(profile_identifier: str, expected_inherited_profiles: list[ def test_load_invalid_profile_no_override_enabled(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "invalid-duplicated-shapes", "rocrate_uri": 
ValidROC().wrroc_paper, @@ -229,7 +230,7 @@ def test_load_invalid_profile_no_override_enabled(fake_profiles_path: str): "allow_requirement_check_override": False, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert settings.enable_profile_inheritance, "The inheritance mode should be set to True" assert not settings.allow_requirement_check_override, "The override mode should be set to False" @@ -245,7 +246,7 @@ def test_load_invalid_profile_no_override_enabled(fake_profiles_path: str): def test_load_invalid_profile_with_override_on_same_profile(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "invalid-duplicated-shapes", "rocrate_uri": ValidROC().wrroc_paper, @@ -253,7 +254,7 @@ def test_load_invalid_profile_with_override_on_same_profile(fake_profiles_path: "allow_requirement_check_override": False, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert settings.enable_profile_inheritance, "The inheritance mode should be set to True" assert not settings.allow_requirement_check_override, "The override mode should be set to `True`" validator = Validator(settings) @@ -268,7 +269,7 @@ def test_load_invalid_profile_with_override_on_same_profile(fake_profiles_path: def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_path: str): """Test the loaded profiles from the validator context.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "c-overridden", "rocrate_uri": ValidROC().wrroc_paper, @@ -276,7 +277,7 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat "allow_requirement_check_override": True, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) assert 
settings.enable_profile_inheritance, "The inheritance mode should be set to True" assert settings.allow_requirement_check_override, "The override mode should be set to `True`" validator = Validator(settings) @@ -304,9 +305,9 @@ def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str) no SHACLCheck would be recorded as executed for the wrapper target.""" settings = ValidationSettings( - profiles_path=fake_profiles_path, + profiles_path=Path(fake_profiles_path), profile_identifier="c-wrapper", - rocrate_uri=ValidROC().wrroc_paper, + rocrate_uri=URI(ValidROC().wrroc_paper), enable_profile_inheritance=True, allow_requirement_check_override=True, disable_check_for_duplicates=True, @@ -471,15 +472,15 @@ def disabled(self, ctx): def enabled(self, ctx): return True - assert disabled.deactivated is True - assert enabled.deactivated is False + assert disabled.deactivated is True # pyright: ignore[reportFunctionMemberAccess] + assert enabled.deactivated is False # pyright: ignore[reportFunctionMemberAccess] def test_shacl_shape_with_deactivated_marks_check_skipped(fake_profiles_path: str): """A child profile that overrides an inherited NodeShape by `sh:name` and sets `sh:deactivated true` should produce a check whose `deactivated` property is True; the parent's check should be marked as `overridden`.""" - settings = { + settings_dict: dict[str, Any] = { "profiles_path": fake_profiles_path, "profile_identifier": "c-deactivated", "rocrate_uri": ValidROC().wrroc_paper, @@ -487,7 +488,7 @@ def test_shacl_shape_with_deactivated_marks_check_skipped(fake_profiles_path: st "allow_requirement_check_override": True, } - settings = ValidationSettings(**settings) + settings = ValidationSettings(**settings_dict) validator = Validator(settings) context = ValidationContext(validator, validator.validation_settings) @@ -524,9 +525,9 @@ def test_shacl_check_deactivated_via_cross_profile_triple(fake_profiles_path: st SHACLCheck.deactivated and the pre-load pass in 
Validator.""" settings = ValidationSettings( - profiles_path=fake_profiles_path, + profiles_path=Path(fake_profiles_path), profile_identifier="c-deactivated-direct", - rocrate_uri=ValidROC().wrroc_paper, + rocrate_uri=URI(ValidROC().wrroc_paper), enable_profile_inheritance=True, allow_requirement_check_override=True, ) @@ -566,9 +567,9 @@ def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str) one another's checks.""" settings = ValidationSettings( - profiles_path=fake_profiles_path, + profiles_path=Path(fake_profiles_path), profile_identifier="c", - rocrate_uri=ValidROC().wrroc_paper, + rocrate_uri=URI(ValidROC().wrroc_paper), enable_profile_inheritance=True, allow_requirement_check_override=True, ) diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index 59f8d0833..a25b656ae 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -13,11 +13,12 @@ # limitations under the License. 
import logging +from typing import cast from rdflib import BNode, Graph, Namespace, URIRef from rocrate_validator.constants import SHACL_NS -from rocrate_validator.models import LevelCollection +from rocrate_validator.models import LevelCollection, Requirement from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import NodeShape, PropertyShape, Shape, ShapesRegistry from rocrate_validator.requirements.shacl.utils import resolve_parent_shape @@ -44,7 +45,7 @@ def test_description_fallback_shape_with_description(): shape._name = "TestShape" shape._description = "Test Description" - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) assert check.description == "Test Description" @@ -57,7 +58,7 @@ def test_description_fallback_shape_without_description(): shape._name = "TestShape" shape._description = None - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) assert check.description == "Check for TestShape" @@ -69,11 +70,11 @@ def test_description_fallback_parent_description(): shape = Shape(URIRef("http://example.org/shape"), g) shape._name = "ChildShape" shape._description = None - shape._parent = MockParentShape( + shape._parent = cast("Shape", MockParentShape( name="ParentShape", description="Parent Description" - ) + )) - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) assert check.description == "Parent Description" @@ -86,7 +87,7 @@ def test_description_fallback_no_name_no_description(): shape._name = None shape._description = None - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) # BNode generates a name from node_name, so we check it starts with the fallback prefix @@ -99,9 +100,9 @@ def test_description_fallback_no_description_no_parent_description(): shape = Shape(BNode(), g) shape._name = 
"ChildShape" shape._description = None - shape._parent = MockParentShape(name="ParentShape", description=None) + shape._parent = cast("Shape", MockParentShape(name="ParentShape", description=None)) - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) assert check.description == "Check for ChildShape" @@ -115,9 +116,9 @@ def test_property_shape_description_fallback(): prop = PropertyShape(URIRef("http://example.org/property"), g) prop._name = "testProperty" prop._description = None - prop._parent = MockParentShape(name="ParentShape", description="Parent Description") + prop._parent = cast("Shape", MockParentShape(name="ParentShape", description="Parent Description")) - req = MockRequirement() + req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, prop) assert "testProperty" in check.description @@ -242,7 +243,7 @@ def test_derive_level_picks_most_stringent_declared_property_severity(): shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) shape.add_property(_make_property(g, f"{SHACL_NS}Info")) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.RECOMMENDED @@ -254,7 +255,7 @@ def test_derive_level_with_uniform_property_severity(): shape.add_property(_make_property(g, f"{SHACL_NS}Info")) shape.add_property(_make_property(g, f"{SHACL_NS}Info")) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.OPTIONAL @@ -266,7 +267,7 @@ def test_derive_level_ignores_properties_without_declared_severity(): shape.add_property(_make_property(g)) # no severity declared shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.RECOMMENDED @@ -278,7 +279,7 @@ def 
test_derive_level_falls_back_to_required_when_no_property_declares_severity( shape.add_property(_make_property(g)) shape.add_property(_make_property(g)) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.REQUIRED @@ -290,7 +291,7 @@ def test_shape_declared_severity_takes_precedence_over_derivation(): shape.severity = f"{SHACL_NS}Warning" shape.add_property(_make_property(g, f"{SHACL_NS}Violation")) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.RECOMMENDED @@ -302,7 +303,7 @@ def test_path_based_level_takes_precedence_over_derivation(): shape.add_property(_make_property(g, f"{SHACL_NS}Info")) check = SHACLCheck( - MockRequirement(requirement_level_from_path=LevelCollection.SHOULD), shape + cast("Requirement", MockRequirement(requirement_level_from_path=LevelCollection.SHOULD)), shape ) assert check.level == LevelCollection.SHOULD @@ -313,6 +314,6 @@ def test_derive_level_for_node_shape_without_properties(): g = Graph() shape = NodeShape(URIRef("http://example.org/NodeShape"), g) - check = SHACLCheck(MockRequirement(), shape) + check = SHACLCheck(cast("Requirement", MockRequirement()), shape) assert check.level == LevelCollection.REQUIRED diff --git a/tests/unit/requirements/test_shacl_utils.py b/tests/unit/requirements/test_shacl_utils.py index 6a64c7cc0..63b51dde2 100644 --- a/tests/unit/requirements/test_shacl_utils.py +++ b/tests/unit/requirements/test_shacl_utils.py @@ -33,7 +33,7 @@ EX = Namespace("http://example.org/") -def _build_two_property_shape() -> tuple[Graph, URIRef, URIRef, URIRef]: +def _build_two_property_shape() -> tuple[Graph, URIRef, BNode, BNode]: """ Build a NodeShape with two sibling property shapes. 
diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py index a36d664ff..0fbc09d18 100644 --- a/tests/unit/test_cache_warmup.py +++ b/tests/unit/test_cache_warmup.py @@ -17,11 +17,12 @@ from __future__ import annotations import io +from typing import cast import pytest import urllib3 -from rocrate_validator.models import Profile +from rocrate_validator.models import Profile, ValidationSettings from rocrate_validator.utils.cache_warmup import ( auto_warm_up_for_settings, discover_cacheable_urls_from_profiles, @@ -79,7 +80,7 @@ def sample_profile(tmp_path): @pytest.fixture def mock_network(monkeypatch): - from requests.adapters import HTTPAdapter + from requests.adapters import HTTPAdapter # type: ignore[import-untyped] def fake_send(self, request, **kwargs): raw = urllib3.HTTPResponse( @@ -149,7 +150,7 @@ class _Settings: profiles_path = get_profiles_path() extra_profiles_path = None - assert auto_warm_up_for_settings(_Settings()) is None + assert auto_warm_up_for_settings(cast("ValidationSettings", _Settings())) is None def test_auto_warm_up_disabled_via_env(monkeypatch, tmp_path): @@ -162,7 +163,7 @@ class _Settings: profiles_path = get_profiles_path() extra_profiles_path = None - assert auto_warm_up_for_settings(_Settings()) is None + assert auto_warm_up_for_settings(cast("ValidationSettings", _Settings())) is None def test_auto_warm_up_noop_when_no_cache_path(): @@ -173,4 +174,4 @@ class _Settings: profiles_path = get_profiles_path() extra_profiles_path = None - assert auto_warm_up_for_settings(_Settings()) is None + assert auto_warm_up_for_settings(cast("ValidationSettings", _Settings())) is None diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py index 479edc22c..60bf9e863 100644 --- a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -196,7 +196,9 @@ def __enter__(self): self.handler.emit = self.records.append # type: ignore[assignment] # Force 
initialization of the underlying logger via the proxy. http_module.logger.warning # noqa: B018 - self._target = http_module.logger._instance + target = http_module.logger._instance + assert target is not None # initialized above via the proxy access + self._target = target self._target.addHandler(self.handler) self._previous_level = self._target.level self._target.setLevel(_logging.DEBUG) diff --git a/tests/unit/test_http_requester_reconfigure.py b/tests/unit/test_http_requester_reconfigure.py index 46e05a184..cbc6ab2f1 100644 --- a/tests/unit/test_http_requester_reconfigure.py +++ b/tests/unit/test_http_requester_reconfigure.py @@ -81,7 +81,7 @@ def test_reconfigure_preserves_instance_attributes(tmp_path): singleton (e.g. methods patched by tests).""" requester = _initialize(tmp_path / "cache-1", cache_max_age=60) sentinel = object() - requester.custom_marker = sentinel + requester.custom_marker = sentinel # pyright: ignore[reportAttributeAccessIssue] _initialize(tmp_path / "cache-2", cache_max_age=60) @@ -112,7 +112,8 @@ def test_pinned_wrapper_survives_reconfigure(tmp_path): as an instance attribute: after a reconfigure rebuilds the session, that wrapper must still target the live session, not a closed one.""" requester = _initialize(tmp_path / "cache-1", cache_max_age=60) - requester.get = requester.get # pin the wrapper as an instance attribute + # pin the wrapper as an instance attribute + requester.get = requester.get # pyright: ignore[reportAttributeAccessIssue] _initialize(tmp_path / "cache-2", cache_max_age=60) # rebuilds the session @@ -141,7 +142,7 @@ def test_validation_settings_preserves_singleton(tmp_path): requester = _initialize(tmp_path / "cache", cache_max_age=60) marker = object() - requester.custom_marker = marker + requester.custom_marker = marker # pyright: ignore[reportAttributeAccessIssue] # ``offline=True`` keeps the construction self-contained (no warm-up/network). 
ValidationSettings( diff --git a/tests/unit/test_remote_context_retrieval.py b/tests/unit/test_remote_context_retrieval.py index 1e26d7b82..1c903e2b8 100644 --- a/tests/unit/test_remote_context_retrieval.py +++ b/tests/unit/test_remote_context_retrieval.py @@ -26,6 +26,7 @@ def fd_format(): "fd_format", "rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py" ) + assert spec is not None and spec.loader is not None module = importlib.util.module_from_spec(spec) sys.modules["fd_format"] = module spec.loader.exec_module(module) diff --git a/tests/unit/test_requirement_lifecycle.py b/tests/unit/test_requirement_lifecycle.py index 15c6d224c..aff09a66b 100644 --- a/tests/unit/test_requirement_lifecycle.py +++ b/tests/unit/test_requirement_lifecycle.py @@ -15,7 +15,7 @@ import pytest from rocrate_validator import services -from rocrate_validator.models import RequirementLoader, Severity, ValidationContext, ValidationSettings +from rocrate_validator.models import URI, RequirementLoader, Severity, ValidationContext, ValidationSettings from tests.ro_crates import InvalidRootDataEntity @@ -39,7 +39,7 @@ def finalize(self, context: ValidationContext) -> None: @pytest.fixture def validation_settings(): return ValidationSettings( - rocrate_uri=str(InvalidRootDataEntity().invalid_root_type), + rocrate_uri=URI(str(InvalidRootDataEntity().invalid_root_type)), requirement_severity=Severity.OPTIONAL, abort_on_first=False, ) diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index e5f624e9d..c46d4db21 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -72,7 +72,7 @@ def test_is_bagit_rocrate(): def test_abstract_bagit_rocrate_instantiation(): # Check that the base class BagItROCrate cannot be instantiated directly with pytest.raises(TypeError, match="Can't instantiate"): - BagitROCrate(ValidROC().bagit) + BagitROCrate(ValidROC().bagit) # type: ignore[abstract] def test_rocrate_factory(): @@ -104,21 +104,21 @@ def 
test_rocrate_factory(): def test_rocrate_constructor(): - roc = ROCrate(ValidROC().wrroc_paper) + roc = ROCrate(ValidROC().wrroc_paper) # type: ignore[abstract] assert isinstance(roc, ROCrateLocalFolder), "Should be a ROCrateLocalFolder" - roc = ROCrate(ValidROC().sort_and_change_archive) + roc = ROCrate(ValidROC().sort_and_change_archive) # type: ignore[abstract] assert isinstance(roc, ROCrateLocalZip), "Should be a ROCrateLocalZip" - roc = ROCrate(ValidROC().sort_and_change_remote) + roc = ROCrate(ValidROC().sort_and_change_remote) # type: ignore[abstract] assert isinstance(roc, ROCrateRemoteZip), "Should be a ROCrateRemoteZip" - roc = ROCrate(ValidROC().bagit) + roc = ROCrate(ValidROC().bagit) # type: ignore[abstract] assert isinstance(roc, BagitROCrate), "Should be a BagItROCrate" assert isinstance(roc, ROCrateLocalFolder), "Should be a ROCrateLocalFolder" assert isinstance(roc, ROCrateBagitLocalFolder), "Should be a ROCrateBagitLocalFolder" - roc = ROCrate(ValidROC().bagit_zip) + roc = ROCrate(ValidROC().bagit_zip) # pyright: ignore[reportAbstractUsage] assert isinstance(roc, BagitROCrate), "Should be a BagItROCrate" assert isinstance(roc, ROCrateLocalZip), "Should be a ROCrateLocalZip" assert isinstance(roc, ROCrateBagitLocalZip), "Should be a ROCrateBagitLocalZip" @@ -151,7 +151,7 @@ def test_parse_path(): def test_local_folder_with_relative_root(): # set relative root path - relative_root_path = "data" + relative_root_path = Path("data") # create ROCrateBagitLocalFolder with relative root path roc = ROCrateLocalFolder(ValidROC().bagit, relative_root_path=relative_root_path) assert isinstance(roc, ROCrateLocalFolder) @@ -171,10 +171,10 @@ def test_local_folder_with_relative_root(): assert parsed_path == ValidROC().bagit / relative_root_path / path, "Parsed path should be data/file.txt" # test has_file - assert roc.has_file("data/ro-crate-metadata.json"), "Should have ro-crate-metadata.json file" + assert roc.has_file(Path("data/ro-crate-metadata.json")), 
"Should have ro-crate-metadata.json file" # test get_file_content - content = roc.get_file_content("data/ro-crate-metadata.json") + content = roc.get_file_content(Path("data/ro-crate-metadata.json")) assert isinstance(content, bytes), "Content should be bytes" @@ -203,7 +203,7 @@ def test_remote_bagit_rocrate(): assert roc.has_directory(Path("data%20set/")), "Should have data%20set/ directory" assert roc.has_directory(Path("data set3/")), "Should have data set3/ directory" # test has file - assert roc.has_file("pics/2018-06-11 12.56.14.jpg"), "Should have pics/2018-06-11%2012.56.14.jpg file" + assert roc.has_file(Path("pics/2018-06-11 12.56.14.jpg")), "Should have pics/2018-06-11%2012.56.14.jpg file" # test file availability img_2018 = roc.metadata.get_entity("pics/2018-06-11%2012.56.14.jpg") @@ -271,7 +271,7 @@ def test_valid_local_rocrate(): # ROCrateLocalFolder def test_valid_local_folder_rocrate_with_relative_root(): # set relative root path - relative_root_path = "custom-relative-root" + relative_root_path = Path("custom-relative-root") # create ROCrateLocalFolder with relative root path roc = ROCrateLocalFolder(ValidROC().rocrate_with_relative_root, relative_root_path=relative_root_path) @@ -279,18 +279,19 @@ def test_valid_local_folder_rocrate_with_relative_root(): logger.debug("Testing bagit with relative root path: %s", relative_root_path) # inspect ro-crate-metadata.json to confirm correct relative root path - assert roc.has_file("ro-crate-metadata.json"), "Should have ro-crate-metadata.json file" + assert roc.has_file(Path("ro-crate-metadata.json")), "Should have ro-crate-metadata.json file" - metadata_path = roc.get_file_content("ro-crate-metadata.json", binary_mode=False) - logger.debug(f"ro-crate-metadata.json content: {metadata_path}") + metadata_path = roc.get_file_content(Path("ro-crate-metadata.json"), binary_mode=False) + metadata_text = metadata_path.decode("utf-8") if isinstance(metadata_path, bytes) else metadata_path + 
logger.debug(f"ro-crate-metadata.json content: {metadata_text}") # test has_file - assert roc.has_file("ro-crate-metadata.json"), "Should have ro-crate-metadata.json file" - assert roc.has_file("pics/2017-06-11%252012.56.14.jpg"), \ + assert roc.has_file(Path("ro-crate-metadata.json")), "Should have ro-crate-metadata.json file" + assert roc.has_file(Path("pics/2017-06-11%252012.56.14.jpg")), \ "Should have pics/2017-06-11%252012.56.14.jpg file" # test get_file_content - content = roc.get_file_content("ro-crate-metadata.json") + content = roc.get_file_content(Path("ro-crate-metadata.json")) assert isinstance(content, bytes), "Content should be bytes" # check availability @@ -375,7 +376,7 @@ def test_valid_zip_rocrate(): ################################ def test_paths_valid_bagit_rocrate(): - roc = ROCrate(ValidROC().bagit_zip) + roc = ROCrate(ValidROC().bagit_zip) # type: ignore[abstract] assert isinstance(roc, ROCrateLocalZip) # test list files @@ -394,7 +395,7 @@ def test_paths_valid_bagit_rocrate(): assert roc.has_directory(Path("data set3")), "Should have data set3/ directory" assert roc.has_directory(Path("data set3/")), "Should have data set3/ directory" - assert roc.has_file("pics/2018-06-11 12.56.14.jpg"), "Should have pics/2018-06-11%2012.56.14.jpg file" + assert roc.has_file(Path("pics/2018-06-11 12.56.14.jpg")), "Should have pics/2018-06-11%2012.56.14.jpg file" dataset3 = roc.metadata.get_entity("data set3/") assert dataset3 is not None, "Should have data set3/ entity" @@ -414,7 +415,7 @@ def test_paths_valid_bagit_rocrate(): def test_valid_bagit_zip_rocrate(): - roc = ROCrate(ValidROC().bagit_zip) + roc = ROCrate(ValidROC().bagit_zip) # type: ignore[abstract] assert isinstance(roc, ROCrateLocalZip) # test list files @@ -532,10 +533,10 @@ def test_valid_remote_zip_rocrate(): def test_external_file(): - content = ROCrate.get_external_file_content(ValidROC().sort_and_change_remote) + content = 
ROCrate.get_external_file_content(str(ValidROC().sort_and_change_remote)) assert isinstance(content, bytes), "Content should be bytes" - size = ROCrate.get_external_file_size(ValidROC().sort_and_change_remote) + size = ROCrate.get_external_file_size(str(ValidROC().sort_and_change_remote)) assert size == 137039, "Size should be 137039" diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index c0165f4f1..ae6ed477c 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -17,7 +17,7 @@ import tempfile from pathlib import Path -from rocrate_validator.models import ValidationSettings +from rocrate_validator.models import URI, ValidationSettings from rocrate_validator.rocrate import ROCrateMetadata from rocrate_validator.services import detect_profiles, get_profiles, validate from rocrate_validator.utils import log as logging @@ -62,7 +62,7 @@ def test_extra_profiles_list(fake_profiles_path: Path): def test_valid_local_rocrate(): logger.debug("Validating a local RO-Crate: %s", ValidROC().wrroc_paper) profiles = detect_profiles(ValidationSettings( - rocrate_uri=ValidROC().wrroc_paper + rocrate_uri=URI(ValidROC().wrroc_paper) )) logger.debug("Candidate profiles: %s", profiles) @@ -77,7 +77,7 @@ def test_valid_local_workflow_rocrate(): crate_path = ValidROC().workflow_roc logger.debug("Validating a local RO-Crate: %s", crate_path) profiles = detect_profiles(ValidationSettings( - rocrate_uri=crate_path + rocrate_uri=URI(crate_path) )) assert len(profiles) == 1, "Expected a single profile" assert profiles[0].identifier == "workflow-ro-crate-1.0", "Expected the 'workflow-ro-crate-1.0' profile" @@ -88,7 +88,7 @@ def test_valid_local_process_run_crate(): crate_path = ValidROC().process_run_crate logger.debug("Validating a local RO-Crate: %s", crate_path) profiles = detect_profiles(ValidationSettings( - rocrate_uri=crate_path + rocrate_uri=URI(crate_path) )) assert len(profiles) == 1, "Expected a single profile" assert 
profiles[0].identifier == "process-run-crate-0.5", "Expected the 'process-run-crate-0.5' profile" @@ -99,7 +99,7 @@ def test_valid_local_workflow_testing_ro_crate(): crate_path = ValidROC().workflow_testing_ro_crate logger.debug("Validating a local RO-Crate: %s", crate_path) profiles = detect_profiles(ValidationSettings( - rocrate_uri=crate_path + rocrate_uri=URI(crate_path) )) assert len(profiles) == 1, "Expected a single profile" assert profiles[0].identifier == "workflow-testing-ro-crate-0.1", \ @@ -113,7 +113,7 @@ def test_disable_inherited_profiles_issue_reporting(): # First, validate with inherited profiles issue reporting enabled settings = ValidationSettings( - rocrate_uri=crate_path, + rocrate_uri=URI(crate_path), disable_inherited_profiles_issue_reporting=False ) result = validate(settings) @@ -133,7 +133,7 @@ def test_disable_inherited_profiles_issue_reporting(): # Check that all reported issues are from the main profile main_profile_identifier = "workflow-testing-ro-crate-0.1" for issue in result.get_issues(): - assert issue.check.profile.identifier == main_profile_identifier, \ + assert issue.check.requirement.profile.identifier == main_profile_identifier, \ "All reported issues should belong to the main profile when inherited profiles issue reporting is disabled" @@ -141,7 +141,7 @@ def test_skip_pycheck_on_workflow_ro_crate(): # Set the rocrate_uri to the workflow testing RO-Crate crate_path = InvalidFileDescriptorEntity().invalid_conforms_to logger.debug("Validating a local RO-Crate: %s", crate_path) - settings = ValidationSettings(rocrate_uri=crate_path) + settings = ValidationSettings(rocrate_uri=URI(crate_path)) result = validate(settings) assert not result.passed(), \ "The RO-Crate is expected to be invalid because of an incorrect conformsTo field and missing resources" @@ -168,7 +168,7 @@ def test_valid_local_multi_profile_crate(): crate_path = InvalidMultiProfileROC().invalid_multi_profile_crate logger.debug("Validating a local RO-Crate: %s", 
crate_path) profiles = detect_profiles(ValidationSettings( - rocrate_uri=crate_path + rocrate_uri=URI(crate_path) )) assert len(profiles) == 2, "Expected two profiles" @@ -192,7 +192,7 @@ def test_valid_crate_folder_with_metadata_only(): # Define shared settings object settings = ValidationSettings( - rocrate_uri=Path(tmpdirname), + rocrate_uri=URI(Path(tmpdirname)), metadata_only=True ) @@ -218,7 +218,7 @@ def test_valid_crate_metadata_dict_with_metadata_only(): metadata_dict = json.loads(f.read()) # Define shared settings object - settings = ValidationSettings( + settings = ValidationSettings( # type: ignore[call-arg] # rocrate_uri not needed in metadata-dict mode metadata_dict=metadata_dict ) diff --git a/tests/unit/test_validation_settings.py b/tests/unit/test_validation_settings.py index 9dcd2b400..add220a06 100644 --- a/tests/unit/test_validation_settings.py +++ b/tests/unit/test_validation_settings.py @@ -14,7 +14,7 @@ import pytest -from rocrate_validator.models import Severity, ValidationSettings +from rocrate_validator.models import URI, Severity, ValidationSettings def test_validation_settings_parse_dict(): @@ -37,8 +37,8 @@ def test_validation_settings_parse_dict(): def test_validation_settings_parse_object(): existing_settings = ValidationSettings( - rocrate_uri="/path/to/data", - profiles_path="/path/to/profiles", + rocrate_uri=URI("/path/to/data"), + profiles_path="/path/to/profiles", # type: ignore[arg-type] requirement_severity=Severity.RECOMMENDED, enable_profile_inheritance=False, disable_inherited_profiles_issue_reporting=True, @@ -55,13 +55,13 @@ def test_validation_settings_parse_object(): def test_validation_settings_parse_invalid_type(): with pytest.raises(ValueError): - ValidationSettings.parse("invalid_settings") + ValidationSettings.parse("invalid_settings") # type: ignore[arg-type] def test_validation_settings_to_dict(): settings = ValidationSettings( - rocrate_uri="/path/to/data", - profiles_path="/path/to/profiles", + 
rocrate_uri=URI("/path/to/data"), + profiles_path="/path/to/profiles", # type: ignore[arg-type] requirement_severity=Severity.RECOMMENDED, enable_profile_inheritance=False ) @@ -73,53 +73,53 @@ def test_validation_settings_to_dict(): def test_validation_settings_enable_profile_inheritance(): - settings = ValidationSettings(enable_profile_inheritance=True) + settings = ValidationSettings(enable_profile_inheritance=True) # type: ignore[call-arg] assert settings.enable_profile_inheritance is True - settings = ValidationSettings(enable_profile_inheritance=False) + settings = ValidationSettings(enable_profile_inheritance=False) # type: ignore[call-arg] assert settings.enable_profile_inheritance is False def test_validation_settings_disable_inherited_profiles_issue_reporting(): - settings = ValidationSettings() + settings = ValidationSettings() # type: ignore[call-arg] assert settings.disable_inherited_profiles_issue_reporting is False - settings = ValidationSettings(disable_inherited_profiles_issue_reporting=True) + settings = ValidationSettings(disable_inherited_profiles_issue_reporting=True) # type: ignore[call-arg] assert settings.disable_inherited_profiles_issue_reporting is True - settings = ValidationSettings(disable_inherited_profiles_issue_reporting=False) + settings = ValidationSettings(disable_inherited_profiles_issue_reporting=False) # type: ignore[call-arg] assert settings.disable_inherited_profiles_issue_reporting is False def test_validation_settings_data_path(): - settings = ValidationSettings(rocrate_uri="/path/to/data") + settings = ValidationSettings(rocrate_uri=URI("/path/to/data")) assert str(settings.rocrate_uri) == "/path/to/data" def test_validation_settings_profiles_path(): - settings = ValidationSettings(profiles_path="/path/to/profiles") + settings = ValidationSettings(profiles_path="/path/to/profiles") # type: ignore[call-arg, arg-type] assert settings.profiles_path == "/path/to/profiles" def test_validation_settings_requirement_severity(): - 
settings = ValidationSettings(requirement_severity=Severity.RECOMMENDED) + settings = ValidationSettings(requirement_severity=Severity.RECOMMENDED) # type: ignore[call-arg] assert settings.requirement_severity == Severity.RECOMMENDED def test_validation_settings_abort_on_first(): - settings = ValidationSettings(abort_on_first=True) + settings = ValidationSettings(abort_on_first=True) # type: ignore[call-arg] assert settings.abort_on_first is True def test_validation_settings_metadata_only(): - settings = ValidationSettings(metadata_only=True) + settings = ValidationSettings(metadata_only=True) # type: ignore[call-arg] assert settings.metadata_only is True - settings = ValidationSettings(metadata_only=False) + settings = ValidationSettings(metadata_only=False) # type: ignore[call-arg] assert settings.metadata_only is False def test_validation_settings_metadata_dict(): - metadata = {"@graph": []} - settings = ValidationSettings(metadata_dict=metadata) + metadata: dict = {"@graph": []} + settings = ValidationSettings(metadata_dict=metadata) # type: ignore[call-arg] assert settings.metadata_dict == metadata From b15e301b67f2bfe6f778602329a6278d95994f6a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 12:20:16 +0200 Subject: [PATCH 304/352] =?UTF-8?q?test(integration):=20=F0=9F=90=9B=20fix?= =?UTF-8?q?=20context=20manager=20usage=20on=20Path=20object?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ro-crate/test_file_descriptor_format.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py index 09fed9779..a85d0cc6a 100644 --- a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py +++ b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py @@ -27,14 +27,14 @@ def test_missing_file_descriptor(): """Test a 
RO-Crate without a file descriptor.""" - with paths.missing_file_descriptor as rocrate_path: - do_entity_test( - rocrate_path, - models.Severity.REQUIRED, - False, - ["File Descriptor existence"], - [] - ) + rocrate_path = paths.missing_file_descriptor + do_entity_test( + rocrate_path, + models.Severity.REQUIRED, + False, + ["File Descriptor existence"], + [] + ) def test_not_valid_json_format(): From d76f453cfbf82b50f044c23340fb3f45ae3f9539 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 12:35:16 +0200 Subject: [PATCH 305/352] =?UTF-8?q?test(models):=20=F0=9F=A7=AA=20wrap=20r?= =?UTF-8?q?ocrate=5Furi=20with=20URI=20for=20stricter=20type=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index a0e048a2b..1732457bd 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -80,7 +80,7 @@ def validation_settings(): # @pytest.mark.skip(reason="Temporarily disabled: we need an RO-Crate with multiple failed requirements to test this.") def test_sortability_requirements(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = InvalidRootDataEntity().invalid_root_type + validation_settings.rocrate_uri = URI(InvalidRootDataEntity().invalid_root_type.as_uri()) result: models.ValidationResult = services.validate(validation_settings) failed_requirements = sorted(result.failed_requirements, reverse=True) assert len(failed_requirements) > 1 @@ -89,7 +89,7 @@ def test_sortability_requirements(validation_settings: ValidationSettings): def test_sortability_checks(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = WROCInvalidReadme().wroc_readme_wrong_encoding_format + validation_settings.rocrate_uri = URI(WROCInvalidReadme().wroc_readme_wrong_encoding_format.as_uri()) result: models.ValidationResult = 
services.validate(validation_settings) failed_checks = sorted(result.failed_checks, reverse=True) assert len(failed_checks) > 1 @@ -100,7 +100,7 @@ def test_sortability_checks(validation_settings: ValidationSettings): def test_sortability_issues(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = WROCInvalidReadme().wroc_readme_wrong_encoding_format + validation_settings.rocrate_uri = URI(WROCInvalidReadme().wroc_readme_wrong_encoding_format.as_uri()) result: models.ValidationResult = services.validate(validation_settings) issues = sorted(result.get_issues(min_severity=Severity.OPTIONAL), reverse=True) assert len(issues) > 1 From 51de5a31ec010a539752d6210973c11ecb909676 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 12:36:08 +0200 Subject: [PATCH 306/352] =?UTF-8?q?style(test):=20=F0=9F=8E=A8=20wrap=20lo?= =?UTF-8?q?ng=20cli=5Frunner.invoke=20line?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 4f38f0f86..99fb66b8e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -158,7 +158,10 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): """ Test the list of extra profiles. 
""" - result = cli_runner.invoke(cli, ["profiles", "--extra-profiles-path", str(fake_profiles_path), "list", "--no-paging"]) + result = cli_runner.invoke( + cli, + ["profiles", "--extra-profiles-path", str(fake_profiles_path), "list", "--no-paging"], + ) assert result.exit_code == 0 assert "Profile A" in result.output # Check for a known extra profile From 4b3d584750ef45c1ca795e72786bee368fe6026b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 11 Jun 2026 13:27:57 +0200 Subject: [PATCH 307/352] =?UTF-8?q?fix(test):=20=F0=9F=90=9B=20use=20str()?= =?UTF-8?q?=20instead=20of=20as=5Furi()=20for=20local=20crate=20paths?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 1732457bd..3c4802842 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -80,7 +80,7 @@ def validation_settings(): # @pytest.mark.skip(reason="Temporarily disabled: we need an RO-Crate with multiple failed requirements to test this.") def test_sortability_requirements(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = URI(InvalidRootDataEntity().invalid_root_type.as_uri()) + validation_settings.rocrate_uri = URI(str(InvalidRootDataEntity().invalid_root_type)) result: models.ValidationResult = services.validate(validation_settings) failed_requirements = sorted(result.failed_requirements, reverse=True) assert len(failed_requirements) > 1 @@ -89,7 +89,7 @@ def test_sortability_requirements(validation_settings: ValidationSettings): def test_sortability_checks(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = URI(WROCInvalidReadme().wroc_readme_wrong_encoding_format.as_uri()) + validation_settings.rocrate_uri = URI(str(WROCInvalidReadme().wroc_readme_wrong_encoding_format)) result: models.ValidationResult = services.validate(validation_settings) 
failed_checks = sorted(result.failed_checks, reverse=True) assert len(failed_checks) > 1 @@ -100,7 +100,7 @@ def test_sortability_checks(validation_settings: ValidationSettings): def test_sortability_issues(validation_settings: ValidationSettings): - validation_settings.rocrate_uri = URI(WROCInvalidReadme().wroc_readme_wrong_encoding_format.as_uri()) + validation_settings.rocrate_uri = URI(str(WROCInvalidReadme().wroc_readme_wrong_encoding_format)) result: models.ValidationResult = services.validate(validation_settings) issues = sorted(result.get_issues(min_severity=Severity.OPTIONAL), reverse=True) assert len(issues) > 1 From f379c44835c71ab0f248b03f73acdcdfc9d3a488 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 09:10:04 +0200 Subject: [PATCH 308/352] =?UTF-8?q?refactor(cli):=20=F0=9F=94=A8=20extract?= =?UTF-8?q?=20EventDispatcher=20to=20route=20validation=20events?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce an EventDispatcher base class (Subscriber) that maps event types to typed _on_* hooks and centralizes the hidden/overridden check filtering, so subscribers only handle actionable events. 
- ProgressMonitor now extends EventDispatcher and overrides only the relevant _on_* hooks instead of a monolithic update() with manual event-type branching - ValidationReportLayout delegates event handling to a dedicated _ReportLayoutSubscriber, separating rendering from event dispatch - expose ValidationReportLayout.subscribers and consume it from the validate command instead of building the subscriber list inline - de-duplicate the hidden/overridden guard previously copy-pasted in both progress and report layouts --- rocrate_validator/cli/ui/text/validate.py | 2 +- .../output/text/layout/dispatcher.py | 127 ++++++++++++++++++ .../io_helpers/output/text/layout/progress.py | 83 ++++-------- .../io_helpers/output/text/layout/report.py | 88 ++++++------ 4 files changed, 197 insertions(+), 103 deletions(-) create mode 100644 rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index f6eb16e96..9f8081662 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -87,7 +87,7 @@ def show_validation_progress(self, validation_command: Callable) -> Any: result = self.report_layout.live( lambda: validation_command( self.validation_settings, - subscribers=[self.report_layout, self.report_layout.progress_monitor] + subscribers=self.report_layout.subscribers ) ) logger.debug("Validation completed with result: %s", result) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py b/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py new file mode 100644 index 000000000..4e02d81fe --- /dev/null +++ b/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py @@ -0,0 +1,127 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import ClassVar + +from rocrate_validator.events import Event, EventType, Subscriber +from rocrate_validator.models import ( + ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + ValidationContext, + ValidationEvent, +) +from rocrate_validator.utils import log as logging + +logger = logging.getLogger(__name__) + + +class EventDispatcher(Subscriber): + """ + Subscriber that routes validation events to typed ``_on_*`` hooks. + + Hidden requirements and overridden checks (relative to the target + profile) are filtered out before dispatch, so subclasses only see + actionable events and don't need to repeat the guard. 
+ """ + + _HANDLERS: ClassVar[dict[EventType, str]] = { + EventType.VALIDATION_START: "_on_validation_start", + EventType.PROFILE_VALIDATION_START: "_on_profile_validation_start", + EventType.REQUIREMENT_VALIDATION_START: "_on_requirement_validation_start", + EventType.REQUIREMENT_CHECK_VALIDATION_START: "_on_requirement_check_validation_start", + EventType.REQUIREMENT_CHECK_VALIDATION_END: "_on_requirement_check_validation_end", + EventType.REQUIREMENT_VALIDATION_END: "_on_requirement_validation_end", + EventType.PROFILE_VALIDATION_END: "_on_profile_validation_end", + EventType.VALIDATION_END: "_on_validation_end", + } + + _CHECK_EVENTS: ClassVar[frozenset[EventType]] = frozenset({ + EventType.REQUIREMENT_CHECK_VALIDATION_START, + EventType.REQUIREMENT_CHECK_VALIDATION_END, + }) + + _REQUIREMENT_EVENTS: ClassVar[frozenset[EventType]] = frozenset({ + EventType.REQUIREMENT_VALIDATION_START, + EventType.REQUIREMENT_VALIDATION_END, + }) + + def __init__(self, name: str | None = None): + super().__init__(name or type(self).__name__) + + def update(self, event: Event, ctx: ValidationContext | None = None) -> None: + logger.debug("Event: %s", event.event_type) + if not self._should_dispatch(event, ctx): + return + handler_name = self._HANDLERS.get(event.event_type) + if handler_name is not None: + getattr(self, handler_name)(event, ctx) + + def _should_dispatch(self, event: Event, ctx: ValidationContext | None) -> bool: + et = event.event_type + if et in self._CHECK_EVENTS: + assert isinstance(event, RequirementCheckValidationEvent) + if self._is_check_actionable(event, ctx): + return True + logger.debug("Skipping check: %s", event.requirement_check.identifier) + return False + if et in self._REQUIREMENT_EVENTS: + assert isinstance(event, RequirementValidationEvent) + return not event.requirement.hidden + return True + + def _on_validation_start(self, event: Event, ctx: ValidationContext | None) -> None: + pass + + def _on_profile_validation_start(self, event: 
ProfileValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_requirement_validation_start(self, event: RequirementValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_requirement_check_validation_start(self, event: RequirementCheckValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_requirement_validation_end(self, event: RequirementValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_profile_validation_end(self, event: ProfileValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + def _on_validation_end(self, event: ValidationEvent, + ctx: ValidationContext | None) -> None: + pass + + @staticmethod + def _is_check_actionable(event: RequirementCheckValidationEvent, + ctx: ValidationContext | None) -> bool: + """Return ``True`` if the check is neither hidden nor overridden.""" + assert ctx is not None, "Validation context must be provided" + if event.requirement_check.requirement.hidden: + return False + if event.requirement_check.overridden: + return ( + ctx.target_validation_profile.identifier + == event.requirement_check.requirement.profile.identifier + ) + return True diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index f1576f061..40cbbf9c7 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -12,27 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- - from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn -from rocrate_validator.events import Event, EventType, Subscriber from rocrate_validator.models import ( ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, ValidationContext, - ValidationEvent, ValidationSettings, ValidationStatistics, ) from rocrate_validator.utils import log as logging +from .dispatcher import EventDispatcher + # set up logging logger = logging.getLogger(__name__) -class ProgressMonitor(Subscriber): +class ProgressMonitor(EventDispatcher): PROFILE_VALIDATION = "Profiles" REQUIREMENT_VALIDATION = "Requirements" @@ -40,6 +38,8 @@ class ProgressMonitor(Subscriber): def __init__(self, settings: dict | ValidationSettings, stats: ValidationStatistics | None = None): + # Initialize the Subscriber + super().__init__("ProgressMonitor") self.__progress = Progress( TextColumn("[progress.description]{task.description}"), BarColumn(), @@ -52,69 +52,38 @@ def __init__(self, settings: dict | ValidationSettings, # Store settings self.settings = settings # Initialize progress tasks - self.profile_validation = self.progress.add_task( + self.profile_validation = self.__progress.add_task( self.PROFILE_VALIDATION, total=len(stats.profiles)) - self.requirement_validation = self.progress.add_task( + self.requirement_validation = self.__progress.add_task( self.REQUIREMENT_VALIDATION, total=stats.total_requirements) - self.requirement_check_validation = self.progress.add_task( + self.requirement_check_validation = self.__progress.add_task( self.REQUIREMENT_CHECK_VALIDATION, total=stats.total_checks) - - # Initialize the Subscriber - super().__init__("ProgressMonitor") - # Initialize progress according to current statistics - self.__initialize__(stats) - - def __initialize__(self, stats: ValidationStatistics): - """Initialize the progress monitor according to the current statistics.""" - self.progress.update(task_id=self.profile_validation, - 
advance=len(stats.validated_profiles)) - self.progress.update(task_id=self.requirement_validation, - advance=len(stats.validated_requirements)) - self.progress.update(task_id=self.requirement_check_validation, - advance=len(stats.validated_checks)) + self.__progress.update(task_id=self.profile_validation, + advance=len(stats.validated_profiles)) + self.__progress.update(task_id=self.requirement_validation, + advance=len(stats.validated_requirements)) + self.__progress.update(task_id=self.requirement_check_validation, + advance=len(stats.validated_checks)) def start(self): - self.progress.start() + self.__progress.start() def stop(self): - self.progress.stop() + self.__progress.stop() @property def progress(self) -> Progress: return self.__progress - def update(self, event: Event, ctx: ValidationContext | None = None): - logger.debug("Event: %s", event.event_type) - if event.event_type == EventType.VALIDATION_START: - logger.debug("Validation started") - if event.event_type == EventType.PROFILE_VALIDATION_START: - assert isinstance(event, ProfileValidationEvent) - logger.debug("Profile validation start: %s", event.profile.identifier) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: - logger.debug("Requirement validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: - logger.debug("Requirement check validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: - self.__on_requirement_check_end__(event, ctx) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: - assert isinstance(event, RequirementValidationEvent) - if not event.requirement.hidden: - self.progress.update(task_id=self.requirement_validation, advance=1) - elif event.event_type == EventType.PROFILE_VALIDATION_END: - self.progress.update(task_id=self.profile_validation, advance=1) - elif event.event_type == EventType.VALIDATION_END: - assert isinstance(event, ValidationEvent) - logger.debug("Validation ended with 
result: %s", event.validation_result) + def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, + ctx: ValidationContext | None) -> None: + self.__progress.update(task_id=self.requirement_check_validation, advance=1) + + def _on_requirement_validation_end(self, event: RequirementValidationEvent, + ctx: ValidationContext | None) -> None: + self.__progress.update(task_id=self.requirement_validation, advance=1) - def __on_requirement_check_end__(self, event: Event, ctx: ValidationContext | None) -> None: - """Advance the requirement-check progress bar, unless the check is hidden or overridden.""" - assert isinstance(event, RequirementCheckValidationEvent) - assert ctx is not None, "Validation context must be provided" - target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and \ - (not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier): - self.progress.update(task_id=self.requirement_check_validation, advance=1) - else: - logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) + def _on_profile_validation_end(self, event: ProfileValidationEvent, + ctx: ValidationContext | None) -> None: + self.__progress.update(task_id=self.profile_validation, advance=1) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 23b30c0b8..e48bfb950 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -26,9 +26,7 @@ from rich.rule import Rule from rich.text import Text -from rocrate_validator.events import Event, EventType from rocrate_validator.models import ( - ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, Severity, @@ -43,11 +41,13 @@ from rocrate_validator.utils.uri import 
URI from rocrate_validator.utils.versioning import get_version +from .dispatcher import EventDispatcher from .progress import ProgressMonitor if TYPE_CHECKING: from collections.abc import Callable + from rocrate_validator.events import Subscriber from rocrate_validator.utils.io_helpers.output.console import Console # set up logging @@ -66,9 +66,10 @@ def __init__(self, console: Console, self.statistics = statistics self.profile_autodetected = profile_autodetected self.result: ValidationResult | None = None - self.__layout: Padding | None = None + self._layout: Padding | None = None self._validation_checks_progress: Layout | None = None - self.__progress_monitor: ProgressMonitor | None = None + self._progress_monitor: ProgressMonitor | None = None + self._subscriber: EventDispatcher = _ReportLayoutSubscriber(self) self.requirement_checks_container_layout: Layout | None = None self.passed_checks: Layout | None = None self.failed_checks: Layout | None = None @@ -79,9 +80,9 @@ def __init__(self, console: Console, @property def layout(self): - if not self.__layout: - self.__init_layout__() - return self.__layout + if not self._layout: + self._init_layout() + return self._layout @property def validation_checks_progress(self): @@ -89,16 +90,20 @@ def validation_checks_progress(self): @property def progress_monitor(self) -> ProgressMonitor: - if not self.__progress_monitor: - self.__progress_monitor = ProgressMonitor(self.validation_settings, self.statistics) - return self.__progress_monitor + if not self._progress_monitor: + self._progress_monitor = ProgressMonitor(self.validation_settings, self.statistics) + return self._progress_monitor + + @property + def subscribers(self) -> list[Subscriber]: + """Subscribers to register with the validator (layout + progress).""" + return [self._subscriber, self.progress_monitor] def live(self, update_callable: Callable) -> Any: - # Start live rendering with Live(self.layout, console=self.console, refresh_per_second=10, 
transient=False): return update_callable() - def __init_layout__(self): + def _init_layout(self): # Get the validation settings settings = self.validation_settings @@ -170,7 +175,7 @@ def __init_layout__(self): group_layout.add_split(self.checks_stats_layout) group_layout.add_split(self.overall_result) - self.__layout = Padding(group_layout, (1, 1)) + self._layout = Padding(group_layout, (1, 1)) # Update the layout with the profile stats self.update_stats( @@ -181,37 +186,7 @@ def __init_layout__(self): result = self.result or (self.statistics.validation_result) if self.statistics else None # Show the overall result if available if result: - self.__show_overall_result__(result) - - def update(self, event: Event, ctx: ValidationContext | None = None): # type: ignore[override] - logger.debug("Event: %s", event.event_type) - if event.event_type == EventType.PROFILE_VALIDATION_START: - assert isinstance(event, ProfileValidationEvent) - logger.debug("Profile validation start: %s", event.profile.identifier) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_START: - logger.debug("Requirement validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_START: - logger.debug("Requirement check validation start") - elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: - assert isinstance(event, RequirementCheckValidationEvent) - assert ctx is not None, "Validation context must be provided" - target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and \ - (not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier): - if event.validation_result is not None: - self.update_stats(ctx.result.statistics) - else: - logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) - elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: - assert isinstance(event, 
RequirementValidationEvent) - assert ctx is not None, "Validation context must be provided" - if not event.requirement.hidden: - self.update_stats(ctx.result.statistics) - elif event.event_type == EventType.VALIDATION_END: - assert isinstance(event, ValidationEvent) - self.__show_overall_result__(event.validation_result) - logger.debug("Validation ended with result: %s", event.validation_result) + self._show_overall_result(result) def update_stats(self, profile_stats: ValidationStatistics | None = None): assert profile_stats, "Profile stats must be provided" @@ -279,7 +254,7 @@ def update_stats(self, profile_stats: ValidationStatistics | None = None): ) ) - def __show_overall_result__(self, result: ValidationResult | None): + def _show_overall_result(self, result: ValidationResult | None): assert result, "Validation result must be provided" assert self.overall_result is not None, "Layout not initialized" self.result = result @@ -297,6 +272,29 @@ def __show_overall_result__(self, result: ValidationResult | None): style="bold red"), (1, 1))) +class _ReportLayoutSubscriber(EventDispatcher): + """Drives :class:`ValidationReportLayout` from validation events.""" + + def __init__(self, layout: ValidationReportLayout): + super().__init__("ValidationReportLayout") + self._layout = layout + + def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, + ctx: ValidationContext | None) -> None: + if event.validation_result is not None: + assert ctx is not None + self._layout.update_stats(ctx.result.statistics) + + def _on_requirement_validation_end(self, event: RequirementValidationEvent, + ctx: ValidationContext | None) -> None: + assert ctx is not None, "Validation context must be provided" + self._layout.update_stats(ctx.result.statistics) + + def _on_validation_end(self, event: ValidationEvent, + ctx: ValidationContext | None) -> None: + self._layout._show_overall_result(event.validation_result) + + def get_app_header_rule() -> Padding: return 
Padding(Rule(f"\n[bold][cyan]ROCrate Validator[/cyan] (ver. [magenta]{get_version()}[/magenta])[/bold]", style="bold cyan"), (1, 2)) From a6af0e9989af46dd732f43ec355a990c8f9b6975 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 10:10:36 +0200 Subject: [PATCH 309/352] =?UTF-8?q?refactor(cli):=20=F0=9F=94=A8=20expose?= =?UTF-8?q?=20show=5Foverall=5Fresult=20for=20the=20report=20subscriber?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../utils/io_helpers/output/text/layout/report.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index e48bfb950..97ac3a5ff 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -186,7 +186,7 @@ def _init_layout(self): result = self.result or (self.statistics.validation_result) if self.statistics else None # Show the overall result if available if result: - self._show_overall_result(result) + self.show_overall_result(result) def update_stats(self, profile_stats: ValidationStatistics | None = None): assert profile_stats, "Profile stats must be provided" @@ -254,7 +254,7 @@ def update_stats(self, profile_stats: ValidationStatistics | None = None): ) ) - def _show_overall_result(self, result: ValidationResult | None): + def show_overall_result(self, result: ValidationResult | None): assert result, "Validation result must be provided" assert self.overall_result is not None, "Layout not initialized" self.result = result @@ -292,7 +292,7 @@ def _on_requirement_validation_end(self, event: RequirementValidationEvent, def _on_validation_end(self, event: ValidationEvent, ctx: ValidationContext | None) -> None: - self._layout._show_overall_result(event.validation_result) + 
self._layout.show_overall_result(event.validation_result) def get_app_header_rule() -> Padding: @@ -383,6 +383,7 @@ def run_validation(): try: result_container[0] = self.callable_service(self.validation_settings) except Exception as e: + # Captured here and re-raised on the main thread after join. exception_container[0] = e validation_thread = threading.Thread(target=run_validation) From 6d5cc3950b681724e22c6655ff2003f416e1b85b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 10:11:25 +0200 Subject: [PATCH 310/352] =?UTF-8?q?chore(typing):=20=F0=9F=8F=B7=EF=B8=8F?= =?UTF-8?q?=20drop=20obsolete=20import-untyped=20ignores?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/config.py | 2 +- rocrate_validator/utils/http.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/utils/config.py b/rocrate_validator/utils/config.py index a9bc5c8c3..5923382da 100644 --- a/rocrate_validator/utils/config.py +++ b/rocrate_validator/utils/config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import toml # type: ignore[import-untyped] +import toml from rocrate_validator.utils import log as logging from rocrate_validator.utils.paths import get_config_path diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 91256d3ce..5af8ad541 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from typing_extensions import Self -import requests # type: ignore[import-untyped] +import requests from rocrate_validator import constants from rocrate_validator.utils import log as logging From 0be8305a188983959a9d3ce31975ba56ca1ce852 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 10:28:39 +0200 Subject: [PATCH 311/352] =?UTF-8?q?style(lint):=20=F0=9F=8E=A8=20fix=20pyl?= =?UTF-8?q?int=20warnings=20in=20requirements=20and=20io=20helpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - rename 'property' param to 'prop' to avoid shadowing the builtin - use rsplit(maxsplit=1) instead of split for trailing-segment extraction - drop unused SHACLCheck import in shacl/requirements - add targeted pylint disables for unused-argument/unused-import and redefined-outer-name where the names are intentional --- rocrate_validator/requirements/python/__init__.py | 7 ++++--- rocrate_validator/requirements/shacl/checks.py | 2 +- rocrate_validator/requirements/shacl/models.py | 12 ++++++------ rocrate_validator/requirements/shacl/requirements.py | 3 +-- rocrate_validator/requirements/shacl/utils.py | 2 +- rocrate_validator/requirements/shacl/validator.py | 2 +- rocrate_validator/utils/cache_warmup.py | 2 +- .../utils/io_helpers/output/json/formatters.py | 4 ++-- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index f5dfbf6f7..1960447f0 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ 
b/rocrate_validator/requirements/python/__init__.py @@ -43,7 +43,7 @@ class PyFunctionCheck(RequirementCheck): """ def __init__(self, - requirement: Requirement, + requirement: Requirement, # pylint: disable=redefined-outer-name name: str, check_function: Callable[[RequirementCheck, ValidationContext], bool], description: str | None = None, @@ -140,6 +140,7 @@ def __init_checks__(self): severity = self.severity_from_path or Severity.REQUIRED logger.debug("Severity log: %r", severity) deactivated = bool(getattr(member, "deactivated", False)) + # pylint: disable-next=redefined-outer-name # local 'check' mirrors the decorator name check = self.requirement_check_class(self, check_name, member, @@ -235,9 +236,9 @@ def decorator(func): class PyRequirementLoader(RequirementLoader): def load(self, profile: Profile, - requirement_level: RequirementLevel, + requirement_level: RequirementLevel, # pylint: disable=unused-argument file_path: Path, - publicID: str | None = None) -> list[Requirement]: + publicID: str | None = None) -> list[Requirement]: # pylint: disable=unused-argument # instantiate a list to store the requirements requirements: list[Requirement] = [] diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 17120d910..f5a4521e8 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -14,7 +14,7 @@ import json from timeit import default_timer as timer -from typing import Any, ClassVar, Optional, cast +from typing import Any, ClassVar, Optional, cast # pylint: disable=unused-import from rdflib import RDF, BNode, Literal, Namespace diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 8cb5a37c3..f665236cc 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -60,7 +60,7 @@ def name(self) -> str: """Return the name of the shape""" 
if not self._name: self._name = self.node_name - return self._name or str(self._node).split("/")[-1] + return self._name or str(self._node).rsplit("/", maxsplit=1)[-1] @name.setter def name(self, value: str): @@ -187,13 +187,13 @@ def get_property_index(self, name) -> int: return i return -1 - def add_property(self, property: PropertyShape): + def add_property(self, prop: PropertyShape): """Add a property to the shape""" - self._properties.append(property) + self._properties.append(prop) - def remove_property(self, property: PropertyShape): + def remove_property(self, prop: PropertyShape): """Remove a property from the shape""" - self._properties.remove(property) + self._properties.remove(prop) class Shape(SHACLNode): @@ -238,7 +238,7 @@ def name(self) -> str: self._short_name = path_str.rsplit(sep, maxsplit=1)[-1] if self.parent: self._name = f"{self._short_name} of {self.parent.name}" - return self._name or str(self._node).split("/")[-1] + return self._name or str(self._node).rsplit("/", maxsplit=1)[-1] @name.setter def name(self, value: str): diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index f63801261..69e05d853 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -13,7 +13,7 @@ # limitations under the License. 
from pathlib import Path -from typing import Any, cast +from typing import Any, cast # pylint: disable=unused-import from rdflib import RDF @@ -107,7 +107,6 @@ def finalize(cls, context: ValidationContext) -> None: # extract profiles and target profile from context profiles = context.profiles - from rocrate_validator.requirements.shacl.checks import SHACLCheck # noqa: PLC0415 from rocrate_validator.requirements.shacl.validator import SHACLValidationContext # noqa: PLC0415 target = next((p for p in profiles if p.identifier == context.settings.profile_identifier), None) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 9ddb4c141..d684a48c7 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -15,7 +15,7 @@ from __future__ import annotations import hashlib -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, cast # pylint: disable=unused-import if TYPE_CHECKING: from pathlib import Path diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 5eca51038..910706914 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -15,7 +15,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, cast # pylint: disable=unused-import import pyshacl from rdflib import BNode, Graph diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index bbf4cb174..51df895e1 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -27,7 +27,7 @@ import os from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, cast # pylint: disable=unused-import from rocrate_validator import 
constants from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/utils/io_helpers/output/json/formatters.py b/rocrate_validator/utils/io_helpers/output/json/formatters.py index c6d0fd629..ac62ddd33 100644 --- a/rocrate_validator/utils/io_helpers/output/json/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/json/formatters.py @@ -41,8 +41,8 @@ def format_validation_result(data: ValidationResult, console: Console, console_o def format_validation_results( data: dict[str, ValidationResult], console: Console | None = None, # pylint: disable=unused-argument - console_options: ConsoleOptions | None = None, -) -> str: # pylint: disable=unused-argument + console_options: ConsoleOptions | None = None, # pylint: disable=unused-argument +) -> str: # Initialize an empty JSON output json_output: dict[str, Any] = { From 83e9ad03b0896a050011fb6425efd7022e103c9f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 10:45:44 +0200 Subject: [PATCH 312/352] =?UTF-8?q?build(deps):=20=F0=9F=93=8C=20declare?= =?UTF-8?q?=20mypy,=20drop=20unused=20typos=20runtime=20dep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add mypy to the dev group so the pre-commit hook works on a clean checkout (was relying on an undeclared, lock-untracked install) - remove the typos runtime dependency: never imported, and the pre-commit hook uses the hosted crate-ci/typos binary instead - deduplicate enum-tools (kept as a runtime dep; the docs-group entry was redundant and had a divergent constraint) --- pyproject.toml | 89 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 478719656..58ba45660 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,13 +84,14 @@ requests = ">=2.32,<3.0" requests-cache = ">=1.2,<2.0" inquirerpy = ">=0.3.4,<0.4.0" enum-tools = ">=0.12,<0.13" -typos = "^1.41.0" 
[tool.poetry.group.dev.dependencies] pylint = "^3.1.0" ipykernel = "^6.29.3" ruff = "^0.15.15" pre-commit = "^4.6.0" +types-toml = "^0.10.8.20260518" +mypy = "^2.1.0" [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" @@ -100,7 +101,6 @@ pytest-xdist = "^3.6.1" [tool.poetry.group.docs.dependencies] sphinx = "^8.1.3" nbsphinx = "^0.9.5" -enum-tools = "^0.12.0" sphinx-toolbox = "^3.8.1" myst-parser = "^4.0.0" sphinx-rtd-theme = "^3.0.2" @@ -132,3 +132,88 @@ extend-ignore-re = ["[0-9a-f]{7,40}"] # Ignore long hexadecimal strings, which a [tool.typos.files] extend-exclude = ["tests/data", "docs/diagrams", "*.json", "*.html", "*__init__.py"] + +# Pylint configuration. +# Ruff (see ruff.toml) is the primary linter/formatter; this section keeps +# pylint consistent with it so the two tools do not disagree on style or on +# the design limits the project deliberately accepts. +[tool.pylint.main] +# Lowest supported Python version (matches tool.poetry.dependencies.python). +py-version = "3.10" +# Platform-specific stdlib modules that are imported behind try/except guards; +# pylint cannot import them on every OS and would report a false `import-error`. +ignored-modules = ["msvcrt"] + +[tool.pylint.format] +# Match the line length enforced by Ruff (ruff.toml `line-length = 120`). 
+max-line-length = 120 + +[tool.pylint.basic] +# Names that intentionally deviate from the default naming style and should +# NOT be flagged as `invalid-name` — they mirror external APIs or domain terms: +good-names-rgxs = [ + "^_?publicID$", # rdflib API parameter name + "^_?(violating[A-Za-z]+|propertyValue)$", # SHACL violation fields (camelCase domain) + "^(basicConfig|getLogger)$", # drop-in mirror of the stdlib `logging` API + "^(_installed|_config|__profiles_loaded)$", # module-level mutable state (not constants) + "^[A-Z][A-Z0-9_]*_TYPES$", # UPPER_CASE type aliases +] + +[tool.pylint."messages control"] +disable = [ + # Docstrings are not enforced project-wide: Ruff's `D` (pydocstyle) rules + # are intentionally not enabled, so pylint should not flag them either. + "missing-module-docstring", # C0114 + "missing-class-docstring", # C0115 + "missing-function-docstring", # C0116 + # Pre-existing project style — mirrors the `PLR*` entries already ignored + # in ruff.toml, so pylint and Ruff stay aligned on design limits. + "too-many-arguments", # R0913 ~ ruff PLR0913 + "too-many-positional-arguments", # R0917 ~ ruff PLR0917 + "too-many-public-methods", # R0904 ~ ruff PLR0904 + # Design metrics not enforced by this project (commonly disabled): small + # data/config classes and cross-file similarity are acceptable here. + "too-few-public-methods", # R0903 + "too-many-instance-attributes", # R0902 + # "duplicate-code", # R0801: noisy across CLI/output layers + # Deferred imports are intentional: circular-import avoidance, + # platform-specific modules, optional dependencies, and lazy CLI loading. + "import-outside-toplevel", # C0415 + # Broad `except Exception` is structural here: CLI boundary handlers, + # user-supplied check execution (Python/SHACL plugins), parsers of + # untrusted RO-Crate metadata and best-effort I/O fallbacks all need + # to catch anything. 
Narrowing is impossible at most call sites and + # noisy at the rest; suppress globally instead of per-line. + "broad-exception-caught", # W0718 + # False positive triggered by the `if TYPE_CHECKING:` import pattern: + # pylint sees the guarded block as code and flags any import after it + # as not at the top. Ruff handles this pattern correctly. + "wrong-import-position", # C0413 + # Module-level lazy init / configuration singletons (e.g. `_installed`, + # `_config`, `__profiles_loaded`) intentionally use the `global` keyword. + # The pattern is conventional and already whitelisted in `good-names-rgxs`. + "global-statement", # W0603 + # `_name`-prefixed cross-class collaboration is intentional throughout the + # validator (e.g. `_add_executed_check`, `_do_validate_`, `_close_session`): + # the underscore signals "internal API" within the package, not a hard + # privacy boundary. Per-callsite suppressions would be noisy. + "protected-access", # W0212 +] + +# Mypy configuration. +# Static type checking for the package. The pre-commit `mypy` hook runs +# `poetry run mypy` with no file arguments, so `files` below defines what gets +# checked. The baseline is intentionally lenient (gradual typing): it surfaces +# real type errors without forcing annotations on every function yet. +[tool.mypy] +python_version = "3.10" # lowest supported version (see dependencies.python) +files = ["rocrate_validator"] # the package; tests are not type-checked (yet) + +# Surface configuration/annotation drift without being noisy. +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true + +# Type-check the bodies of unannotated functions too (catches real bugs without +# requiring annotations everywhere). 
+check_untyped_defs = true From 3b1dbfca393b81cb015cf955a2fda3673b759b67 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 11:01:20 +0200 Subject: [PATCH 313/352] =?UTF-8?q?ci(pre-commit):=20=E2=AC=86=EF=B8=8F=20?= =?UTF-8?q?bump=20typos/pre-commit-hooks=20and=20exclude=20generated=20fil?= =?UTF-8?q?es=20from=20typos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e39569dd4..b31acc4bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,9 +15,13 @@ repos: # Run typos to check for common spelling mistakes in the codebase. - repo: https://github.com/crate-ci/typos - rev: v1.22.9 + rev: v1.47.2 hooks: - id: typos + # Pre-commit passes explicit file paths to the hook, bypassing the + # `[tool.typos.files] extend-exclude` config; replicate the excludes + # here so generated SVGs, JSON fixtures and HTML aren't scanned. + exclude: ^(tests/data/|docs/diagrams/|.*\.json$|.*\.html$|.*__init__\.py$) args: - --config - pyproject.toml @@ -26,7 +30,7 @@ repos: # Performs basic checks on files, such as removing trailing whitespace, # ensuring files end with a newline, and validating YAML files. 
- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer From f8d16ba6284d458100ba1ef36a8bdb1d53b88add Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:25:27 +0200 Subject: [PATCH 314/352] =?UTF-8?q?style(format):=20=F0=9F=8E=A8=20apply?= =?UTF-8?q?=20ruff-format=20and=20pre-commit=20fixups=20across=20the=20cod?= =?UTF-8?q?ebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/release.yaml | 6 +- docs/11_writing_a_profile.rst | 88 ++-- docs/12_validation_profiles.rst | 16 +- docs/3_usage_api.rst | 10 +- docs/diagrams/core-model.plantuml | 22 +- docs/diagrams/core-model.svg | 2 +- docs/diagrams/core-services.profiles.plantuml | 4 +- docs/diagrams/core-services.profiles.svg | 2 +- docs/diagrams/core-services.validate.plantuml | 4 +- docs/diagrams/core-services.validate.svg | 2 +- docs/index.rst | 6 +- rocrate_validator/__init__.py | 2 + rocrate_validator/cli/commands/cache.py | 75 ++-- rocrate_validator/cli/commands/errors.py | 11 +- rocrate_validator/cli/commands/profiles.py | 307 +++++++------ rocrate_validator/cli/commands/validate.py | 168 ++++--- rocrate_validator/cli/main.py | 33 +- rocrate_validator/cli/ui/text/validate.py | 30 +- rocrate_validator/cli/utils.py | 2 +- rocrate_validator/constants.py | 24 +- rocrate_validator/errors.py | 8 +- rocrate_validator/models.py | 50 +-- .../profiles/isa-ro-crate/0_investigation.ttl | 2 +- .../profiles/isa-ro-crate/10_definedterm.ttl | 22 +- .../isa-ro-crate/11_propertyvalue.ttl | 28 +- .../profiles/isa-ro-crate/1_study.ttl | 10 +- .../profiles/isa-ro-crate/2_assay.ttl | 16 +- .../profiles/isa-ro-crate/3_process.ttl | 6 +- .../profiles/isa-ro-crate/4_protocol.ttl | 8 +- .../profiles/isa-ro-crate/5_sample.ttl | 14 +- .../profiles/isa-ro-crate/6_data.ttl | 8 +- .../profiles/isa-ro-crate/7_person.ttl | 12 +- .../profiles/isa-ro-crate/8_article.ttl | 16 
+- .../profiles/isa-ro-crate/9_comment.ttl | 4 +- .../ro-crate/may/61_license_entity.ttl | 9 +- .../ro-crate/must/0_file_descriptor_format.py | 127 +++--- .../must/2_root_data_entity_metadata.ttl | 4 +- .../ro-crate/must/4_data_entity_metadata.py | 12 +- .../ro-crate/must/4_data_entity_metadata.ttl | 2 +- .../must/5_web_data_entity_metadata.ttl | 3 +- .../profiles/ro-crate/ontology.ttl | 1 - .../should/2_root_data_entity_metadata.ttl | 4 +- .../should/2_root_data_entity_relative_uri.py | 8 +- .../should/4_data_entity_existence.py | 12 +- .../should/4_data_entity_metadata.ttl | 2 +- .../should/5_web_data_entity_metadata.py | 18 +- .../should/5_web_data_entity_metadata.ttl | 2 +- .../workflow-ro-crate/may/1_main_workflow.py | 3 +- .../workflow-ro-crate/must/0_main_workflow.py | 7 +- .../requirements/python/__init__.py | 122 +++-- .../requirements/shacl/__init__.py | 12 +- .../requirements/shacl/checks.py | 11 +- .../requirements/shacl/errors.py | 5 +- .../requirements/shacl/models.py | 21 +- .../requirements/shacl/requirements.py | 10 +- .../requirements/shacl/validator.py | 68 ++- rocrate_validator/rocrate.py | 125 ++---- rocrate_validator/services.py | 4 +- rocrate_validator/utils/cache_warmup.py | 17 +- rocrate_validator/utils/collections.py | 1 - rocrate_validator/utils/document_loader.py | 4 +- rocrate_validator/utils/http.py | 56 +-- rocrate_validator/utils/io_helpers/input.py | 33 +- .../utils/io_helpers/output/__init__.py | 4 +- .../utils/io_helpers/output/console.py | 11 +- .../utils/io_helpers/output/json/__init__.py | 1 - .../utils/io_helpers/output/text/__init__.py | 1 - .../io_helpers/output/text/formatters.py | 26 +- .../io_helpers/output/text/layout/__init__.py | 1 - .../output/text/layout/dispatcher.py | 57 ++- .../io_helpers/output/text/layout/progress.py | 36 +- .../io_helpers/output/text/layout/report.py | 178 +++++--- rocrate_validator/utils/log.py | 46 +- rocrate_validator/utils/paths.py | 11 +- rocrate_validator/utils/rdf.py | 13 +- 
tests/conftest.py | 68 +-- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../no_files/ro-crate-metadata.json | 2 +- .../0_multi_profile_crate/primary-job.json | 2 +- .../no_license/ro-crate-metadata.json | 2 +- .../no_mainentity/ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 4 +- .../ro-crate-metadata.json | 2 +- .../provenance-run-crate/primary-job.json | 2 +- .../ro-crate-metadata.json | 2 +- .../data/crates/valid/wrroc-paper/index.html | 418 +++++++++--------- .../wrroc-paper/mapping/prov-mapping.json | 2 +- .../wrroc-paper/mapping/prov-mapping.rdf | 2 - .../wrroc-paper/mapping/prov-mapping.ttl | 2 - .../wrroc-paper/ro-crate-metadata.jsonld | 2 +- .../valid/wrroc-paper/ro-crate-preview.html | 418 +++++++++--------- .../data/profiles/check_overriding/readme.md | 2 +- tests/data/profiles/fake/c/profile.ttl | 2 +- .../a_explicit_version_property/profile.ttl | 2 +- .../nested_c/c/profile.ttl | 2 +- .../profiles/hidden_requirements/xh/a.ttl | 1 - .../hidden_requirements/xh/must/a_must.ttl | 1 - .../data/profiles/requirement_loading/x/a.ttl | 1 - .../requirement_loading/x/must/a_must.ttl | 1 - .../isa-ro-crate/test_0_investigation.py | 12 +- .../isa-ro-crate/test_10_definedterm.py | 12 +- .../isa-ro-crate/test_11_propertyvalue.py | 16 +- .../profiles/isa-ro-crate/test_1_study.py | 20 +- .../profiles/isa-ro-crate/test_2_assay.py | 8 +- .../profiles/isa-ro-crate/test_3_process.py | 16 +- .../profiles/isa-ro-crate/test_4_protocol.py | 8 +- .../profiles/isa-ro-crate/test_5_sample.py | 16 +- .../profiles/isa-ro-crate/test_6_data.py | 8 +- .../profiles/isa-ro-crate/test_7_person.py | 24 +- .../profiles/isa-ro-crate/test_8_article.py | 16 +- 
.../process-run-crate/test_procrc_action.py | 104 ++--- .../test_procrc_application.py | 32 +- .../test_procrc_collection.py | 12 +- .../test_procrc_containerimage.py | 40 +- .../test_procrc_root_data_entity.py | 30 +- .../process-run-crate/test_valid_prc.py | 6 +- .../test_provrc_controlaction.py | 52 ++- .../provenance-run-crate/test_provrc_file.py | 8 +- .../test_provrc_howtostep.py | 40 +- .../test_provrc_organizeaction.py | 60 +-- .../test_provrc_parameterconnection.py | 28 +- .../test_provrc_propertyvalue.py | 8 +- .../test_provrc_root_data_entity.py | 48 +- .../provenance-run-crate/test_provrc_tool.py | 48 +- .../test_provrc_tool_action.py | 8 +- .../test_provrc_workflow.py | 36 +- .../provenance-run-crate/test_valid_provrc.py | 2 +- .../ro-crate/test_file_descriptor_entity.py | 26 +- .../ro-crate/test_file_descriptor_format.py | 53 +-- .../ro-crate/test_root_data_entity.py | 42 +- .../profiles/ro-crate/test_valid_ro-crate.py | 52 +-- .../ro-crate/test_web_based_data_entity.py | 14 +- .../profiles/test_metadata_only.py | 36 +- .../workflow-ro-crate/test_main_workflow.py | 40 +- .../workflow-ro-crate/test_valid_wroc.py | 4 +- .../workflow-ro-crate/test_wroc_crate.py | 8 +- .../workflow-ro-crate/test_wroc_descriptor.py | 10 +- .../workflow-ro-crate/test_wroc_readme.py | 8 +- .../test_wroc_root_metadata.py | 8 +- .../workflow-run-crate/test_valid_wfrc.py | 2 +- .../test_wfrc_computational_workflow.py | 24 +- .../test_wfrc_formal_parameter.py | 60 +-- .../test_wfrc_root_data_entity.py | 30 +- .../test_valid_wtroc.py | 2 +- .../test_wtroc_root_data_entity.py | 4 +- .../test_wtroc_testdefinition.py | 20 +- .../test_wtroc_testinstance.py | 24 +- .../test_wtroc_testsuite.py | 24 +- tests/integration/test_offline_mode.py | 44 +- tests/integration/test_sparql_constraints.py | 17 +- tests/ro_crates.py | 15 - tests/shared.py | 15 +- tests/test_cli.py | 90 ++-- tests/test_models.py | 26 +- .../requirements/test_load_requirements.py | 37 +- 
tests/unit/requirements/test_profiles.py | 54 +-- tests/unit/requirements/test_shacl_checks.py | 8 +- tests/unit/test_cache_warmup.py | 7 +- tests/unit/test_cli_internals.py | 45 +- tests/unit/test_document_loader.py | 13 +- tests/unit/test_http_requester_offline.py | 7 +- tests/unit/test_remote_context_retrieval.py | 22 +- tests/unit/test_rocrate.py | 33 +- tests/unit/test_services.py | 61 ++- tests/unit/test_uri.py | 40 +- tests/unit/test_validation_settings.py | 6 +- 175 files changed, 2385 insertions(+), 2432 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5b089fbcc..b3e2be23b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -49,11 +49,11 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} checkName: ⌛ Run tests ref: ${{ github.sha }} - + - name: Do something with a passing build if: steps.wait-for-testing.outputs.conclusion == 'success' run: echo "Testing pipeline passed" && exit 0 - + - name: Do something with a failing build if: steps.wait-for-testing.outputs.conclusion == 'failure' run: echo "Testing pipeline failed" && exit 1 @@ -166,7 +166,7 @@ jobs: with: name: python-package-signatures path: dist/*.json - + # Create GitHub Release github_release: name: 🎉 Release on GitHub diff --git a/docs/11_writing_a_profile.rst b/docs/11_writing_a_profile.rst index 20a27b03e..86bdc8d63 100644 --- a/docs/11_writing_a_profile.rst +++ b/docs/11_writing_a_profile.rst @@ -1,35 +1,35 @@ Writing a new profile ===================== -This page is about writing a SHACL validation profile for a new or -existing RO-Crate profile. It does *not* offer guidance on creating the +This page is about writing a SHACL validation profile for a new or +existing RO-Crate profile. It does *not* offer guidance on creating the RO-Crate profile itself - for that, see the `RO-Crate page on Profiles `_. 
Learning SHACL -------------- -The validator profiles are written in SHACL (Shapes Constraint Language), a -language for validating RDF graphs against a set of conditions. -To use SHACL effectively, you also need some familiarity with RDF -(Resource Description Framework), the technology which underpins +The validator profiles are written in SHACL (Shapes Constraint Language), a +language for validating RDF graphs against a set of conditions. +To use SHACL effectively, you also need some familiarity with RDF +(Resource Description Framework), the technology which underpins JSON-LD and therefore RO-Crate. -For an RDF introduction, try the `RDF 1.1 Primer `_ or +For an RDF introduction, try the `RDF 1.1 Primer `_ or `Introduction to the Principles of Linked Open Data `_. -This `chapter on SHACL `_ +This `chapter on SHACL `_ from the book `Validating RDF Data `_ -has examples of most of SHACL's features and is a good place -to start learning. Other chapters in that book may provide an understanding +has examples of most of SHACL's features and is a good place +to start learning. Other chapters in that book may provide an understanding of *why* SHACL is our language of choice for this purpose. -For complex validation, you may also need some knowledge of SPARQL, an RDF +For complex validation, you may also need some knowledge of SPARQL, an RDF query language. You can learn about SPARQL in the tutorial `Using SPARQL to access Linked Open Data `_. -All these tools are best learned through practice and examples, so when building a -profile, it's encouraged to use the +All these tools are best learned through practice and examples, so when building a +profile, it's encouraged to use the `other profiles `_ as a point of reference. @@ -39,59 +39,59 @@ Setting up profile files and tests These instructions assume you are familiar with code development using Python and Git. #. `Install the repository from source `_. -#. 
From the root folder of the repo, create a folder for the profile under +#. From the root folder of the repo, create a folder for the profile under `rocrate_validator/profiles `_. -#. To set up the profile metadata, copy across ``profile.ttl`` from another - profile folder to the folder you created - (`example `_) +#. To set up the profile metadata, copy across ``profile.ttl`` from another + profile folder to the folder you created + (`example `_) & update that metadata to reflect your profile. In particular: #. change the token for the profile to a new and unique name, e.g. - ``prof:hasToken "workflow-ro-crate-linkml"``. This is the name which + ``prof:hasToken "workflow-ro-crate-linkml"``. This is the name which can be used to select the profile using ``--profile-identifier`` argument (and should also be the name of the folder). - #. Ensure the URI of the profile is unique (the first line after the - ``@prefix`` statements), to prevent conflation between this profile + #. Ensure the URI of the profile is unique (the first line after the + ``@prefix`` statements), to prevent conflation between this profile and any other profile in the package. - #. If this profile inherits from another profile in the validator - (including the base specification), set ``prof:isProfileOf`` / + #. If this profile inherits from another profile in the validator + (including the base specification), set ``prof:isProfileOf`` / ``prof:isTransitiveProfileOf`` to that profile's URI (which can be found in that profile's own ``profile.ttl``). -#. Create a ``profile-name.ttl`` file in the folder you created - this is - where you will write the SHACL for the validation. If you have a lot of - checks to write, you can create multiple files - the validator will - collect them all automatically at runtime. +#. Create a ``profile-name.ttl`` file in the folder you created - this is + where you will write the SHACL for the validation. 
If you have a lot of + checks to write, you can create multiple files - the validator will + collect them all automatically at runtime. .. note:: - Some profiles split the checks into folders called ``must/``, - ``should/`` and ``may/`` according to the requirement severity. This - is not mandatory - you can also label individual checks/shapes with + Some profiles split the checks into folders called ``must/``, + ``should/`` and ``may/`` according to the requirement severity. This + is not mandatory - you can also label individual checks/shapes with ``sh:severity`` in the SHACL code instead. -#. Optionally, associate an ontology graph with the profile by providing - an ``ontology.ttl`` file alongside the SHACL files. - This graph is merged into the crate's data graph at validation time, - allowing you to define formal relationships and additional definitions - between profile entities (e.g., using ``rdfs:subClassOf``, +#. Optionally, associate an ontology graph with the profile by providing + an ``ontology.ttl`` file alongside the SHACL files. + This graph is merged into the crate's data graph at validation time, + allowing you to define formal relationships and additional definitions + between profile entities (e.g., using ``rdfs:subClassOf``, ``owl:equivalentClass``, etc.). - + .. warning:: - Including an ontology can significantly impact validation times and + Including an ontology can significantly impact validation times and overall performance, especially for large graphs. Use with caution. -#. From the root folder of the repo, create a test folder for the profile - under +#. From the root folder of the repo, create a test folder for the profile + under `tests/integration/profiles `_. The name should match the folder you made earlier. -#. Copy the style of other profiles' tests to build up a test suite for the - profile. Add any required RO-Crate test data under +#. Copy the style of other profiles' tests to build up a test suite for the + profile. 
Add any required RO-Crate test data under `tests/data/crates/ `_ - and create corresponding classes in - `tests/ro_crates.py `_ + and create corresponding classes in + `tests/ro_crates.py `_ which can be used to fetch the data during the tests. -#. When your profile & tests are written, open a pull request to contribute +#. When your profile & tests are written, open a pull request to contribute it back to the repository! Overriding inherited checks @@ -245,7 +245,7 @@ can disable an inherited Python check by redeclaring it with Running validator & tests during profile development ---------------------------------------------------- -To run the test suite, run ``pytest``. New tests should be picked up automatically for +To run the test suite, run ``pytest``. New tests should be picked up automatically for the new profile. When running the validator manually, use ``--profile-identifier`` to select the desired profile. diff --git a/docs/12_validation_profiles.rst b/docs/12_validation_profiles.rst index da3696b9b..4902050e8 100644 --- a/docs/12_validation_profiles.rst +++ b/docs/12_validation_profiles.rst @@ -1,14 +1,14 @@ Validation Profiles =================== -The system comes with a set of **predefined validation profiles** that are loaded -automatically when the application starts (see `supported profiles <../#features>`_). +The system comes with a set of **predefined validation profiles** that are loaded +automatically when the application starts (see `supported profiles <../#features>`_). These profiles define the standard rules and checks that are applied during RO-Crate validation. Additional Profiles ------------------- -You can **extend or override** the predefined validation profiles by specifying +You can **extend or override** the predefined validation profiles by specifying the path to additional profiles using the ``--extra-profiles-path`` option on the command line. 
CLI Usage @@ -44,11 +44,11 @@ API Usage Behavior ^^^^^^^^ -* Profiles provided via ``--extra-profiles-path`` are **loaded in addition to** the system’s predefined profiles. -* If an additional profile has the **same name** as a predefined profile, the additional profile **overrides** the predefined one. +* Profiles provided via ``--extra-profiles-path`` are **loaded in addition to** the system’s predefined profiles. +* If an additional profile has the **same name** as a predefined profile, the additional profile **overrides** the predefined one. This mechanism allows you to: -* **Add new custom validation profiles** to implement project-specific checks. -* **Modify existing profiles** without altering the system’s predefined configuration files. -* **Maintain a clear separation** between standard validation logic and project-specific customizations. +* **Add new custom validation profiles** to implement project-specific checks. +* **Modify existing profiles** without altering the system’s predefined configuration files. +* **Maintain a clear separation** between standard validation logic and project-specific customizations. diff --git a/docs/3_usage_api.rst b/docs/3_usage_api.rst index bc3f57c1d..aa86c0c40 100644 --- a/docs/3_usage_api.rst +++ b/docs/3_usage_api.rst @@ -45,9 +45,9 @@ the library also supports metadata-only validation. This is useful when you want to ensure that the metadata conforms to the expected schema without checking the actual data files. -To perform metadata-only validation, you can use the `validate_metadata_as_dict` +To perform metadata-only validation, you can use the `validate_metadata_as_dict` from the `rocrate_validator.services` module. This function takes a dictionary -representing the metadata and validates it against a given validation profile. +representing the metadata and validates it against a given validation profile. .. 
code-block:: python @@ -61,7 +61,7 @@ representing the metadata and validates it against a given validation profile. with open('tests/data/crates/invalid/0_main_workflow/main_workflow_bad_type/ro-crate-metadata.json', 'r') as f: # load the metadata from the JSON file rocrate_metadata = json.load(f) - + # validate the metadata dictionary result = validate_metadata_as_dict(rocrate_metadata, settings=settings) @@ -73,7 +73,7 @@ Formatting Validation Results ----------------------------- Validation results can be rendered using different output formatters provided by -the library. Two formatter types are available: *text* and *JSON*. +the library. Two formatter types are available: *text* and *JSON*. Both rely on the ``rich`` Python library and integrate with the ``rocrate_validator.utils.io_helpers.output.console.Console`` class, which extends ``rich.console.Console`` to support custom formatter registration. @@ -86,7 +86,7 @@ aggregated statistics). TextOutputFormatter ~~~~~~~~~~~~~~~~~~~ -``TextOutputFormatter`` renders validation reports as human-readable, styled text. +``TextOutputFormatter`` renders validation reports as human-readable, styled text. It is typically used for console output, report generation, or writing results to a file. 
diff --git a/docs/diagrams/core-model.plantuml b/docs/diagrams/core-model.plantuml index 5225095de..5c3647806 100644 --- a/docs/diagrams/core-model.plantuml +++ b/docs/diagrams/core-model.plantuml @@ -13,7 +13,7 @@ caption **I**: Interface **E**: Enumeration **→** : the source object contains a reference to the target object as part of its state - **♦―** : the source object contains the target object as part of its state + **♦―** : the source object contains the target object as part of its state and the target object cannot exist without the source object **♢➞** : the source object references one or more target objects **⇢** : the source object uses the target object @@ -26,14 +26,14 @@ hide fields ' Define the package package "rocrate_validator.models" { - + ' Define the Severity enumeration enum Severity [[#rocrate_validator.models.Severity]] { OPTIONAL RECOMMENDED REQUIRED } - + show Severity members ' Define the RequirementLevel class @@ -41,9 +41,9 @@ package "rocrate_validator.models" { + name: str + severity: Severity } - + RequirementLevel o--> "1 " Severity - + show RequirementLevel members ' Define the LevelCollection class @@ -63,7 +63,7 @@ package "rocrate_validator.models" { {static} + list all() {static} + RequirementLevel get(name: str) } - + LevelCollection o--> "*" RequirementLevel ' Define the Profile class @@ -101,9 +101,9 @@ package "rocrate_validator.models" { + get_checks(): list + get_check(name: str): RequirementCheck } - + Profile "1 " *-- "1...* " Requirement - + hide Requirement members hide Requirement methods @@ -118,9 +118,9 @@ package "rocrate_validator.models" { + overrides: list + execute_check(context: ValidationContext) -> bool } - + Requirement "1" *-- " 1...*" RequirementCheck - + hide RequirementCheck members hide RequirementCheck methods @@ -133,7 +133,7 @@ package "rocrate_validator.models" { + rocrate_path: Path + profile_path: Path } - + ValidationContext --> "1 " ValidationSettings ValidationContext ..> "1 " 
rocrate_validator.rocrate.ROCrate: instantiates ValidationContext ..> "1 " Profile: " references" diff --git a/docs/diagrams/core-model.svg b/docs/diagrams/core-model.svg index 009b5e919..610279be7 100644 --- a/docs/diagrams/core-model.svg +++ b/docs/diagrams/core-model.svg @@ -1 +1 @@ -rocrate_validatormodelsrocrateSeverityOPTIONALRECOMMENDEDREQUIREDRequirementLevelname: strseverity: SeverityLevelCollectionProfileRequirementRequirementCheckValidationSettingsValidationContextValidationResultCheckIssueValidatorROCrate1*11...*11...*1instantiates1references1instantiates11*1instantiates* Core Model DiagramUML class diagram of main validation components. Legend:C: ClassA: Abstract ClassI: InterfaceE: Enumeration: the source object contains a reference to the target object as part of its state♦―: the source object contains the target object as part of its stateand the target object cannot exist without the source object♢➞: the source object references one or more target objects: the source object uses the target object \ No newline at end of file +rocrate_validatormodelsrocrateSeverityOPTIONALRECOMMENDEDREQUIREDRequirementLevelname: strseverity: SeverityLevelCollectionProfileRequirementRequirementCheckValidationSettingsValidationContextValidationResultCheckIssueValidatorROCrate1*11...*11...*1instantiates1references1instantiates11*1instantiates* Core Model DiagramUML class diagram of main validation components. 
Legend:C: ClassA: Abstract ClassI: InterfaceE: Enumeration: the source object contains a reference to the target object as part of its state♦―: the source object contains the target object as part of its stateand the target object cannot exist without the source object♢➞: the source object references one or more target objects: the source object uses the target object diff --git a/docs/diagrams/core-services.profiles.plantuml b/docs/diagrams/core-services.profiles.plantuml index 4a5fce9a3..bb219c4e8 100644 --- a/docs/diagrams/core-services.profiles.plantuml +++ b/docs/diagrams/core-services.profiles.plantuml @@ -21,8 +21,8 @@ package "rocrate_validator" { note top of services **I** ➠ **Interface** - Python module that exposes - the main validation services + Python module that exposes + the main validation services provided by the RO-Crate validator end note diff --git a/docs/diagrams/core-services.profiles.svg b/docs/diagrams/core-services.profiles.svg index 4d8ea3b26..5259fc361 100644 --- a/docs/diagrams/core-services.profiles.svg +++ b/docs/diagrams/core-services.profiles.svg @@ -1 +1 @@ -rocrate_validatormodelsservicesget_profile(profiles_path: Optional[Path], profile_identifier: str, severity:rocrate_validator.models.Severity) -> rocrate_validator.models.Profileget_profiles(profiles_path: Optional[Path], severity:rocrate_validator.models.Severity) -> list[rocrate_validator.models.Profile]ProfileSeverityI  Interface      Python module that exposes      the main validation services      provided by the RO-Crate validatorC  Class       The Profile class represents a       set of requirements and checks that       can be used to validate an RO-CrateE  Enum       The Severity enum represents the       different levels of severity that can       be used to filter requirements       and checks of a profileinstantiatesfilters by \ No newline at end of file +rocrate_validatormodelsservicesget_profile(profiles_path: Optional[Path], profile_identifier: str, 
severity:rocrate_validator.models.Severity) -> rocrate_validator.models.Profileget_profiles(profiles_path: Optional[Path], severity:rocrate_validator.models.Severity) -> list[rocrate_validator.models.Profile]ProfileSeverityI  Interface      Python module that exposes      the main validation services      provided by the RO-Crate validatorC  Class       The Profile class represents a       set of requirements and checks that       can be used to validate an RO-CrateE  Enum       The Severity enum represents the       different levels of severity that can       be used to filter requirements       and checks of a profileinstantiatesfilters by diff --git a/docs/diagrams/core-services.validate.plantuml b/docs/diagrams/core-services.validate.plantuml index 06bdc1ac7..f628878d5 100644 --- a/docs/diagrams/core-services.validate.plantuml +++ b/docs/diagrams/core-services.validate.plantuml @@ -21,8 +21,8 @@ package "rocrate_validator" { note top of services **I** ➠ **Interface** - Python module that exposes - the main validation services + Python module that exposes + the main validation services provided by the RO-Crate validator end note diff --git a/docs/diagrams/core-services.validate.svg b/docs/diagrams/core-services.validate.svg index 3492c013e..c1b96d081 100644 --- a/docs/diagrams/core-services.validate.svg +++ b/docs/diagrams/core-services.validate.svg @@ -1 +1 @@ -rocrate_validatormodelseventsservicesvalidate(settings: Union[dict, ValidationSettings], subscribers:Optional[list[Subscriber]]) -> rocrate_validator.models.ValidationResultValidationSettingsValidationResultC  Class       The ValidationResult class       represents the result of a       validation processC  Class       The ValidationSettings class       represents the settings used       to configure a validation processSubscriberEventA  Abstract Class       The Subscriber abstract class       defines the base interface for       an object that listens to events       and updates its state accordinglyC  
Class       The Event class represents       an event that can be sent to subscribersI  Interface      Python module that exposes      the main validation services      provided by the RO-Crate validatorreceivesupdates*returnsreceives \ No newline at end of file +rocrate_validatormodelseventsservicesvalidate(settings: Union[dict, ValidationSettings], subscribers:Optional[list[Subscriber]]) -> rocrate_validator.models.ValidationResultValidationSettingsValidationResultC  Class       The ValidationResult class       represents the result of a       validation processC  Class       The ValidationSettings class       represents the settings used       to configure a validation processSubscriberEventA  Abstract Class       The Subscriber abstract class       defines the base interface for       an object that listens to events       and updates its state accordinglyC  Class       The Event class represents       an event that can be sent to subscribersI  Interface      Python module that exposes      the main validation services      provided by the RO-Crate validatorreceivesupdates*returnsreceives diff --git a/docs/index.rst b/docs/index.rst index 143c4fdae..9983d1ad6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,12 +1,12 @@ .. Copyright (c) 2024-2026 CRS4 - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/rocrate_validator/__init__.py b/rocrate_validator/__init__.py index ef190af33..0a296af87 100644 --- a/rocrate_validator/__init__.py +++ b/rocrate_validator/__init__.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. + def _get_version(): from rocrate_validator.utils.versioning import get_version # noqa: PLC0415 + return get_version() diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 3f67c2623..768add09b 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -76,7 +76,7 @@ def cache_info(ctx, cache_path: Path | None = None): """ Display information about the HTTP cache. """ - console = ctx.obj['console'] + console = ctx.obj["console"] try: resolved = _resolve_cache_path(cache_path) _reset_requester(resolved) @@ -147,7 +147,7 @@ def cache_list( """ List entries currently stored in the HTTP cache (alias: `ls`). """ - console = ctx.obj['console'] + console = ctx.obj["console"] try: resolved = _resolve_cache_path(cache_path) _reset_requester(resolved) @@ -210,8 +210,8 @@ def cache_reset(ctx, cache_path: Path | None = None, yes: bool = False): """ Remove every entry from the HTTP cache. 
""" - console = ctx.obj['console'] - interactive = ctx.obj.get('interactive', False) + console = ctx.obj["console"] + interactive = ctx.obj.get("interactive", False) exit_code = 0 try: resolved = _resolve_cache_path(cache_path) @@ -220,22 +220,17 @@ def cache_reset(ctx, cache_path: Path | None = None, yes: bool = False): entries = info.get("entries", 0) size = _format_bytes(info.get("size_bytes", 0) or 0) console.print( - f"[bold]HTTP cache:[/bold] {info.get('path') or resolved} " - f"([cyan]{entries}[/cyan] entries, {size})" + f"[bold]HTTP cache:[/bold] {info.get('path') or resolved} ([cyan]{entries}[/cyan] entries, {size})" ) if entries == 0: console.print("[green]Cache is already empty.[/green]") return if not yes: if not interactive: - console.print( - "[yellow]Use --yes to remove entries in non-interactive mode.[/yellow]" - ) + console.print("[yellow]Use --yes to remove entries in non-interactive mode.[/yellow]") exit_code = 1 else: - confirm = click.confirm( - f"Remove all {entries} cached entries?", default=False - ) + confirm = click.confirm(f"Remove all {entries} cached entries?", default=False) if not confirm: console.print("Aborted.") else: @@ -321,7 +316,7 @@ def cache_warm( Pre-populate the HTTP cache with resources declared by profiles and with optional remote RO-Crate URLs. """ - console = ctx.obj['console'] + console = ctx.obj["console"] explicit_urls = list(url or []) invalid_urls = [u for u in explicit_urls if not u.lower().startswith(("http://", "https://"))] if invalid_urls: @@ -345,28 +340,19 @@ def cache_warm( # source (no -p, no --crate, no --url, no --all-profiles). 
any_explicit_source = bool(crate or explicit_urls or requested_ids or all_profiles) if all_profiles or requested_ids or not any_explicit_source: - urls, profile_scope = _resolve_warmup_urls_from_profiles( - console, profiles_dir, extra_dir, requested_ids - ) + urls, profile_scope = _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, requested_ids) results: list[WarmUpResult] = [] if urls: - console.print( - f"[bold]Warming cache for {profile_scope}[/bold] " - f"([cyan]{len(urls)}[/cyan] URL(s))..." - ) + console.print(f"[bold]Warming cache for {profile_scope}[/bold] ([cyan]{len(urls)}[/cyan] URL(s))...") results.extend(warm_up_urls(urls)) if crate: - console.print( - f"[bold]Fetching remote RO-Crates[/bold] ([cyan]{len(crate)}[/cyan] URL(s))..." - ) + console.print(f"[bold]Fetching remote RO-Crates[/bold] ([cyan]{len(crate)}[/cyan] URL(s))...") results.extend(_warm_remote_crates(list(crate))) if explicit_urls: - console.print( - f"[bold]Fetching explicit URLs[/bold] ([cyan]{len(explicit_urls)}[/cyan] URL(s))..." 
- ) + console.print(f"[bold]Fetching explicit URLs[/bold] ([cyan]{len(explicit_urls)}[/cyan] URL(s))...") results.extend(warm_up_urls(explicit_urls)) if not results: @@ -397,10 +383,7 @@ def _render_warmup_results(console, results: list[WarmUpResult]) -> bool: elif r.status == "failed": failed += 1 console.print(table) - console.print( - f"[bold]Summary:[/bold] {ok} cached, {failed} failed, " - f"{len(results) - ok - failed} skipped" - ) + console.print(f"[bold]Summary:[/bold] {ok} cached, {failed} failed, {len(results) - ok - failed} skipped") return failed > 0 @@ -427,9 +410,7 @@ def _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, request else: selected.append(profile) for requested, resolved, candidates in ambiguous_fallbacks: - other_versions = sorted( - p.identifier for p in candidates if p.identifier != resolved.identifier - ) + other_versions = sorted(p.identifier for p in candidates if p.identifier != resolved.identifier) console.print( f"[yellow]Note:[/yellow] '{requested}' matched multiple profiles; " f"using [cyan]{resolved.identifier}[/cyan] (highest version). " @@ -437,9 +418,7 @@ def _resolve_warmup_urls_from_profiles(console, profiles_dir, extra_dir, request f"(available: {', '.join(other_versions)})." 
) if missing: - console.print( - f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" - ) + console.print(f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}") profile_scope = f"profiles: {', '.join(p.identifier for p in selected)}" urls = discover_cacheable_urls_from_profiles(selected) else: @@ -531,16 +510,18 @@ def _collect_cache_entries( url = getattr(resp, "url", "") or "" if needle and needle not in url.lower(): continue - entries.append({ - "key": key, - "url": url, - "status": getattr(resp, "status_code", None), - "size": int(getattr(resp, "size", 0) or 0), - "content_type": (getattr(resp, "headers", {}) or {}).get("Content-Type"), - "created_at": getattr(resp, "created_at", None), - "expires": getattr(resp, "expires", None), - "is_expired": bool(getattr(resp, "is_expired", False)), - }) + entries.append( + { + "key": key, + "url": url, + "status": getattr(resp, "status_code", None), + "size": int(getattr(resp, "size", 0) or 0), + "content_type": (getattr(resp, "headers", {}) or {}).get("Content-Type"), + "created_at": getattr(resp, "created_at", None), + "expires": getattr(resp, "expires", None), + "is_expired": bool(getattr(resp, "is_expired", False)), + } + ) effective_order = sort_order or _DEFAULT_SORT_ORDER.get(sort_by, "desc") reverse = effective_order == "desc" if sort_by == "url": @@ -554,8 +535,10 @@ def _collect_cache_entries( def _entry_to_dict(entry: dict) -> dict: """JSON-safe view of an entry produced by ``_collect_cache_entries``.""" + def _iso(value: datetime | None) -> str | None: return value.isoformat() if value is not None else None + return { "url": entry["url"], "status": entry["status"], diff --git a/rocrate_validator/cli/commands/errors.py b/rocrate_validator/cli/commands/errors.py index c89a1aedf..7d3c9f50e 100644 --- a/rocrate_validator/cli/commands/errors.py +++ b/rocrate_validator/cli/commands/errors.py @@ -49,9 +49,14 @@ def handle_error(e: Exception, console: Console) -> None: 
error_message = f"\n\n[bold][[red]FAILED[/red]] Unexpected error: {e} !!![/bold]\n" if logger.isEnabledFor(logging.DEBUG): console.print_exception() - console.print(textwrap.indent("This error may be due to a bug.\n" - "Please report it to the issue tracker " - "along with the following stack trace:\n", ' ' * 9)) + console.print( + textwrap.indent( + "This error may be due to a bug.\n" + "Please report it to the issue tracker " + "along with the following stack trace:\n", + " " * 9, + ) + ) console.print_exception() console.print(f"\n\n[bold][[red]ERROR[/red]] {error_message}[/bold]\n", style="white") diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index 28b80bf77..c2cc6a3db 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -47,45 +47,39 @@ type=click.Path(exists=True), default=DEFAULT_PROFILES_PATH, show_default=True, - help="Path containing the profiles files" + help="Path containing the profiles files", ) @click.option( "--extra-profiles-path", type=click.Path(exists=True), default=None, show_default=True, - help="Path containing additional user profiles files" + help="Path containing additional user profiles files", ) @click.pass_context -def profiles(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Path | None = None): +def profiles(ctx, profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Path | None = None): """ [magenta]rocrate-validator:[/magenta] Manage profiles """ logger.debug("Profiles path: %s", profiles_path) - ctx.obj['profiles_path'] = profiles_path - ctx.obj['extra_profiles_path'] = extra_profiles_path + ctx.obj["profiles_path"] = profiles_path + ctx.obj["extra_profiles_path"] = extra_profiles_path @profiles.command("list") @click.option( - '--no-paging', - is_flag=True, - help="Disable paging", - default=False, - show_default=True, - hidden=sys.platform == "win32" + "--no-paging", is_flag=True, 
help="Disable paging", default=False, show_default=True, hidden=sys.platform == "win32" ) @click.pass_context def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFAULT_PROFILES_PATH): """ List available profiles """ - profiles_path = ctx.obj['profiles_path'] - extra_profiles_path = ctx.obj['extra_profiles_path'] - console = ctx.obj['console'] - pager = ctx.obj['pager'] - interactive = ctx.obj['interactive'] + profiles_path = ctx.obj["profiles_path"] + extra_profiles_path = ctx.obj["extra_profiles_path"] + console = ctx.obj["console"] + pager = ctx.obj["pager"] + interactive = ctx.obj["interactive"] # Get the no_paging flag enable_pager = not no_paging # override the enable_pager flag if the interactive flag is False @@ -94,18 +88,19 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA try: # Get the profiles - profiles = services.get_profiles(profiles_path=profiles_path, - extra_profiles_path=extra_profiles_path) - - table = Table(show_header=True, - title=" Available profiles", - title_style="italic bold cyan", - title_justify="left", - header_style="bold cyan", - border_style="bright_black", - show_footer=False, - caption_style="italic bold", - caption="[cyan](*)[/cyan] Number of requirements checks by severity") + profiles = services.get_profiles(profiles_path=profiles_path, extra_profiles_path=extra_profiles_path) + + table = Table( + show_header=True, + title=" Available profiles", + title_style="italic bold cyan", + title_justify="left", + header_style="bold cyan", + border_style="bright_black", + show_footer=False, + caption_style="italic bold", + caption="[cyan](*)[/cyan] Number of requirements checks by severity", + ) # Define columns table.add_column("Identifier", style="magenta bold", justify="center") @@ -124,10 +119,7 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA # Count requirements by severity checks_info: dict[str, dict[str, Any]] = {} for level in levels: - 
checks_info[level.severity.name] = { - "count": 0, - "color": get_severity_color(level.severity) - } + checks_info[level.severity.name] = {"count": 0, "color": get_severity_color(level.severity)} requirements = [_ for _ in profile.get_requirements(severity=Severity.OPTIONAL) if not _.hidden] for requirement in requirements: @@ -136,19 +128,23 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA checks_info[level.severity.name]["count"] += count checks_summary = "\n".join( - [f"[{v['color']}]{k}[/{v['color']}]: {v['count']}" for k, v in checks_info.items()]) + [f"[{v['color']}]{k}[/{v['color']}]: {v['count']}" for k, v in checks_info.items()] + ) # Add the row to the table profile_name = ( - "\n".join(map(str, profile.name)) - if isinstance(profile.name, list) - else str(profile.name or "") + "\n".join(map(str, profile.name)) if isinstance(profile.name, list) else str(profile.name or "") ) - table.add_row(profile.identifier, profile.uri, profile.version, - profile_name, Markdown((profile.description or "").strip()), - "\n".join([p.identifier for p in profile.inherited_profiles]), - checks_summary) + table.add_row( + profile.identifier, + profile.uri, + profile.version, + profile_name, + Markdown((profile.description or "").strip()), + "\n".join([p.identifier for p in profile.inherited_profiles]), + checks_summary, + ) table.add_row() # Print the table @@ -162,31 +158,28 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA @profiles.command("describe") @click.option( - '-v', - '--verbose', + "-v", + "--verbose", is_flag=True, - help="Show detailed list of requirements (or, when a check identifier is given, " - "show the source code of the check)", + help="Show detailed list of requirements (or, when a check identifier is given, show the source code of the check)", default=False, - show_default=True + show_default=True, ) @click.argument("profile-identifier", type=click.STRING, 
default=DEFAULT_PROFILE_IDENTIFIER, required=True) @click.argument("check-identifier", type=click.STRING, required=False, default=None) @click.option( - '--no-paging', - is_flag=True, - help="Disable paging", - default=False, - show_default=True, - hidden=sys.platform == "win32" + "--no-paging", is_flag=True, help="Disable paging", default=False, show_default=True, hidden=sys.platform == "win32" ) @click.pass_context -def describe_profile(ctx, - profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, - check_identifier: str | None = None, - profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Path | None = None, - verbose: bool = False, no_paging: bool = False): +def describe_profile( + ctx, + profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, + check_identifier: str | None = None, + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Path | None = None, + verbose: bool = False, + no_paging: bool = False, +): """ Show a profile, or — when CHECK_IDENTIFIER is given — show a single requirement check. @@ -198,11 +191,11 @@ def describe_profile(ctx, With -v on a single check, the source code of the check is shown. 
""" # Get the console - console = ctx.obj['console'] - pager = ctx.obj['pager'] - interactive = ctx.obj['interactive'] - profiles_path = ctx.obj['profiles_path'] - extra_profiles_path = ctx.obj['extra_profiles_path'] + console = ctx.obj["console"] + pager = ctx.obj["pager"] + interactive = ctx.obj["interactive"] + profiles_path = ctx.obj["profiles_path"] + extra_profiles_path = ctx.obj["extra_profiles_path"] # Get the no_paging flag enable_pager = not no_paging # override the enable_pager flag if the interactive flag is False @@ -211,8 +204,9 @@ def describe_profile(ctx, try: # Get the profile - profile = services.get_profile(profile_identifier, profiles_path=profiles_path, - extra_profiles_path=extra_profiles_path) + profile = services.get_profile( + profile_identifier, profiles_path=profiles_path, extra_profiles_path=extra_profiles_path + ) # Single-check view if check_identifier: @@ -235,8 +229,7 @@ def describe_profile(ctx, profile_name = str(profile_name).strip() subheader_content += f"[bold cyan]Name:[/bold cyan] [italic]{profile_name}[/italic]\n" subheader_content += ( - f"[bold cyan]Description:[/bold cyan] " - f"[italic]{(profile.description or '').strip()}[/italic]" + f"[bold cyan]Description:[/bold cyan] [italic]{(profile.description or '').strip()}[/italic]" ) # Add path info to the subheader subheader_content += ( @@ -263,8 +256,18 @@ def describe_profile(ctx, with console.pager(pager=pager, styles=not console.no_color) if enable_pager else console: console.print(get_app_header_rule()) - console.print(Padding(Panel(subheader_content, title=subheader_title, padding=(1, 1, 0, 1), - title_align="left", border_style="cyan"), (0, 1, 0, 1))) + console.print( + Padding( + Panel( + subheader_content, + title=subheader_title, + padding=(1, 1, 0, 1), + title_align="left", + border_style="cyan", + ), + (0, 1, 0, 1), + ) + ) console.print(Padding(table, (1, 1))) except click.ClickException: @@ -300,21 +303,28 @@ def __compacted_describe_profile__(profile): color = 
get_severity_color(level.severity) level_info = f"[{color}]{level.severity.name}[/{color}]" levels_list.add(level_info) - table_rows.append((str(requirement.order_number), requirement.name, - Markdown(requirement.description.strip()), - f"{levels_count[0]}", - f"{levels_count[1]}", - f"{levels_count[2]}")) - - table = Table(show_header=True, - title=f"[cyan]{len(requirements)}[/cyan] Profile Requirements", - title_style="italic bold", - header_style="bold cyan", - border_style="bright_black", - show_footer=False, - show_lines=True, - caption_style="italic bold", - caption=f"[cyan](*)[/cyan] number of checks by severity level: {', '.join(levels_list)}") + table_rows.append( + ( + str(requirement.order_number), + requirement.name, + Markdown(requirement.description.strip()), + f"{levels_count[0]}", + f"{levels_count[1]}", + f"{levels_count[2]}", + ) + ) + + table = Table( + show_header=True, + title=f"[cyan]{len(requirements)}[/cyan] Profile Requirements", + title_style="italic bold", + header_style="bold cyan", + border_style="bright_black", + show_footer=False, + show_lines=True, + caption_style="italic bold", + caption=f"[cyan](*)[/cyan] number of checks by severity level: {', '.join(levels_list)}", + ) # Define columns table.add_column("#", style="cyan bold", justify="right") @@ -352,20 +362,20 @@ def __verbose_describe_profile__(profile): description_table.add_row(Align(Padding(override, (0, 0, 1, 0)), align="right")) description_table.add_row(Markdown(check.description.strip())) - table_rows.append(( - check.identifier, check.name, - description_table, level_info)) + table_rows.append((check.identifier, check.name, description_table, level_info)) count_checks += 1 - table = Table(show_header=True, - title=f"[cyan]{count_checks}[/cyan] Profile Requirements Checks", - title_style="italic bold", - header_style="bold cyan", - border_style="bright_black", - show_footer=False, - show_lines=True, - caption_style="italic bold", - caption=f"[cyan](*)[/cyan] number of 
checks by severity level: {', '.join(levels_list)}") + table = Table( + show_header=True, + title=f"[cyan]{count_checks}[/cyan] Profile Requirements Checks", + title_style="italic bold", + header_style="bold cyan", + border_style="bright_black", + show_footer=False, + show_lines=True, + caption_style="italic bold", + caption=f"[cyan](*)[/cyan] number of checks by severity level: {', '.join(levels_list)}", + ) # Define columns table.add_column("Identifier", style="cyan bold", justify="right") @@ -397,7 +407,7 @@ def __resolve_check__(profile: Profile, check_identifier: str) -> RequirementChe f"Check identifier '{raw}' does not belong to profile '{profile.identifier}'.", param_hint="CHECK_IDENTIFIER", ) - relative = raw[len(prefix):] + relative = raw[len(prefix) :] match = _CHECK_ID_RE.match(relative) if not match: @@ -467,16 +477,15 @@ def __describe_check__(console, profile: Profile, check: RequirementCheck, verbo f"[italic]#{requirement.order_number} — {requirement.name}[/italic]" ) if requirement.path: - header += ( - "\n[bold cyan]Source file:[/bold cyan] " - f"[italic green]{shorten_path(requirement.path)}[/italic green]" - ) + header += f"\n[bold cyan]Source file:[/bold cyan] [italic green]{shorten_path(requirement.path)}[/italic green]" title = f"[bold][cyan]Check:[/cyan] [magenta italic]{check.identifier}[/magenta italic][/bold]" - console.print(Padding( - Panel(header, title=title, padding=(1, 1, 1, 1), title_align="left", border_style="cyan"), - (0, 1, 0, 1), - )) + console.print( + Padding( + Panel(header, title=title, padding=(1, 1, 1, 1), title_align="left", border_style="cyan"), + (0, 1, 0, 1), + ) + ) description_panel = Panel( Markdown(check.description.strip()), @@ -489,47 +498,67 @@ def __describe_check__(console, profile: Profile, check: RequirementCheck, verbo if check.overrides: overrides_text = __format_overrides__(check.overrides, label="overrides") - console.print(Padding(Panel( - overrides_text, - title="[bold cyan]Overrides[/bold cyan]", - 
title_align="left", - border_style="bright_black", - padding=(1, 1, 1, 1), - ), (1, 1, 0, 1))) + console.print( + Padding( + Panel( + overrides_text, + title="[bold cyan]Overrides[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), + (1, 1, 0, 1), + ) + ) if check.overridden_by: overridden_text = __format_overrides__(check.overridden_by, label="overridden by") - console.print(Padding(Panel( - overridden_text, - title="[bold cyan]Overridden by[/bold cyan]", - title_align="left", - border_style="bright_black", - padding=(1, 1, 1, 1), - ), (1, 1, 0, 1))) + console.print( + Padding( + Panel( + overridden_text, + title="[bold cyan]Overridden by[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), + (1, 1, 0, 1), + ) + ) if verbose: snippet = check.get_source_snippet() if snippet is None: - console.print(Padding(Panel( - "[italic]Source code not available for this check kind.[/italic]", - title="[bold cyan]Source[/bold cyan]", - title_align="left", - border_style="bright_black", - padding=(1, 1, 1, 1), - ), (1, 1, 0, 1))) + console.print( + Padding( + Panel( + "[italic]Source code not available for this check kind.[/italic]", + title="[bold cyan]Source[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), + (1, 1, 0, 1), + ) + ) else: source_title = f"[bold cyan]Source ({snippet.language})[/bold cyan]" if snippet.source_path: source_title += f': [italic green]"{snippet.source_path.name}"[/italic green]' - console.print(Padding(Panel( - Syntax( - snippet.code, - snippet.language, - theme="ansi_dark", - line_numbers=False, - word_wrap=True, - ), - title=source_title, - title_align="left", - border_style="bright_black", - padding=(1, 1, 1, 1), - ), (1, 1, 1, 1))) + console.print( + Padding( + Panel( + Syntax( + snippet.code, + snippet.language, + theme="ansi_dark", + line_numbers=False, + word_wrap=True, + ), + title=source_title, + 
title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), + (1, 1, 1, 1), + ) + ) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index 566daede7..4e19d7dfa 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -61,28 +61,21 @@ def validate_uri(ctx, param, value): @cli.command("validate") @click.argument("rocrate-uri", callback=validate_uri, default=".") @click.option( - '-rr', - '--relative-root-path', + "-rr", + "--relative-root-path", help="Use root-relative paths for all file references in the RO-Crate", default=None, - show_default=True + show_default=True, ) @click.option( - '-m', - '--metadata-only', + "-m", + "--metadata-only", is_flag=True, help="Validate only the metadata of the RO-Crate", default=False, - show_default=True -) -@click.option( - '-ff', - '--fail-fast', - is_flag=True, - help="Fail fast validation mode", - default=False, - show_default=True + show_default=True, ) +@click.option("-ff", "--fail-fast", is_flag=True, help="Fail fast validation mode", default=False, show_default=True) @click.option( "--profiles-path", type=click.Path(exists=True), @@ -95,7 +88,7 @@ def validate_uri(ctx, param, value): type=click.Path(exists=True), default=None, show_default=True, - help="Path containing additional user profiles files" + help="Path containing additional user profiles files", ) @click.option( "-p", @@ -113,15 +106,15 @@ def validate_uri(ctx, param, value): is_flag=True, help="Disable automatic detection of the profile to use for validation", default=False, - show_default=True + show_default=True, ) @click.option( - '-nh', - '--disable-profile-inheritance', + "-nh", + "--disable-profile-inheritance", is_flag=True, help="Disable inheritance of profiles", default=False, - show_default=True + show_default=True, ) @click.option( "-l", @@ -132,16 +125,16 @@ def validate_uri(ctx, param, value): help="Severity of the 
requirements to validate", ) @click.option( - '-lo', - '--requirement-severity-only', + "-lo", + "--requirement-severity-only", is_flag=True, help="Validate only the requirements of the specified severity (no requirements with lower severity)", default=False, - show_default=True + show_default=True, ) @click.option( - '-s', - '--skip-checks', + "-s", + "--skip-checks", multiple=True, type=click.STRING, default=None, @@ -159,62 +152,62 @@ def validate_uri(ctx, param, value): ), ) @click.option( - '-v', - '--verbose', + "-v", + "--verbose", is_flag=True, help="Output the validation details without prompting", default=False, - show_default=True + show_default=True, ) @click.option( - '--no-paging', + "--no-paging", is_flag=True, help="Disable pagination of the validation details", default=False, show_default=True, - hidden=sys.platform == "win32" + hidden=sys.platform == "win32", ) @click.option( - '-f', - '--output-format', + "-f", + "--output-format", type=click.Choice(["json", "text"], case_sensitive=False), default="text", show_default=True, - help="Output format of the validation report" + help="Output format of the validation report", ) @click.option( - '-o', - '--output-file', + "-o", + "--output-file", type=click.Path(), default=None, show_default=True, help="Path to the output file for the validation report", ) @click.option( - '-w', - '--output-line-width', + "-w", + "--output-line-width", type=click.INT, default=120, show_default=True, help="Width of the output line", ) @click.option( - '--cache-max-age', + "--cache-max-age", type=click.INT, default=constants.DEFAULT_HTTP_CACHE_MAX_AGE, show_default=True, help="Maximum age of the HTTP cache in seconds ([bold green]-1[/bold green] for no expiration)", ) @click.option( - '--cache-path', + "--cache-path", type=click.Path(), default=None, show_default=True, help="Path to the HTTP cache directory", ) @click.option( - '-nc', - '--no-cache', + "-nc", + "--no-cache", is_flag=True, help=( "Disable the HTTP cache 
entirely: every request goes to the network " @@ -224,7 +217,7 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.option( - '--offline', + "--offline", is_flag=True, help=( "Offline mode: HTTP requests are served only from the cache. " @@ -234,34 +227,36 @@ def validate_uri(ctx, param, value): show_default=True, ) @click.pass_context -def validate(ctx, - profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Path | None = None, - profile_identifier: tuple[str, ...] = (), - metadata_only: bool = False, - no_auto_profile: bool = False, - disable_profile_inheritance: bool = False, - requirement_severity: str = Severity.REQUIRED.name, - requirement_severity_only: bool = False, - skip_checks: list[str] | None = None, - rocrate_uri: str | Path = ".", - relative_root_path: Path | None = None, - fail_fast: bool = False, - no_paging: bool = False, - verbose: bool = False, - output_format: str = "text", - output_file: Path | None = None, - output_line_width: int | None = None, - cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Path | None = None, - no_cache: bool = False, - offline: bool = False): +def validate( + ctx, + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Path | None = None, + profile_identifier: tuple[str, ...] 
= (), + metadata_only: bool = False, + no_auto_profile: bool = False, + disable_profile_inheritance: bool = False, + requirement_severity: str = Severity.REQUIRED.name, + requirement_severity_only: bool = False, + skip_checks: list[str] | None = None, + rocrate_uri: str | Path = ".", + relative_root_path: Path | None = None, + fail_fast: bool = False, + no_paging: bool = False, + verbose: bool = False, + output_format: str = "text", + output_file: Path | None = None, + output_line_width: int | None = None, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Path | None = None, + no_cache: bool = False, + offline: bool = False, +): """ [magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile """ - console: Console = ctx.obj['console'] - pager = ctx.obj['pager'] - interactive = ctx.obj['interactive'] + console: Console = ctx.obj["console"] + pager = ctx.obj["pager"] + interactive = ctx.obj["interactive"] # Get the no_paging flag enable_pager = not no_paging # override the enable_pager flag if the interactive flag is False @@ -495,9 +490,7 @@ def _resolve_profile_identifiers( # Prompt the user when interactive and no single profile could be auto-detected if interactive and ( - not candidate_profiles - or len(candidate_profiles) == 0 - or len(candidate_profiles) == len(available_profiles) + not candidate_profiles or len(candidate_profiles) == 0 or len(candidate_profiles) == len(available_profiles) ): console.print( Padding( @@ -505,9 +498,9 @@ def _resolve_profile_identifiers( "[bold yellow]WARNING: [/bold yellow]" "[bold]Unable to automatically detect the profile to use for validation[/bold]\n", align="center", - style="bold yellow" + style="bold yellow", ), - (2, 2, 0, 2) + (2, 2, 0, 2), ) ) selected_options = multiple_choice(console, available_profiles) @@ -523,12 +516,12 @@ def _resolve_profile_identifiers( # Fall back to the base profile when nothing could be resolved if not profile_identifiers: - console.print(f"\n{' 
'*2}[bold yellow]WARNING: [/bold yellow]", end="") + console.print(f"\n{' ' * 2}[bold yellow]WARNING: [/bold yellow]", end="") if no_auto_profile: console.print("[bold]Auto-detection of the profiles to use for validation is disabled[/bold]") else: console.print("[bold]Unable to automatically detect the profile to use for validation[/bold]") - console.print(f"{' '*11}[bold]The base `ro-crate` profile will be used for validation[/bold]") + console.print(f"{' ' * 11}[bold]The base `ro-crate` profile will be used for validation[/bold]") profile_identifiers = ["ro-crate"] return profile_identifiers, autodetection @@ -550,16 +543,17 @@ def _render_console_result( console=console, interactive=interactive, no_paging=not enable_pager, - pager=pager + pager=pager, ) result = command_view.show_validation_progress(services.validate) if not result.passed(): verbose_choice = "n" if interactive and not verbose: - verbose_choice = get_single_char(console, choices=['y', 'n'], message=( - "[bold] > Do you want to see the validation details? " - "([magenta]y/n[/magenta]): [/bold]" - )) + verbose_choice = get_single_char( + console, + choices=["y", "n"], + message=("[bold] > Do you want to see the validation details? ([magenta]y/n[/magenta]): [/bold]"), + ) if verbose_choice == "y" or verbose: command_view.display_validation_result(result) return result @@ -592,7 +586,7 @@ def _render_file_or_collected_result( profile_identifier=profile, validation_settings=validation_settings, callable_service=services.validate, - transient=True + transient=True, ) as result: logger.debug("Validation result obtained") else: @@ -604,7 +598,7 @@ def _render_file_or_collected_result( # Output processing for text format to file if output_file and output_format == "text": if interactive: - console.print(f"\n{' '*2}📝 [bold]Writing validation results to file[/bold]{'.'*4} ", end="") + console.print(f"\n{' ' * 2}📝 [bold]Writing validation results to file[/bold]{'.' 
* 4} ", end="") with output_file.open("w", encoding="utf-8") if output_file else sys.stdout as f: out = Console(color_system=None, width=output_line_width, height=31, file=f) out.register_formatter(TextOutputFormatter()) @@ -630,20 +624,20 @@ def _emit_json_report( if interactive: if is_valid: console.print( - f"\n{' '*2}✅ [bold]Validation [green]PASSED![/green]. " - f"\n{' '*5}RO-Crate is valid according to the profile(s): " + f"\n{' ' * 2}✅ [bold]Validation [green]PASSED![/green]. " + f"\n{' ' * 5}RO-Crate is valid according to the profile(s): " f"[cyan]{', '.join(profile_identifiers)}[/cyan][/bold]" ) else: - console.print(f"\n{' '*2}❌ [bold]Validation [red]FAILED![/red][/bold]") + console.print(f"\n{' ' * 2}❌ [bold]Validation [red]FAILED![/red][/bold]") if output_file: console.print( - f"\n{' '*2}📝 [bold]Writing validation results in JSON format " - f"to the file \"{output_file}\"[/bold]{'.'*4} ", - end="" + f"\n{' ' * 2}📝 [bold]Writing validation results in JSON format " + f'to the file "{output_file}"[/bold]{"." 
* 4} ', + end="", ) else: - console.print(f"\n{' '*2}📋 [bold]The validation report in JSON format: [/bold]\n") + console.print(f"\n{' ' * 2}📋 [bold]The validation report in JSON format: [/bold]\n") # Generate the JSON output and write it to the specified output file or to stdout with output_file.open("w", encoding="utf-8") if output_file else nullcontext(sys.stdout) as f: diff --git a/rocrate_validator/cli/main.py b/rocrate_validator/cli/main.py index 64a43b216..ce57f6277 100644 --- a/rocrate_validator/cli/main.py +++ b/rocrate_validator/cli/main.py @@ -31,30 +31,30 @@ @click.group(invoke_without_command=True) @click.rich_config(help_config=click.RichHelpConfiguration(text_markup="rich")) @click.option( - '--debug', + "--debug", is_flag=True, help="Enable debug logging", - default=False + default=False, ) @click.option( - '-v', - '--version', + "-v", + "--version", is_flag=True, help="Show the version of the rocrate-validator package", - default=False + default=False, ) @click.option( - '-y', - '--no-interactive', + "-y", + "--no-interactive", is_flag=True, help="Disable interactive mode", - default=False + default=False, ) @click.option( - '--disable-color', + "--disable-color", is_flag=True, help="Disable colored console output", - default=False + default=False, ) @click.pass_context def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_interactive: bool): @@ -65,15 +65,14 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ console = Console(no_color=disable_color or not interactive, interactive=interactive) # pass the console to subcommands through the click context, after configuration - ctx.obj['console'] = console - ctx.obj['pager'] = SystemPager() - ctx.obj['interactive'] = interactive + ctx.obj["console"] = console + ctx.obj["pager"] = SystemPager() + ctx.obj["interactive"] = interactive try: # If the version flag is set, print the version and exit if version: - console.print( - 
f"[bold]rocrate-validator [cyan]{get_version()}[/cyan][/bold]") + console.print(f"[bold]rocrate-validator [cyan]{get_version()}[/cyan][/bold]") sys.exit(0) # Set the log level logging.basicConfig(level=logging.DEBUG if debug else logging.WARNING) @@ -81,12 +80,12 @@ def cli(ctx: click.Context, debug: bool, version: bool, disable_color: bool, no_ if ctx.invoked_subcommand is None: # If no subcommand is provided, invoke the default command from rocrate_validator.cli.commands.validate import validate # noqa: PLC0415 + ctx.invoke(validate) else: logger.debug("Command invoked: %s", ctx.invoked_subcommand) except Exception as e: - console.print( - f"\n\n[bold][[red]FAILED[/red]] Unexpected error: {e} !!![/bold]\n", style="white") + console.print(f"\n\n[bold][[red]FAILED[/red]] Unexpected error: {e} !!![/bold]\n", style="white") console.print("""This error may be due to a bug. Please report it to the issue tracker along with the following stack trace: diff --git a/rocrate_validator/cli/ui/text/validate.py b/rocrate_validator/cli/ui/text/validate.py index 9f8081662..9cb0abfa0 100644 --- a/rocrate_validator/cli/ui/text/validate.py +++ b/rocrate_validator/cli/ui/text/validate.py @@ -36,12 +36,14 @@ class ValidationCommandView: A class to handle the validation command view """ - def __init__(self, - validation_settings: ValidationSettings | None, - interactive: bool = True, - no_paging: bool = False, - pager: SystemPager | None = None, - console: Console | None = None): + def __init__( + self, + validation_settings: ValidationSettings | None, + interactive: bool = True, + no_paging: bool = False, + pager: SystemPager | None = None, + console: Console | None = None, + ): self.console = console or Console() self.interactive = interactive self.pager = pager if not no_paging else None @@ -65,10 +67,7 @@ def report_layout(self) -> ValidationReportLayout: """ if self._report_layout is None: assert self.validation_settings is not None, "Validation settings must be set" - 
self._report_layout = ValidationReportLayout( - console=self.console, - settings=self.validation_settings - ) + self._report_layout = ValidationReportLayout(console=self.console, settings=self.validation_settings) return self._report_layout @@ -85,10 +84,7 @@ def show_validation_progress(self, validation_command: Callable) -> Any: logger.debug("Starting validation with progress bar") result = self.report_layout.live( - lambda: validation_command( - self.validation_settings, - subscribers=self.report_layout.subscribers - ) + lambda: validation_command(self.validation_settings, subscribers=self.report_layout.subscribers) ) logger.debug("Validation completed with result: %s", result) return result @@ -102,8 +98,7 @@ def display_validation_statistics(self, statistics: ValidationStatistics) -> Non """ assert statistics is not None, "Validation statistics must be provided" - with (self.console.pager(pager=self.pager, styles=not self.console.no_color) - if self.pager else self.console): + with self.console.pager(pager=self.pager, styles=not self.console.no_color) if self.pager else self.console: self.console.print(statistics) def display_validation_result(self, result: ValidationResult) -> None: @@ -117,6 +112,5 @@ def display_validation_result(self, result: ValidationResult) -> None: logger.debug("Displaying validation result") - with (self.console.pager(pager=self.pager, styles=not self.console.no_color) - if self.pager else self.console): + with self.console.pager(pager=self.pager, styles=not self.console.no_color) if self.pager else self.console: self.console.print(result) diff --git a/rocrate_validator/cli/utils.py b/rocrate_validator/cli/utils.py index 1df0e20c8..68d666166 100644 --- a/rocrate_validator/cli/utils.py +++ b/rocrate_validator/cli/utils.py @@ -26,4 +26,4 @@ def running_in_jupyter(): # that launched the current kernel. # It is mainly used internally to track the parent process # and manage kernel lifecycle. 
- return 'JPY_PARENT_PID' in os.environ + return "JPY_PARENT_PID" in os.environ diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index affc8b6d6..6d25b5dcd 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -64,12 +64,18 @@ "n3": "n3", "turtle": "ttl", "nt": "nt", - "json-ld": "json-ld" + "json-ld": "json-ld", } # Define allowed RDF serialization formats RDF_SERIALIZATION_FORMATS_TYPES = typing.Literal[ - "xml", "pretty-xml", "trig", "n3", "turtle", "nt", "json-ld" + "xml", + "pretty-xml", + "trig", + "n3", + "turtle", + "nt", + "json-ld", ] RDF_SERIALIZATION_FORMATS = typing.get_args(RDF_SERIALIZATION_FORMATS_TYPES) @@ -79,8 +85,16 @@ # Define allowed requirement levels VALID_REQUIREMENT_LEVELS_TYPES = typing.Literal[ - 'MAY', 'OPTIONAL', 'SHOULD', 'SHOULD_NOT', - 'REQUIRED', 'MUST', 'MUST_NOT', 'SHALL', 'SHALL_NOT', 'RECOMMENDED' + "MAY", + "OPTIONAL", + "SHOULD", + "SHOULD_NOT", + "REQUIRED", + "MUST", + "MUST_NOT", + "SHALL", + "SHALL_NOT", + "RECOMMENDED", ] # Current JSON output format @@ -99,7 +113,7 @@ # Http Cache Settings DEFAULT_HTTP_CACHE_MAX_AGE = -1 # in seconds; negative means "never expire" -DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' +DEFAULT_HTTP_CACHE_PATH_PREFIX = "/tmp/rocrate_validator_cache" # Directory name used under the user's cache root for the persistent HTTP cache USER_CACHE_DIR_NAME = "rocrate-validator" # Filename (without extension) of the persistent HTTP cache under the user cache dir diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index d71cb4e6c..6d3da1133 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -232,11 +232,7 @@ def __repr__(self): class CheckValidationError(ValidationError): """Raised when a validation check fails.""" - def __init__(self, - check, - message, - path: str = ".", - code: int = -1): + def __init__(self, check, message, path: str = ".", code: int = -1): 
super().__init__(message, path, code) self._check = check @@ -280,7 +276,7 @@ def __repr__(self) -> str: @classmethod def default_error_message(cls, uri: str | Path | URI) -> str: return ( - f"\"{uri!s}\" is not a valid RO-Crate URI. " + f'"{uri!s}" is not a valid RO-Crate URI. ' "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." ) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 6ca9e11bf..6723e0809 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -747,9 +747,7 @@ def __load_profiles_paths__( for root_profile_directory in root_profile_directories: # if the path is a string, convert it to a Path profile_root_directory = ( - Path(root_profile_directory) - if isinstance(root_profile_directory, str) - else root_profile_directory + Path(root_profile_directory) if isinstance(root_profile_directory, str) else root_profile_directory ) # check if the path is a directory and raise an error if not if not profile_root_directory.is_dir(): @@ -1376,12 +1374,14 @@ def ok_file(p: Path) -> bool: requirement_path, ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) - requirements.extend(cast("Any", requirement_loader).load( - profile, - requirement_level, - requirement_path, - publicID=profile.publicID, - )) + requirements.extend( + cast("Any", requirement_loader).load( + profile, + requirement_level, + requirement_path, + publicID=profile.publicID, + ) + ) # sort the requirements by severity requirements = sorted( requirements, @@ -1408,6 +1408,7 @@ class SourceSnippet: :ivar code: the source code as text. :ivar source_path: path to the file the snippet was extracted from, when available. 
""" + language: str code: str source_path: Path | None = None @@ -1415,7 +1416,6 @@ class SourceSnippet: @total_ordering class RequirementCheck(ABC): - def __init__( self, requirement: Requirement, @@ -1885,8 +1885,12 @@ def duration(self) -> float | None: @staticmethod def __collect_requirement_checks__( - requirement, severity_validation, validation_settings, - target_profile_identifier, checks, checks_by_severity, + requirement, + severity_validation, + validation_settings, + target_profile_identifier, + checks, + checks_by_severity, ) -> int: """Count and register a requirement's checks across severities >= the requested one.""" requirement_checks_count = 0 @@ -1895,9 +1899,7 @@ def __collect_requirement_checks__( Severity.RECOMMENDED, Severity.OPTIONAL, ): - logger.debug( - f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}" - ) + logger.debug(f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}") # skip requirements with lower severity if severity < severity_validation: continue @@ -2410,9 +2412,7 @@ def __build_sorted_stats_dict__(cls, raw_stats): """ sorted_checks_by_severity = {} for severity_key, severity_checks in raw_stats["checks_by_severity"].items(): - sorted_checks_by_severity[severity_key] = sorted( - severity_checks, key=lambda c: c.identifier - ) + sorted_checks_by_severity[severity_key] = sorted(severity_checks, key=lambda c: c.identifier) return { "profiles": sorted(raw_stats["profiles"], key=lambda p: p.identifier), @@ -2605,8 +2605,7 @@ def failed_requirements(self) -> Collection[Requirement]: Get the requirements that failed at or above the configured `requirement_severity`. 
""" min_severity = self.context.requirement_severity - return {issue.check.requirement for issue in self._issues - if issue.severity >= min_severity} + return {issue.check.requirement for issue in self._issues if issue.severity >= min_severity} # --- Checks --- @property @@ -2615,8 +2614,7 @@ def failed_checks(self) -> Collection[RequirementCheck]: Get the checks that failed at or above the configured `requirement_severity`. """ min_severity = self.context.requirement_severity - return {issue.check for issue in self._issues - if issue.severity >= min_severity} + return {issue.check for issue in self._issues if issue.severity >= min_severity} def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: """ @@ -2767,8 +2765,7 @@ def __post_init__(self): # combined with an explicit cache disable. if self.offline and self.no_cache: raise ValueError( - "Offline mode requires the HTTP cache to be enabled; " - "no_cache=True is incompatible with offline=True." + "Offline mode requires the HTTP cache to be enabled; no_cache=True is incompatible with offline=True." ) # Default to the persistent user cache whenever caching is enabled so that # consecutive runs (online then offline) share the same HTTP cache: this @@ -2794,7 +2791,10 @@ def __post_init__(self): ) logger.debug( "HTTP cache initialized at %s with max age %s seconds (offline=%s, no_cache=%s)", - self.cache_path, self.cache_max_age, self.offline, self.no_cache, + self.cache_path, + self.cache_max_age, + self.offline, + self.no_cache, ) # Install the JSON-LD document loader so context resolution goes through the cache. 
try: diff --git a/rocrate_validator/profiles/isa-ro-crate/0_investigation.ttl b/rocrate_validator/profiles/isa-ro-crate/0_investigation.ttl index 54b9f1f76..b635d5ec8 100644 --- a/rocrate_validator/profiles/isa-ro-crate/0_investigation.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/0_investigation.ttl @@ -133,4 +133,4 @@ isa-ro-crate:InvestigationShouldHaveDateCreated a sh:NodeShape ; sh:message "Investigation dateCreated MUST be a valid ISO 8601 date" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl index 3755a6ebb..b4949c2b9 100644 --- a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl @@ -44,7 +44,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is measurementMethod of an Assay" ; sh:path [ sh:inversePath schema:measurementMethod ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Assay ; ] ; sh:qualifiedMinCount 1 ; @@ -54,7 +54,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is measurementTechnique of an Assay" ; sh:path [ sh:inversePath schema:measurementTechnique ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Assay ] ; sh:qualifiedMinCount 1 ; @@ -64,7 +64,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is intendedUse of a Protocol" ; sh:path [ sh:inversePath schema:intendedUse ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -74,7 +74,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is computationalTool 
of a Protocol" ; sh:path [ sh:inversePath schema:computationalTool ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -84,7 +84,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is labEquipment of a Protocol" ; sh:path [ sh:inversePath schema:labEquipment ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -94,7 +94,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is reagent of a Protocol" ; sh:path [ sh:inversePath schema:reagent ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -104,7 +104,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is job title of a Person" ; sh:path [ sh:inversePath schema:jobTitle ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Person ] ; sh:qualifiedMinCount 1 ; @@ -114,7 +114,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "OntologyAnnotation is status of a Publication" ; sh:path [ sh:inversePath schema:creativeWorkStatus ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Publication ] ; sh:qualifiedMinCount 1 ; @@ -134,14 +134,14 @@ isa-ro-crate:DefinedTermMustHaveName a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that DefinedTerm does have non-empty name and it's a string" ; sh:message "DefinedTerm entity MUST have a non-empty name of type string" ; sh:severity sh:Violation ; - ] + ] . 
isa-ro-crate:DefinedTermShouldHaveTermCodeOfCorrectType a sh:NodeShape ; @@ -167,4 +167,4 @@ isa-ro-crate:DefinedTermShouldHaveTermCodeOfCorrectType a sh:NodeShape ; sh:message "DefinedTerm termCode MUST be of type string" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl index 6bdee423d..f633a4cbd 100644 --- a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl @@ -44,7 +44,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is measurementMethod of an Assay" ; sh:path [ sh:inversePath schema:variableMeasured ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Assay ; ] ; sh:qualifiedMinCount 1 ; @@ -54,7 +54,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is parameterValue of a Process" ; sh:path [ sh:inversePath bioschemas-prop:parameterValue ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Process ] ; sh:qualifiedMinCount 1 ; @@ -64,7 +64,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is computationalTool of a Protocol" ; sh:path [ sh:inversePath schema:computationalTool ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -74,7 +74,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is labEquipment of a Protocol" ; sh:path [ sh:inversePath schema:labEquipment ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -84,7 +84,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, 
validator:HiddenShape; sh:property [ sh:name "PropertyValue is reagent of a Protocol" ; sh:path [ sh:inversePath schema:reagent ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Protocol ] ; sh:qualifiedMinCount 1 ; @@ -94,7 +94,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is characteristic of a Sample" ; sh:path [ sh:inversePath bioschemas-prop:additionalProperty ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Sample ] ; sh:qualifiedMinCount 1 ; @@ -104,7 +104,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is identifier of a Person" ; sh:path [ sh:inversePath schema:identifier ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Person ] ; sh:qualifiedMinCount 1 ; @@ -114,7 +114,7 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "PropertyValue is identifier of a Publication" ; sh:path [ sh:inversePath schema:identifier ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Publication ] ; sh:qualifiedMinCount 1 ; @@ -193,14 +193,14 @@ isa-ro-crate:PropertyValueMustHaveName a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that PropertyValue does have non-empty name and it's a string" ; sh:message "PropertyValue entity MUST have a non-empty name of type string" ; sh:severity sh:Violation ; - ] + ] . 
isa-ro-crate:PropertyValueShouldHaveValueOfCorrectType a sh:NodeShape ; @@ -222,12 +222,12 @@ isa-ro-crate:PropertyValueShouldHaveValueOfCorrectType a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:value ; sh:or( - [ sh:datatype xsd:string ] - [ sh:datatype xsd:float ] - [ sh:datatype xsd:integer ] + [ sh:datatype xsd:string ] + [ sh:datatype xsd:float ] + [ sh:datatype xsd:integer ] ) ; sh:description "The value of a PropertyValue MUST be of type string or a number" ; sh:message "PropertyValue value MUST be of type string, float, or integer" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl index 11d5ec2ff..829b374cf 100644 --- a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl @@ -53,7 +53,7 @@ isa-ro-crate:StudyMustHaveBaseDescriptors a sh:NodeShape ; sh:path schema:identifier ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -66,7 +66,7 @@ isa-ro-crate:StudyMustHaveBaseDescriptors a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -95,7 +95,7 @@ isa-ro-crate:StudyRecommendedProperties a sh:NodeShape ; sh:name "Study SHOULD have dateCreated, datePublished, hasPart, description, creator, and about" ; sh:description "A Study SHOULD have a dateCreated" ; sh:targetClass isa-ro-crate:Study ; - + sh:property [ a sh:PropertyShape ; sh:path schema:dateCreated ; @@ -136,7 +136,7 @@ isa-ro-crate:StudyRecommendedProperties a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:description ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -205,5 +205,5 @@ isa-ro-crate:StudyRecommendedProperties a sh:NodeShape ; sh:description "Check that if study does have at least one object in about, it MUST 
be of type LabProcess" ; sh:message "Study about MUST be of type LabProcess" ; sh:severity sh:Violation ; - ] + ] . diff --git a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl index 475627afc..7eb4313ba 100644 --- a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl @@ -27,7 +27,7 @@ isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; sh:name "Identify Assays within the RO-Crate" ; sh:description "An Assay has type Dataset and additionalType 'Assay'." ; sh:targetClass schema:Dataset ; - sh:order 1 ; + sh:order 1 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -53,7 +53,7 @@ isa-ro-crate:AssayMustHaveBaseDescriptors a sh:NodeShape ; sh:path schema:identifier ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -70,9 +70,9 @@ isa-ro-crate:AssayMustBeReferencedFromInvestigation a sh:NodeShape ; sh:property[ a sh:PropertyShape ; sh:path [ sh:inversePath schema:hasPart ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or ( - [ sh:class ro-crate:RootDataEntity ] + [ sh:class ro-crate:RootDataEntity ] [ sh:class isa-ro-crate:Study ] ) ; ] ; @@ -92,7 +92,7 @@ isa-ro-crate:AssayRecommendedProperties a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:name ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -113,7 +113,7 @@ isa-ro-crate:AssayRecommendedProperties a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:description ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -222,5 +222,5 @@ isa-ro-crate:AssayRecommendedProperties a sh:NodeShape ; sh:description "Check that if assay does have at least one object in about, it MUST be of type LabProcess" ; sh:message "Assay about MUST be of type LabProcess" ; sh:severity sh:Violation ; - ] -. 
\ No newline at end of file + ] +. diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index 3d71bf39a..fc9282c6e 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -39,7 +39,7 @@ isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:about ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or( [sh:class isa-ro-crate:Assay] [sh:class isa-ro-crate:Study] @@ -66,7 +66,7 @@ isa-ro-crate:ProcessMustHaveName a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -185,4 +185,4 @@ isa-ro-crate:ProcessShouldHaveParamValue a sh:NodeShape ; sh:message "Process parameter values MUST be of type PropertyValue" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl index 64e762242..ad55511a5 100644 --- a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl @@ -39,7 +39,7 @@ isa-ro-crate:FindISAProtocols a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath bioschemas-prop:executesLabProtocol ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Process ; ] ; sh:qualifiedMinCount 1 ; @@ -58,7 +58,7 @@ isa-ro-crate:ProtocolShouldHaveNameOfCorrectType a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:name ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; @@ -84,7 +84,7 @@ isa-ro-crate:ProtocolShouldHaveDescriptionOfCorrectType a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:description ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] 
; @@ -125,4 +125,4 @@ isa-ro-crate:ProtocolShouldHaveIntendedUse a sh:NodeShape ; sh:message "Protocol intended use MUST be of type string or DefinedTerm" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl index 5bfab5038..6c8612090 100644 --- a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl @@ -43,20 +43,20 @@ isa-ro-crate:FindISASamples a sh:NodeShape, validator:HiddenShape; [ sh:property [ sh:path [ sh:inversePath schema:object ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Process ] ; sh:qualifiedMinCount 1 ; - ] + ] ] [ sh:property [ sh:path [ sh:inversePath schema:result ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Process ] ; sh:qualifiedMinCount 1 ; - ] + ] ] ) ; ] ; @@ -74,14 +74,14 @@ isa-ro-crate:SampledMustHaveName a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that sample does have non-empty name and it's a string" ; sh:message "Sample entity MUST have a non-empty name of type string" ; sh:severity sh:Violation ; - ] + ] . isa-ro-crate:SampleShouldHaveAdditionalPropertyOfCorrectType a sh:NodeShape ; @@ -104,4 +104,4 @@ isa-ro-crate:SampleShouldHaveAdditionalPropertyOfCorrectType a sh:NodeShape ; sh:message "Sample additional property MUST be of type PropertyValue" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. 
diff --git a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl index 3a8d1948d..2a342a23b 100644 --- a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl @@ -39,7 +39,7 @@ isa-ro-crate:FindISAFiles a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:hasPart ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or( [sh:class isa-ro-crate:Assay] [sh:class isa-ro-crate:Study] @@ -62,12 +62,12 @@ isa-ro-crate:FileMustHaveName a sh:NodeShape ; sh:path schema:name ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that file does have non-empty name and it's a string" ; sh:message "File entity MUST have a non-empty name of type string" ; sh:severity sh:Violation ; - ] -. \ No newline at end of file + ] +. diff --git a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl index 9e2855265..ba54c8244 100644 --- a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl @@ -44,7 +44,7 @@ isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "Person is creator of a Study, Assay, or RootDataEntity" ; sh:path [ sh:inversePath schema:creator ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or( [sh:class isa-ro-crate:Assay] [sh:class isa-ro-crate:Study] @@ -58,7 +58,7 @@ isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "Person is author of an Article" ; sh:path [ sh:inversePath schema:author ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Article ] ; sh:qualifiedMinCount 1 ; @@ -68,7 +68,7 @@ isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; sh:property [ sh:name "Person is agent of a Process" ; sh:path 
[ sh:inversePath schema:agent ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:class isa-ro-crate:Process ] ; sh:qualifiedMinCount 1 ; @@ -90,14 +90,14 @@ isa-ro-crate:PersonMustHaveGivenName a sh:NodeShape ; sh:path schema:givenName ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that person does have non-empty given name and it's a string" ; sh:message "Person entity MUST have a non-empty given name of type string" ; sh:severity sh:Violation ; - ] + ] . isa-ro-crate:PersonShouldHaveAffiliationOfCorrectType a sh:NodeShape ; @@ -220,4 +220,4 @@ isa-ro-crate:PersonShouldHaveIdentifierOfCorrectType a sh:NodeShape ; sh:message "Person identifier MUST be of type string or PropertyValue" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl index c9deffb53..fb52b7fb3 100644 --- a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl @@ -39,7 +39,7 @@ isa-ro-crate:FindISAPublication a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:citation ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or( [sh:class ro-crate:RootDataEntity] [sh:class isa-ro-crate:Study] @@ -61,14 +61,14 @@ isa-ro-crate:ArticleMustHaveHeadline a sh:NodeShape ; sh:path schema:headline ; sh:datatype xsd:string ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that article does have non-empty headline and it's a string" ; sh:message "Article entity MUST have a non-empty headline of type string" ; sh:severity sh:Violation ; - ] + ] . 
isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; @@ -79,18 +79,18 @@ isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; a sh:PropertyShape ; sh:path schema:identifier ; sh:or( - [ sh:datatype xsd:string ] - [ sh:class schema:PropertyValue ] + [ sh:datatype xsd:string ] + [ sh:class schema:PropertyValue ] ) ; sh:minCount 1 ; - sh:maxCount 1 ; + sh:maxCount 1 ; sh:not [ sh:hasValue "" ] ; sh:description "Check that article does have non-empty identifier and it's a string or PropertyValue" ; sh:message "Article entity MUST have a non-empty identifier of type string or PropertyValue" ; sh:severity sh:Violation ; - ] + ] . isa-ro-crate:ArticleShouldHaveAuthorOfCorrectType a sh:NodeShape ; @@ -113,4 +113,4 @@ isa-ro-crate:ArticleShouldHaveAuthorOfCorrectType a sh:NodeShape ; sh:message "Article author MUST be of type Person" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. diff --git a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl index d02c0eb3a..3a807cfd1 100644 --- a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl @@ -39,7 +39,7 @@ isa-ro-crate:FindISAComment a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:comment ] ; - sh:qualifiedValueShape [ + sh:qualifiedValueShape [ sh:or( [sh:class ro-crate:RootDataEntity] [sh:class isa-ro-crate:Study] @@ -103,4 +103,4 @@ isa-ro-crate:CommentShouldHaveText a sh:NodeShape ; sh:message "Comment text MUST be of type string" ; sh:severity sh:Violation ; ] -. \ No newline at end of file +. 
diff --git a/rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl b/rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl index 202af897c..9bfe4bab1 100644 --- a/rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl +++ b/rocrate_validator/profiles/ro-crate/may/61_license_entity.ttl @@ -24,11 +24,11 @@ ro-crate:LicenseOptionalAllowedValues a sh:NodeShape ; sh:name "Root Data Entity: optional properties" ; sh:description """Define the optional properties for the Root Data Entity (e.g., license)""" ; sh:targetClass ro-crate:RootDataEntity ; - sh:property [ + sh:property [ a sh:PropertyShape ; sh:name "License" ; sh:description """Check if the RO-Crate has a license property with a URI or a textual description""" ; - sh:message """MAY have a URI (eg for Creative Commons or Open Source licenses). + sh:message """MAY have a URI (eg for Creative Commons or Open Source licenses). MAY, if necessary be a textual description of how the RO-Crate may be used.""" ; sh:minCount 1 ; sh:nodeKind sh:IRIOrLiteral ; @@ -43,7 +43,7 @@ ro-crate:LicenseDefinition a sh:NodeShape ; sh:name "License definition" ; sh:description """Contextual entity representing a license with a name and description."""; sh:targetClass schema_org:license ; - sh:property [ + sh:property [ a sh:PropertyShape ; sh:name "License name" ; sh:description "The license MAY have a name" ; @@ -53,7 +53,7 @@ ro-crate:LicenseDefinition a sh:NodeShape ; sh:path schema_org:name ; sh:message "Missing license name" ; ] ; - sh:property [ + sh:property [ a sh:PropertyShape ; sh:name "License description" ; sh:description """The license MAY have a description""" ; @@ -63,4 +63,3 @@ ro-crate:LicenseDefinition a sh:NodeShape ; sh:path schema_org:description ; sh:message "Missing license description" ; ] . 
- diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 72f0d8a97..31b7d4718 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -53,7 +53,7 @@ def test_size(self, context: ValidationContext) -> bool: logger.debug("Skipping file descriptor existence check in metadata-only mode") return True if not context.ro_crate.has_descriptor(): - message = f'file descriptor {context.rel_fd_path} is empty' + message = f"file descriptor {context.rel_fd_path} is empty" context.result.add_issue(message, self) return False if context.ro_crate.metadata.size == 0: @@ -67,16 +67,18 @@ class FileDescriptorJsonFormat(PyFunctionCheck): """ The file descriptor MUST be a valid JSON file """ + @check(name="File Descriptor JSON format") def check(self, context: ValidationContext) -> bool: - """ Check if the file descriptor is in the correct format""" + """Check if the file descriptor is in the correct format""" try: logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) context.ro_crate.metadata.as_dict() return True except Exception: context.result.add_issue( - f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) + f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self + ) if logger.isEnabledFor(logging.DEBUG): logger.exception("RO-Crate file descriptor is not in the correct format") return False @@ -110,9 +112,9 @@ def __get_remote_context__(self, context_uri: str) -> object: # so we can use .get() without worrying about the case) link_header = raw_data.headers.get("Link", "") logger.debug(f"Checking Link header for alternate JSON-LD context: {link_header}") - has_alternate_link = ('rel="alternate"' in link_header and - ('type="application/ld+json"' in link_header or - 
'type="application/json"' in link_header)) + has_alternate_link = 'rel="alternate"' in link_header and ( + 'type="application/ld+json"' in link_header or 'type="application/json"' in link_header + ) if has_alternate_link: logger.debug(f"Found alternate link for JSON-LD context in Link header: {link_header}") @@ -124,28 +126,36 @@ def __get_remote_context__(self, context_uri: str) -> object: if not alternate_url.startswith("http"): alternate_url = urljoin(context_uri, alternate_url) logger.debug(f"Trying to retrieve JSON-LD context from alternate URL: {alternate_url}") - raw_data = HttpRequester().get(alternate_url, headers={ - "Accept": "application/ld+json, application/json"}) + raw_data = HttpRequester().get( + alternate_url, headers={"Accept": "application/ld+json, application/json"} + ) if raw_data.status_code != HTTP_STATUS_OK: raise RuntimeError( - f"Unable to retrieve the JSON-LD context from alternate URL '{alternate_url}'", self) + f"Unable to retrieve the JSON-LD context from alternate URL '{alternate_url}'", self + ) logger.debug(f"Retrieved context from alternate URL {alternate_url}") content_type = raw_data.headers.get("Content-Type", "") if "application/ld+json" not in content_type and "application/json" not in content_type: raise RuntimeError( f"The retrieved context from alternate URL {alternate_url} " "does not have a Content-Type of application/ld+json or application/json: " - f"the actual Content-Type is {content_type}. ", self) + f"the actual Content-Type is {content_type}. 
", + self, + ) else: logger.debug(f"No valid alternate link found in Link header: {link_header}") raise RuntimeError( f"Unable to retrieve the JSON-LD context from {context_uri} and no valid " - f"alternate link found in Link header: {link_header}", self) + f"alternate link found in Link header: {link_header}", + self, + ) else: logger.debug(f"No alternate link for JSON-LD context found in Link header: {link_header}") raise RuntimeError( f"Unable to retrieve the JSON-LD context from {context_uri} " - f"and no alternate link found in Link header: {link_header}", self) + f"and no alternate link found in Link header: {link_header}", + self, + ) # Try to parse the JSON-LD and access the context jsonLD = raw_data.json()["@context"] @@ -158,9 +168,10 @@ def __check_remote_context__(self, context_uri: str) -> bool: try: # Try to parse the JSON-LD and access the context jsonLD = self.__get_remote_context__(context_uri) - assert isinstance( - jsonLD, dict), f"The retrieved context from {context_uri} is not \ + assert isinstance(jsonLD, dict), ( + f"The retrieved context from {context_uri} is not \ a valid JSON-LD context: it is not a dictionary" + ) return True except Exception: if logger.isEnabledFor(logging.DEBUG): @@ -168,12 +179,11 @@ def __check_remote_context__(self, context_uri: str) -> bool: return False def __check_contexts__(self, context: ValidationContext, jsonld_context: object) -> bool: - """ Get the keys of the context URI """ + """Get the keys of the context URI""" is_valid = True # if the context is a string, check if it is a valid URI if isinstance(jsonld_context, str) and not self.__check_remote_context__(jsonld_context): - context.result.add_issue( - f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) + context.result.add_issue(f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) is_valid = False # if the context is a dictionary, get the keys of the dictionary @@ -190,15 +200,15 @@ def __check_contexts__(self, context: 
ValidationContext, jsonld_context: object) @check(name="File Descriptor @context property validation") def check_context(self, context: ValidationContext) -> bool: - """ Check if the file descriptor contains + """Check if the file descriptor contains the @context property and it is a valid JSON-LD context """ try: json_dict = context.ro_crate.metadata.as_dict() if "@context" not in json_dict: context.result.add_issue( - f'RO-Crate file descriptor "{context.rel_fd_path}" ' - "does not contain a context", self) + f'RO-Crate file descriptor "{context.rel_fd_path}" does not contain a context', self + ) return False # Check if the context is valid @@ -210,14 +220,15 @@ def check_context(self, context: ValidationContext) -> bool: @check(name="File Descriptor JSON-LD must be flattened") def check_flattened(self, context: ValidationContext) -> bool: - """ Check if the file descriptor is flattened """ + """Check if the file descriptor is flattened""" return self._check_flattened_graph( context, lambda entity, fail_fast: self._is_entity_flat(context, entity, fail_fast=fail_fast), ) - def _is_entity_flat(self, context: ValidationContext, entity: Any, - is_first: bool = True, fail_fast: bool = False) -> bool: + def _is_entity_flat( + self, context: ValidationContext, entity: Any, is_first: bool = True, fail_fast: bool = False + ) -> bool: """Recursively check that an entity (and its children) is flattened.""" if isinstance(entity, dict): if is_first: @@ -243,32 +254,31 @@ def _validate_non_root_entity(self, context: ValidationContext, entity: Any, fai if "@id" in entity and "@value" in entity: issues.append( f'entity "{entity.get("@id", entity)}" contains both @id and @value: ' - 'an object with an @value represents a value object, which is a literal value such as ' - 'a string, number, date, or language-tagged string. This object is not an identifiable ' - 'resource, but a simple literal value.' 
+ "an object with an @value represents a value object, which is a literal value such as " + "a string, number, date, or language-tagged string. This object is not an identifiable " + "resource, but a simple literal value." ) if "@value" in entity: if not isinstance(entity, dict): issues.append( - f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'it MUST be a dictionary.' + f'entity "{entity.get("@id", entity)}" is not a valid value object: it MUST be a dictionary.' ) has_language = "@language" in entity has_type = "@type" in entity if has_language and has_type: issues.append( f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - '@language and @type cannot coexist.' + "@language and @type cannot coexist." ) if has_language and not isinstance(entity["@value"], str): issues.append( f'entity "{entity.get("@id", entity)}" is not a valid value object: ' - 'if @language is present, @value must be a string.' + "if @language is present, @value must be a string." ) elif "@id" not in entity or len(entity) > 1: issues.append( f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' - 'it MUST have only @id, but no other properties.' + "it MUST have only @id, but no other properties." 
) return self._emit_entity_issues(context, issues, fail_fast) @@ -289,7 +299,9 @@ def _check_flattened_graph(self, context, is_flat): if not is_flat(entity, fail_fast=fail_fast): context.result.add_issue( f'RO-Crate file descriptor "{context.rel_fd_path}" ' - f'is not fully flattened at entity "{entity.get("@id", entity)}"', self) + f'is not fully flattened at entity "{entity.get("@id", entity)}"', + self, + ) result = False if fail_fast: return False @@ -301,15 +313,17 @@ def _check_flattened_graph(self, context, is_flat): @check(name="Validation of the @id property of the file descriptor entities") def check_identifiers(self, context: ValidationContext) -> bool: - """ Check if the file descriptor entities have the @id property """ + """Check if the file descriptor entities have the @id property""" try: json_dict = context.ro_crate.metadata.as_dict() for entity in json_dict["@graph"]: if "@id" not in entity: context.result.add_issue( - f"Entity \"{entity.get('name', None) or entity}\" " - f"of RO-Crate \"{context.rel_fd_path}\" " - "file descriptor does not contain the @id attribute", self) + f'Entity "{entity.get("name", None) or entity}" ' + f'of RO-Crate "{context.rel_fd_path}" ' + "file descriptor does not contain the @id attribute", + self, + ) return False return True except Exception: @@ -319,15 +333,17 @@ def check_identifiers(self, context: ValidationContext) -> bool: @check(name="Validation of the @type property of the file descriptor entities") def check_types(self, context: ValidationContext) -> bool: - """ Check if the file descriptor entities have the @type property """ + """Check if the file descriptor entities have the @type property""" try: json_dict = context.ro_crate.metadata.as_dict() for entity in json_dict["@graph"]: if "@type" not in entity: context.result.add_issue( - f"Entity \"{entity.get('name', None) or entity}\" " - f"of RO-Crate \"{context.rel_fd_path}\" " - "file descriptor does not contain the @type attribute", self) + f'Entity 
"{entity.get("name", None) or entity}" ' + f'of RO-Crate "{context.rel_fd_path}" ' + "file descriptor does not contain the @type attribute", + self, + ) return False return True except Exception: @@ -336,7 +352,7 @@ def check_types(self, context: ValidationContext) -> bool: return False def __get_context_keys__(self, context: object) -> set: - """ Get the keys of the context URI """ + """Get the keys of the context URI""" if isinstance(context, str): return self.__get_remote_context_keys__(context) @@ -355,7 +371,7 @@ def __get_context_keys__(self, context: object) -> set: return set() def __get_remote_context_keys__(self, context_uri: str) -> set: - """ Get the keys of the context URI """ + """Get the keys of the context URI""" logger.debug(f"Retrieving context from {context_uri}...") # Get the keys of the context @@ -367,10 +383,10 @@ def __get_remote_context_keys__(self, context_uri: str) -> set: # Reserved JSON-LD keywords that are always allowed as entity keys. __RESERVED_JSONLD_KEYS__ = frozenset({"@id", "@type", "@context", "@value", "@language"}) - def __check_entity_keys__(self, entity: Any, - context_keys: set, - unexpected_keys: dict[str, int] | None = None) -> dict[str, int]: - """ Check if the entity is in the correct format """ + def __check_entity_keys__( + self, entity: Any, context_keys: set, unexpected_keys: dict[str, int] | None = None + ) -> dict[str, int]: + """Check if the entity is in the correct format""" # Ensure unexpected_keys is initialized if unexpected_keys is None: unexpected_keys = {} @@ -391,7 +407,7 @@ def __check_entity_keys__(self, entity: Any, return unexpected_keys def __record_unexpected_key__(self, k: str, context_keys: set, unexpected_keys: dict[str, int]) -> None: - """ Record ``k`` as unexpected unless it is reserved or a valid compact IRI prefix """ + """Record ``k`` as unexpected unless it is reserved or a valid compact IRI prefix""" # If the key is a reserved JSON-LD keyword, skip it if k in 
self.__RESERVED_JSONLD_KEYS__: logger.debug(f"Key {k} is a reserved JSON-LD keyword, skipping") @@ -409,7 +425,7 @@ def __record_unexpected_key__(self, k: str, context_keys: set, unexpected_keys: @check(name="Validation of the compaction format of the file descriptor") def check_compaction(self, context: ValidationContext) -> bool: - """ Check if the file descriptor is in the **compacted** JSON-LD format """ + """Check if the file descriptor is in the **compacted** JSON-LD format""" try: logger.debug("Checking compaction format of JSON-LD file at %s", context.ro_crate.metadata) json_dict = context.ro_crate.metadata.as_dict() @@ -438,19 +454,22 @@ def check_compaction(self, context: ValidationContext) -> bool: if k.startswith("http"): context.result.add_issue( f'The {v} occurrence{suffix} of the "{k}" URI cannot be used as a key{suffix} "' - 'because the compacted format requires simple terms as keys ' - '(see https://www.w3.org/TR/json-ld-api/#compaction for more details).', self) + "because the compacted format requires simple terms as keys " + "(see https://www.w3.org/TR/json-ld-api/#compaction for more details).", + self, + ) else: context.result.add_issue( f'The {v} occurrence{suffix} of the JSON-LD key "{k}" ' - f'{"is" if v == 1 else "are"} not allowed in the compacted format ' - 'because it is not present in the @context of the document', self) + f"{'is' if v == 1 else 'are'} not allowed in the compacted format " + "because it is not present in the @context of the document", + self, + ) return False return True except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception("Unexpected error during file descriptor validation") - context.result.add_issue( - f'Unexpected error: {e}', self) + context.result.add_issue(f"Unexpected error: {e}", self) return False diff --git a/rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl index 2c2efcf2c..236d3cdc4 
100644 --- a/rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/must/2_root_data_entity_metadata.ttl @@ -62,7 +62,7 @@ ro-crate:RootDataEntityType ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape; sh:name "Identify the Root Data Entity of the RO-Crate" ; sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. - It is a schema:Dataset and is indirectly identified by the about property of the resource ro-crate-metadata.json in the RO-Crate + It is a schema:Dataset and is indirectly identified by the about property of the resource ro-crate-metadata.json in the RO-Crate (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)). """ ; sh:target [ @@ -110,7 +110,7 @@ ro-crate:RootDataEntityRequiredProperties sh:property [ a sh:PropertyShape ; sh:name "Root Data Entity: `name` property" ; - sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org) + sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org) to clearly identify the dataset and distinguish it from other datasets.""" ; sh:minCount 1 ; sh:nodeKind sh:Literal ; diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py index 5647304c9..859609e72 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py @@ -51,7 +51,8 @@ def check_availability(self, context: ValidationContext) -> bool: "are not required to be included in the RO-Crate payload" "(see https://github.com/ResearchObject/ro-crate/issues/400#issuecomment-2779152885 and " 
"https://github.com/ResearchObject/ro-crate/pull/426 for more details)", - entity.id) + entity.id, + ) continue if not entity.has_relative_path(): logger.debug( @@ -59,15 +60,18 @@ def check_availability(self, context: ValidationContext) -> bool: "According to the RO-Crate specification, local entities with absolute paths " "are not required to be included in the RO-Crate payload. " "It is only recommended that they exist at the time of RO-Crate creation.", - entity.id) + entity.id, + ) continue if not entity.is_available(): context.result.add_issue( - f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self) + f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self + ) result = False except Exception as e: context.result.add_issue( - f"Unable to check the the presence of the Data Entity '{entity.id}' within the RO-Crate", self) + f"Unable to check the the presence of the Data Entity '{entity.id}' within the RO-Crate", self + ) if logger.isEnabledFor(logging.DEBUG): logger.debug(e, exc_info=True) result = False diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl index 46356e330..80479546b 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.ttl @@ -162,7 +162,7 @@ ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) FILTER(?this != ?root) FILTER(?this != ?metadatafile) - FILTER NOT EXISTS { + FILTER NOT EXISTS { ?this a schema:MediaObject . ?this a schema:Dataset . 
} diff --git a/rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl index c547d60f0..48a7eaf3b 100644 --- a/rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/must/5_web_data_entity_metadata.ttl @@ -26,7 +26,7 @@ ro-crate:WebBasedDataEntity a sh:NodeShape, validator:HiddenShape ; sh:name "Web-based Data Entity: REQUIRED properties" ; sh:description """A Web-based Data Entity is a `File` identified by an absolute URL""" ; - + sh:target [ a sh:SPARQLTarget ; sh:prefixes ro-crate:sparqlPrefixes ; @@ -47,4 +47,3 @@ ro-crate:WebBasedDataEntity a sh:NodeShape, validator:HiddenShape ; sh:predicate rdf:type ; sh:object ro-crate:WebDataEntity ; ] . - diff --git a/rocrate_validator/profiles/ro-crate/ontology.ttl b/rocrate_validator/profiles/ro-crate/ontology.ttl index 5f19a200a..db6f33f36 100644 --- a/rocrate_validator/profiles/ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/ro-crate/ontology.ttl @@ -77,4 +77,3 @@ ro-crate:Directory rdf:type owl:Class ; # Declare the individual representing the RO-Crate Metadata File Descriptor entity ro:ro-crate-metadata.json a owl:NamedIndividual, ro-crate:ROCrateMetadataFileDescriptor . 
- diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl index 7cb53f00b..0d5411638 100644 --- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_metadata.ttl @@ -21,14 +21,14 @@ ro-crate:RootDataEntityDirectRecommendedProperties a sh:NodeShape ; sh:name "RO-Crate Root Data Entity RECOMMENDED properties" ; - sh:description """The Root Data Entity SHOULD have + sh:description """The Root Data Entity SHOULD have the properties `name`, `description` and `license` defined as described in the RO-Crate specification """; sh:targetClass ro-crate:RootDataEntity ; sh:property [ a sh:PropertyShape ; sh:name "Root Data Entity: `license` SHOULD link to a Contextual Entity" ; - sh:description """Check if the Root Data Entity includes a `license` property + sh:description """Check if the Root Data Entity includes a `license` property that links to a Contextual Entity with type `schema_org:CreativeWork` to describe the license.""" ; sh:nodeKind sh:BlankNodeOrIRI ; sh:class schema_org:CreativeWork ; diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py index 800383cf3..6f4665a42 100644 --- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py +++ b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -30,12 +30,10 @@ class RootDataEntityRelativeURI(PyFunctionCheck): def check_relative_uris(self, context: ValidationContext) -> bool: """Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD""" try: - if context.ro_crate.metadata.get_root_data_entity().id != './': - context.result.add_issue( - 'Root Data Entity URI is not denoted by the string `./`', self) + if 
context.ro_crate.metadata.get_root_data_entity().id != "./": + context.result.add_issue("Root Data Entity URI is not denoted by the string `./`", self) return False return True except Exception as e: - context.result.add_issue( - f'Error checking Root Data Entity URI: {e!s}', self) + context.result.add_issue(f"Error checking Root Data Entity URI: {e!s}", self) return False diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py index 4f9004eed..d21c37c7b 100644 --- a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py +++ b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py @@ -40,17 +40,17 @@ def check_availability(self, context: ValidationContext) -> bool: # Perform the check result = True for entity in [ - _ for _ in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True) - if _.has_absolute_path()]: + _ + for _ in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True) + if _.has_absolute_path() + ]: assert entity.id is not None, "Entity has no @id" try: if not entity.is_available(): - context.result.add_issue( - f'Data Entity {entity.id} is not available', self) + context.result.add_issue(f"Data Entity {entity.id} is not available", self) result = False except Exception as e: - context.result.add_issue( - f'Web-based Data Entity {entity.id} is not available: {e}', self) + context.result.add_issue(f"Web-based Data Entity {entity.id} is not available: {e}", self) result = False if not result and context.fail_fast: return result diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl index 188e3a834..8f04d9947 100644 --- a/rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/should/4_data_entity_metadata.ttl @@ -40,7 +40,7 @@ 
ro-crate:FileRecommendedProperties a sh:NodeShape ; sh:datatype xsd:string ; sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; sh:name "File Data Entity: RECOMMENDED `PRONOM` for the `encodingFormat` property" ; - sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier + sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier (e.g., application/pdf, application/text, image/svg+xml, image/svg;q=0.9,/;q=0.8,image/svg+xml;q=0.9,/;q=0.8, application/vnd.uplanet.listcmd-wbxml;charset=utf-8). """ ; sh:message "The `encodingFormat` SHOULD be linked using a PRONOM identifier (e.g., application/pdf)."; diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index 24d787799..a43415822 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -63,12 +63,10 @@ def check_availability(self, context: ValidationContext) -> bool: logger.warning(msg) context.result.add_issue(msg, self) else: - context.result.add_issue( - f'Web-based Data Entity {entity.id} is not available', self) + context.result.add_issue(f"Web-based Data Entity {entity.id} is not available", self) result = False except Exception as e: - context.result.add_issue( - f'Web-based Data Entity {entity.id} is not available: {e}', self) + context.result.add_issue(f"Web-based Data Entity {entity.id} is not available: {e}", self) result = False if not result and context.fail_fast: return result @@ -94,10 +92,14 @@ def check_content_size(self, context: ValidationContext) -> bool: actual_size = context.ro_crate.get_external_file_size(entity.id) if content_size and int(content_size) != actual_size: context.result.add_issue( - f'The property contentSize={content_size} of the 
Web-based Data Entity ' - f'{entity.id} does not match the actual size of ' - f'the downloadable content, i.e., {actual_size} (bytes)', self, - violatingEntity=entity.id, violatingProperty='contentSize', violatingPropertyValue=content_size) + f"The property contentSize={content_size} of the Web-based Data Entity " + f"{entity.id} does not match the actual size of " + f"the downloadable content, i.e., {actual_size} (bytes)", + self, + violatingEntity=entity.id, + violatingProperty="contentSize", + violatingPropertyValue=content_size, + ) result = False if not result and context.fail_fast: return result diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl index e4f3cc5b2..53f2b1fb7 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.ttl @@ -25,7 +25,7 @@ ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ; sh:name "Web-based Data Entity: RECOMMENDED properties" ; - sh:description """A Web-based Data Entity MUST be identified by an absolute URL and + sh:description """A Web-based Data Entity MUST be identified by an absolute URL and SHOULD have a `contentSize` and `sdDatePublished` property""" ; sh:targetClass ro-crate:WebDataEntity ; # Check if the Web-based Data Entity has a contentSize property diff --git a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py index c64746900..681293337 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py @@ -59,7 +59,8 @@ def check_workflow_description(self, context: ValidationContext) -> bool: return False if not context.settings.metadata_only and not main_workflow_subject.is_available(): context.result.add_issue( 
- f"Workflow CWL description {main_workflow_subject.id} not found in crate", self) + f"Workflow CWL description {main_workflow_subject.id} not found in crate", self + ) return False return True except Exception: diff --git a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py index 375905f7d..3941190e4 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py @@ -37,8 +37,11 @@ def check_workflow(self, context: ValidationContext) -> bool: return False return True except ValueError: - context.result.add_issue("Unable to check the existence of the main workflow file " - "because the metadata file descriptor doesn't contain a `mainEntity`", self) + context.result.add_issue( + "Unable to check the existence of the main workflow file " + "because the metadata file descriptor doesn't contain a `mainEntity`", + self, + ) if logger.isEnabledFor(logging.DEBUG): logger.exception("Unable to check main workflow file existence") return False diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 1960447f0..6bb9e8282 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -42,13 +42,15 @@ class PyFunctionCheck(RequirementCheck): Concrete class that implements a check that calls a function. 
""" - def __init__(self, - requirement: Requirement, # pylint: disable=redefined-outer-name - name: str, - check_function: Callable[[RequirementCheck, ValidationContext], bool], - description: str | None = None, - level: RequirementLevel | None = LevelCollection.REQUIRED, - deactivated: bool = False): + def __init__( + self, + requirement: Requirement, # pylint: disable=redefined-outer-name + name: str, + check_function: Callable[[RequirementCheck, ValidationContext], bool], + description: str | None = None, + level: RequirementLevel | None = LevelCollection.REQUIRED, + deactivated: bool = False, + ): """ check_function: a function that accepts an instance of PyFunctionCheck and a ValidationContext. """ @@ -56,19 +58,28 @@ def __init__(self, sig = inspect.signature(check_function) if len(sig.parameters) != EXPECTED_CHECK_PARAM_COUNT: - raise RuntimeError("Invalid PyFunctionCheck function. Checks are expected to accept " - f"arguments [RequirementCheck, ValidationContext] but this has signature {sig}") + raise RuntimeError( + "Invalid PyFunctionCheck function. Checks are expected to accept " + f"arguments [RequirementCheck, ValidationContext] but this has signature {sig}" + ) if sig.return_annotation not in (bool, inspect.Signature.empty): - raise RuntimeError("Invalid PyFunctionCheck function. Checks are expected to " - f"return bool but this only returns {sig.return_annotation}") + raise RuntimeError( + "Invalid PyFunctionCheck function. 
Checks are expected to " + f"return bool but this only returns {sig.return_annotation}" + ) self._check_function = check_function def execute_check(self, context: ValidationContext) -> bool: - if self.requirement.profile.identifier != context.profile_identifier and \ - context.settings.disable_inherited_profiles_issue_reporting: - logger.debug("Skipping requirement %s as it belongs to an inherited profile %s", - self.requirement.identifier, self.requirement.profile.identifier) + if ( + self.requirement.profile.identifier != context.profile_identifier + and context.settings.disable_inherited_profiles_issue_reporting + ): + logger.debug( + "Skipping requirement %s as it belongs to an inherited profile %s", + self.requirement.identifier, + self.requirement.profile.identifier, + ) return True return self._check_function(self, context) @@ -105,12 +116,14 @@ class PyRequirement(Requirement): The class should define one or more methods that are decorated with the :py:func:`check` decorator. """ - def __init__(self, - profile: Profile, - requirement_check_class: type[PyFunctionCheck], - name: str = "", - description: str | None = None, - path: Path | None = None): + def __init__( + self, + profile: Profile, + requirement_check_class: type[PyFunctionCheck], + name: str = "", + description: str | None = None, + path: Path | None = None, + ): self.requirement_check_class = requirement_check_class super().__init__(profile, name, description, path, initialize_checks=True) @@ -135,18 +148,22 @@ def __init_checks__(self): except Exception: logger.debug(f"No severity set for check '{check_name}' from decorator.") if not severity: - logger.debug(f"No explicit severity set for check '{check_name}' from decorator." - f"Getting severity from path: {self.severity_from_path}") + logger.debug( + f"No explicit severity set for check '{check_name}' from decorator." 
+ f"Getting severity from path: {self.severity_from_path}" + ) severity = self.severity_from_path or Severity.REQUIRED logger.debug("Severity log: %r", severity) deactivated = bool(getattr(member, "deactivated", False)) # pylint: disable-next=redefined-outer-name # local 'check' mirrors the decorator name - check = self.requirement_check_class(self, - check_name, - member, - description=check_description, - level=LevelCollection.get(severity.name) if severity else None, - deactivated=deactivated) + check = self.requirement_check_class( + self, + check_name, + member, + description=check_description, + level=LevelCollection.get(severity.name) if severity else None, + deactivated=deactivated, + ) self._checks.append(check) logger.debug("Added check: %s %r", check_name, check) @@ -175,6 +192,7 @@ def requirement(name: str, description: str | None = None, hidden: bool = False) :return: the decorated class """ + def decorator(cls): if name: cls.__rq_name__ = name @@ -186,9 +204,7 @@ def decorator(cls): return decorator -def check(name: str | None = None, - severity: Severity | None = None, - deactivated: bool = False): +def check(name: str | None = None, severity: Severity | None = None, deactivated: bool = False): """ A decorator to mark a function as a check. @@ -216,29 +232,37 @@ def check(name: str | None = None, :return: the decorated function :rtype: Callable """ + def decorator(func): check_name = name or func.__name__ sig = inspect.signature(func) if len(sig.parameters) != EXPECTED_CHECK_PARAM_COUNT: - raise RuntimeError(f"Invalid check {check_name}. Checks are expected to " - f"accept two arguments but this only takes {len(sig.parameters)}") + raise RuntimeError( + f"Invalid check {check_name}. Checks are expected to " + f"accept two arguments but this only takes {len(sig.parameters)}" + ) if sig.return_annotation not in (bool, inspect.Signature.empty): - raise RuntimeError(f"Invalid check {check_name}. 
Checks are expected to " - f"return bool but this only returns {sig.return_annotation}") + raise RuntimeError( + f"Invalid check {check_name}. Checks are expected to " + f"return bool but this only returns {sig.return_annotation}" + ) func.check = True func.name = check_name func.severity = severity func.deactivated = deactivated return func + return decorator class PyRequirementLoader(RequirementLoader): - - def load(self, profile: Profile, - requirement_level: RequirementLevel, # pylint: disable=unused-argument - file_path: Path, - publicID: str | None = None) -> list[Requirement]: # pylint: disable=unused-argument + def load( + self, + profile: Profile, + requirement_level: RequirementLevel, # pylint: disable=unused-argument + file_path: Path, + publicID: str | None = None, + ) -> list[Requirement]: # pylint: disable=unused-argument # instantiate a list to store the requirements requirements: list[Requirement] = [] @@ -250,21 +274,25 @@ def load(self, profile: Profile, for requirement_name, check_class in classes.items(): # set default requirement name and description rq = {} - rq["name"] = " ".join(re.findall(r'[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))', - requirement_name.strip())) if requirement_name else "" + rq["name"] = ( + " ".join(re.findall(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))", requirement_name.strip())) + if requirement_name + else "" + ) rq["description"] = check_class.__doc__.strip() if check_class.__doc__ else "" # handle default overrides via decorators - for pn in ("name", "description"): - pv = getattr(check_class, f"__rq_{pn}__", None) + for on in ("name", "description"): + pv = getattr(check_class, f"__rq_{on}__", None) if pv and isinstance(pv, str): - rq[pn] = pv + rq[on] = pv logger.debug("Processing requirement: %r", requirement_name) r = PyRequirement( profile, requirement_check_class=check_class, name=rq["name"], description=rq["description"], - path=file_path) + path=file_path, + ) logger.debug("Created requirement: %r", r) requirements.append(r) 
diff --git a/rocrate_validator/requirements/shacl/__init__.py b/rocrate_validator/requirements/shacl/__init__.py index 699a0f8d8..5186bbf24 100644 --- a/rocrate_validator/requirements/shacl/__init__.py +++ b/rocrate_validator/requirements/shacl/__init__.py @@ -18,10 +18,10 @@ from rocrate_validator.requirements.shacl.validator import SHACLValidationResult, SHACLValidator __all__ = [ - "SHACLCheck", - "SHACLRequirement", - "SHACLRequirementLoader", - "SHACLValidationError", - "SHACLValidationResult", - "SHACLValidator", + "SHACLCheck", + "SHACLRequirement", + "SHACLRequirementLoader", + "SHACLValidationError", + "SHACLValidationResult", + "SHACLValidator", ] diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index f5a4521e8..de5475a6c 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -408,12 +408,14 @@ def __collect_failed_checks__(self, shacl_context, shacl_result, shapes_registry violations.append(violation) return failed_requirements_checks, failed_requirements_checks_violations - def __process_failed_checks__(self, shacl_context, failed_requirements_checks, - failed_requirements_checks_violations): + def __process_failed_checks__( + self, shacl_context, failed_requirements_checks, failed_requirements_checks_violations + ): failed_requirement_checks_notified = [ _.check.identifier for _ in shacl_context.result.get_issues( - min_severity=cast("Severity", shacl_context.settings.requirement_severity)) + min_severity=cast("Severity", shacl_context.settings.requirement_severity) + ) ] for requirementCheck in sorted(failed_requirements_checks, key=lambda x: (x.identifier, x.severity)): # if the check is not in the current profile @@ -455,8 +457,7 @@ def __process_failed_checks__(self, shacl_context, failed_requirements_checks, def __register_check_violations__(self, shacl_context, requirementCheck, violations): for violation in violations: - 
violating_entity = make_uris_relative(cast("Any", violation.focusNode).toPython(), - shacl_context.publicID) + violating_entity = make_uris_relative(cast("Any", violation.focusNode).toPython(), shacl_context.publicID) violating_property = violation.resultPath.toPython() if violation.resultPath else None violation_message = violation.get_result_message(str(shacl_context.rocrate_uri)) registered_check_issues = shacl_context.result.get_issues_by_check(requirementCheck) diff --git a/rocrate_validator/requirements/shacl/errors.py b/rocrate_validator/requirements/shacl/errors.py index 44ba40856..ffccca793 100644 --- a/rocrate_validator/requirements/shacl/errors.py +++ b/rocrate_validator/requirements/shacl/errors.py @@ -18,7 +18,6 @@ class SHACLValidationError(ValidationError): - def __init__( self, result: SHACLValidationResult | None = None, @@ -34,6 +33,4 @@ def result(self) -> SHACLValidationResult | None: return self._result def __repr__(self): - return ( - f"SHACLValidationError({self._message!r}, {self._path!r}, {self.result!r})" - ) + return f"SHACLValidationError({self._message!r}, {self._path!r}, {self.result!r})" diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index f665236cc..d3f71826c 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -33,7 +33,6 @@ class SHACLNode: - # define default values _name: str | None = None _description: str | None = None @@ -162,7 +161,6 @@ def compute_hash(graph: Graph, node: Node) -> int: class SHACLNodeCollection(SHACLNode): - def __init__(self, node: Node, graph: Graph, properties: list[PropertyShape] | None = None): super().__init__(node, graph) # store the properties @@ -205,7 +203,6 @@ class PropertyGroup(SHACLNodeCollection): class PropertyShape(Shape): - # define default values _name: str | None = None _short_name: str | None = None @@ -216,10 +213,7 @@ class PropertyShape(Shape): # store the 
reference to the property group _property_group: PropertyGroup | None = None - def __init__(self, - node: Node, - graph: Graph, - parent: Shape | None = None): + def __init__(self, node: Node, graph: Graph, parent: Shape | None = None): # call the parent constructor super().__init__(node, graph) # store the parent shape @@ -252,9 +246,9 @@ def description(self) -> str: property_name = self.name if self._short_name: property_name = self._short_name - self._description = f"Check the property \"**{property_name}**\"" + self._description = f'Check the property "**{property_name}**"' if self.parent and self.parent.name not in property_name: - self._description += f" of the entity \"**{self.parent.name}**\"" + self._description += f' of the entity "**{self.parent.name}**"' return self._description @description.setter @@ -283,7 +277,6 @@ def propertyGroup(self) -> PropertyGroup | None: class NodeShape(Shape, SHACLNodeCollection): - @property def property_groups(self) -> list[PropertyGroup]: """Return the property groups of the shape""" @@ -305,7 +298,6 @@ def ungrouped_properties(self) -> list[PropertyShape]: class ShapesRegistry: - def __init__(self): self._shapes: dict[str, Shape] = {} self._shapes_graph: Graph = Graph() @@ -390,8 +382,7 @@ def load_shapes(self, shapes_path: str | Path, publicID: str | None = None) -> l nested_properties = node_graph.objects(subject=node_shape, predicate=shacl_ns.property) for property_shape in nested_properties: property_graph = shapes_list.get_shape_property_graph(node_shape, property_shape) - p_shape = PropertyShape( - property_shape, property_graph, shape) + p_shape = PropertyShape(property_shape, property_graph, shape) shape.add_property(p_shape) group = __process_property_group__(property_groups, p_shape) if group and group not in shapes: @@ -440,9 +431,7 @@ def get_instance(cls, ctx: object): return instance -def __process_property_group__( - groups: dict[str, PropertyGroup], property_shape: PropertyShape -) -> PropertyGroup | 
None: +def __process_property_group__(groups: dict[str, PropertyGroup], property_shape: PropertyShape) -> PropertyGroup | None: group_name = property_shape.group if group_name: if group_name not in groups: diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 69e05d853..f9dfe4104 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -37,9 +37,7 @@ class SHACLRequirement(Requirement): def __init__(self, shape: Shape, profile: Profile, path: Path): self._shape = shape - super().__init__( - profile, shape.name or "", shape.description or "", path - ) + super().__init__(profile, shape.name or "", shape.description or "", path) # init checks self._checks = self.__init_checks__() # assign check IDs @@ -84,8 +82,7 @@ def shape(self) -> Shape: @property def hidden(self) -> bool: return bool( - self.shape.node is not None - and (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph + self.shape.node is not None and (self.shape.node, RDF.type, VALIDATOR_NS.HiddenShape) in self.shape.graph ) @classmethod @@ -140,7 +137,8 @@ def finalize(cls, context: ValidationContext) -> None: if context.maybe_warn_offline_cache_miss(e): logger.debug( "Forced SHACL run for zero-shape target profile %s skipped due to offline cache miss: %s", - target.identifier, e, + target.identifier, + e, ) else: logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 910706914..0787c25c2 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -47,14 +47,12 @@ class SHACLValidationSkip(Exception): class SHACLValidationAlreadyProcessed(Exception): - def __init__(self, profile_identifier: str, result: bool | None) -> 
None: super().__init__(f"Profile {profile_identifier} has already been processed") self.result = result class SHACLValidationContextManager: - def __init__(self, check: RequirementCheck, context: ValidationContext) -> None: self._check = check self._profile = check.requirement.profile @@ -65,8 +63,9 @@ def __enter__(self) -> SHACLValidationContext: logger.debug("Entering SHACLValidationContextManager") if not self._shacl_context.__set_current_validation_profile__(self._profile): raise SHACLValidationAlreadyProcessed( - self._profile.identifier, self._shacl_context.get_validation_result(self._profile)) - logger.debug("Processing profile: %s (id: %s)", self._profile.name, self._profile.identifier) + self._profile.identifier, self._shacl_context.get_validation_result(self._profile) + ) + logger.debug("Processing profile: %s (id: %s)", self._profile.name, self._profile.identifier) if self._profile.identifier != self._context.settings.profile_identifier: logger.debug("Skipping validation of profile %s", self._profile.identifier) self.context.result._add_skipped_check(self._check) @@ -92,7 +91,6 @@ def check(self) -> RequirementCheck: class SHACLValidationContext(ValidationContext): - def __init__(self, context: ValidationContext): super().__init__(context.validator, context.settings) self._base_context: ValidationContext = context @@ -129,6 +127,7 @@ def __set_current_validation_profile__(self, profile: Profile) -> bool: # enable overriding of checks if self.settings.allow_requirement_check_override: from rocrate_validator.requirements.shacl.requirements import SHACLRequirement # noqa: PLC0415 + for requirement in [_ for _ in profile.requirements if isinstance(_, SHACLRequirement)]: for check in requirement.get_checks(): if check.overridden and check.requirement.profile != self.target_profile: @@ -204,8 +203,9 @@ def __get_data_graph_base__(self) -> str | None: logger.debug("Unable to extract @base from data graph metadata: %s", e) return None - def 
__load_ontology_graph__(self, profile_path: Path, - ontology_filename: str = DEFAULT_ONTOLOGY_FILE) -> Graph | None: + def __load_ontology_graph__( + self, profile_path: Path, ontology_filename: str = DEFAULT_ONTOLOGY_FILE + ) -> Graph | None: # load the graph of ontologies ontology_graph: Graph | None = None ontology_path = self.__get_ontology_path__(profile_path, ontology_filename) @@ -224,8 +224,7 @@ def __load_ontology_graph__(self, profile_path: Path, else: logger.debug("Using default publicID: %s", self.publicID) - ontology_graph.parse(ontology_path, format="ttl", - publicID=public_id) + ontology_graph.parse(ontology_path, format="ttl", publicID=public_id) logger.debug("Ontologies loaded: %s", ontology_graph) return ontology_graph @@ -244,7 +243,6 @@ def get_instance(cls, context: ValidationContext) -> SHACLValidationContext: class SHACLViolation: - def __init__(self, result: SHACLValidationResult, violation_node: Node, graph: Graph) -> None: # check the input assert result is not None, "Invalid result" @@ -305,9 +303,11 @@ def get_result_severity(self) -> Severity: def sourceConstraintComponent(self): if not self._source_constraint_component: self._source_constraint_component = self.graph.value( - self._violation_node, URIRef(f"{SHACL_NS}sourceConstraintComponent")) - assert self._source_constraint_component is not None, \ + self._violation_node, URIRef(f"{SHACL_NS}sourceConstraintComponent") + ) + assert self._source_constraint_component is not None, ( f"Unable to get source constraint component from violation node {self._violation_node}" + ) return self._source_constraint_component def get_result_message(self, ro_crate_path: Path | str) -> str: @@ -321,21 +321,19 @@ def get_result_message(self, ro_crate_path: Path | str) -> str: def sourceShape(self) -> URIRef | BNode: if not self._source_shape_node: self._source_shape_node = self.graph.value(self._violation_node, URIRef(f"{SHACL_NS}sourceShape")) - assert self._source_shape_node is not None, \ + assert 
self._source_shape_node is not None, ( f"Unable to get source shape node from violation node {self._violation_node}" + ) return cast("URIRef | BNode", self._source_shape_node) class SHACLValidationResult: - - def __init__(self, results_graph: Graph, - results_text: str | None = None) -> None: + def __init__(self, results_graph: Graph, results_text: str | None = None) -> None: # validate the results graph input assert results_graph is not None, "Invalid graph" assert isinstance(results_graph, Graph), "Invalid graph type" # check if the graph is valid ValidationReport - assert (None, URIRef(f"{SHACL_NS}conforms"), - None) in results_graph, "Invalid ValidationReport" + assert (None, URIRef(f"{SHACL_NS}conforms"), None) in results_graph, "Invalid ValidationReport" # store the input properties self.results_graph = results_graph self._text = results_text @@ -344,8 +342,12 @@ def __init__(self, results_graph: Graph, # initialize the conforms property self._conforms = len(self._violations) == 0 - logger.debug("Validation report. N. violations: %s, Conforms: %s; Text: %s", - len(self._violations), self._conforms, self._text) + logger.debug( + "Validation report. N. 
violations: %s, Conforms: %s; Text: %s", + len(self._violations), + self._conforms, + self._text, + ) def _parse_results_graph(self, results_graph: Graph): # parse the violations from the results graph @@ -370,7 +372,6 @@ def text(self) -> str | None: class SHACLValidator: - def __init__( self, shapes_graph: GraphLike | str | bytes | None, @@ -414,8 +415,7 @@ def validate( allow_warnings: bool | None = True, # serialization settings serialization_output_path: str | None = None, - serialization_output_format: - RDF_SERIALIZATION_FORMATS_TYPES | None = "turtle", + serialization_output_format: RDF_SERIALIZATION_FORMATS_TYPES | None = "turtle", **kwargs, ) -> SHACLValidationResult: """ @@ -448,20 +448,15 @@ def validate( # Validate data_graph if not isinstance(data_graph, (Graph, str, bytes)): - raise TypeError( - "data_graph must be an instance of Graph, str, or bytes") + raise TypeError("data_graph must be an instance of Graph, str, or bytes") # Validate inference if inference and inference not in VALID_INFERENCE_OPTIONS: - raise ValueError( - f"inference must be one of {VALID_INFERENCE_OPTIONS}") + raise ValueError(f"inference must be one of {VALID_INFERENCE_OPTIONS}") # Validate serialization_output_format - if serialization_output_format and \ - serialization_output_format not in RDF_SERIALIZATION_FORMATS: - raise ValueError( - "serialization_output_format must be one of " - f"{RDF_SERIALIZATION_FORMATS}") + if serialization_output_format and serialization_output_format not in RDF_SERIALIZATION_FORMATS: + raise ValueError(f"serialization_output_format must be one of {RDF_SERIALIZATION_FORMATS}") assert inference in (None, "rdfs", "owlrl", "both"), "Invalid inference option" @@ -488,10 +483,7 @@ def validate( logger.debug("pyshacl.validate result: Results Text: %r", results_text) if not isinstance(results_graph, Graph): - raise TypeError( - "pyshacl.validate returned a non-Graph results_graph: " - f"{type(results_graph).__name__}" - ) + raise 
TypeError(f"pyshacl.validate returned a non-Graph results_graph: {type(results_graph).__name__}") # serialize the results graph if serialization_output_path: @@ -503,9 +495,7 @@ def validate( "rdf", "json-ld", ], "Invalid serialization output format" - results_graph.serialize( - serialization_output_path, format=serialization_output_format - ) + results_graph.serialize(serialization_output_path, format=serialization_output_format) # return the validation result return SHACLValidationResult(results_graph, results_text) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index 2de0e04cc..da1bc33b2 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -37,28 +37,27 @@ class ROCrateEntity: - def __init__(self, metadata: ROCrateMetadata, raw_data: dict) -> None: self._raw_data: dict = raw_data self._metadata = metadata @property def id(self) -> str: - return cast('str', self._raw_data.get('@id')) + return cast("str", self._raw_data.get("@id")) @property def type(self) -> str | list[str]: - return cast('str | list[str]', self._raw_data.get('@type')) + return cast("str | list[str]", self._raw_data.get("@type")) def is_dataset(self) -> bool: - return self.has_type('Dataset') + return self.has_type("Dataset") def is_file(self) -> bool: - return self.has_type('File') + return self.has_type("File") @property def name(self) -> str: - return cast('str', self._raw_data.get('name')) + return cast("str", self._raw_data.get("name")) @property def metadata(self) -> ROCrateMetadata: @@ -75,9 +74,7 @@ def is_remote(self) -> bool: def get_id_as_path(cls, entity_id: str, ro_crate: ROCrate | None = None) -> Path: return cls.get_path_from_identifier( entity_id, - ro_crate.uri.as_path() - if ro_crate and ro_crate.uri.is_local_resource() - else None, + ro_crate.uri.as_path() if ro_crate and ro_crate.uri.is_local_resource() else None, ) @staticmethod @@ -168,9 +165,7 @@ def has_relative_path(self) -> bool: def has_local_identifier(self) -> bool: 
has_local_id = ( - self.id.startswith("#") - or f"{self.ro_crate.uri}/#" in self.id - or f"file://{self.ro_crate.uri}/#" in self.id + self.id.startswith("#") or f"{self.ro_crate.uri}/#" in self.id or f"file://{self.ro_crate.uri}/#" in self.id ) logger.debug( "Identifier '%s' is %s a local identifier", @@ -361,9 +356,7 @@ def get_entity(self, entity_id: str) -> ROCrateEntity | None: def get_entities(self) -> list[ROCrateEntity]: return [ROCrateEntity(self, entity) for entity in self.as_dict().get("@graph", [])] - def get_entities_by_type( - self, entity_type: str | list[str] - ) -> list[ROCrateEntity]: + def get_entities_by_type(self, entity_type: str | list[str]) -> list[ROCrateEntity]: entity_types = [entity_type] if isinstance(entity_type, str) else entity_type return [e for e in self.get_entities() if e.has_types(entity_types)] @@ -373,20 +366,15 @@ def get_dataset_entities(self) -> list[ROCrateEntity]: def get_file_entities(self) -> list[ROCrateEntity]: return self.get_entities_by_type("File") - def get_data_entities( - self, exclude_web_data_entities: bool = False - ) -> list[ROCrateEntity]: + def get_data_entities(self, exclude_web_data_entities: bool = False) -> list[ROCrateEntity]: if not exclude_web_data_entities: return self.get_entities_by_type(["Dataset", "File"]) - return [ - e - for e in self.get_entities_by_type(["Dataset", "File"]) - if not e.is_remote() - ] + return [e for e in self.get_entities_by_type(["Dataset", "File"]) if not e.is_remote()] def get_web_data_entities(self) -> list[ROCrateEntity]: return [ - entity for entity in self.get_entities() + entity + for entity in self.get_entities() if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote() ] @@ -406,9 +394,9 @@ def get_conforms_to(self) -> list[str] | None: def as_json(self) -> str: if not self._json: - self._json = cast('str', self.ro_crate.get_file_content( - Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False - )) + self._json = cast( + "str", 
self.ro_crate.get_file_content(Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False) + ) return self._json def as_dict(self) -> dict[Any, Any]: @@ -544,11 +532,7 @@ def __get_search_path__(self, path: Path) -> tuple[Path, Path]: """ assert path, "Path cannot be None" # Identify the root path of the RO-Crate - root_path = ( - self.uri.as_path() - if self.uri.is_local_resource() and isinstance(path, Path) - else Path("./") - ) + root_path = self.uri.as_path() if self.uri.is_local_resource() and isinstance(path, Path) else Path("./") # Extract the search path relative to the root of the RO-Crate root path try: search_path = path.relative_to(root_path) @@ -587,9 +571,7 @@ def __parse_path__(self, path: Path) -> Path: # Resolve the path based on the RO-Crate location rocrate_path = self.uri.as_path() if self.uri.is_local_resource() else None - rocrate_path_arg = ( - rocrate_path if not str(rocrate_path).endswith(".zip") else None - ) + rocrate_path_arg = rocrate_path if not str(rocrate_path).endswith(".zip") else None paths_to_try = [path] unquoted_path = Path(unquote(str(path))) if str(path) != str(unquoted_path): @@ -667,9 +649,7 @@ def get_file_size(self, path: Path) -> int: """ @abstractmethod - def get_file_content( - self, path: Path, binary_mode: bool = True - ) -> str | bytes: + def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: """ Get the content of a file in the RO-Crate. @@ -684,9 +664,7 @@ def get_file_content( """ @staticmethod - def get_external_file_content( - uri: str, binary_mode: bool = True - ) -> str | bytes: + def get_external_file_content(uri: str, binary_mode: bool = True) -> str | bytes: """ Get the content of an external file. 
@@ -721,9 +699,7 @@ def get_external_file_size(uri: str) -> int: return int(response.headers.get("Content-Length")) @staticmethod - def from_metadata_dict( - metadata_dict: dict - ) -> ROCrate: + def from_metadata_dict(metadata_dict: dict) -> ROCrate: """ Create a new instance of the RO-Crate based on the metadata dictionary. @@ -741,9 +717,7 @@ def from_metadata_dict( return ro_crate @staticmethod - def new_instance( - uri: str | Path | URI, relative_root_path: Path | None = None - ) -> ROCrate: + def new_instance(uri: str | Path | URI, relative_root_path: Path | None = None) -> ROCrate: """ Create a new instance of the RO-Crate based on the URI. @@ -822,9 +796,7 @@ def get_file_size(self, path: Path) -> int: raise FileNotFoundError(f"File not found: {path}") return path.stat().st_size - def get_file_content( - self, path: Path, binary_mode: bool = True - ) -> str | bytes: + def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") @@ -933,9 +905,7 @@ def get_file_size(self, path: Path) -> int: assert self._zipref is not None, "Zip reference not initialized" return self._zipref.getinfo(str(self.__parse_path__(path))).file_size - def get_file_content( - self, path: Path, binary_mode: bool = True - ) -> str | bytes: + def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: path = self.__parse_path__(path) if not self.has_file(path): raise FileNotFoundError(f"File not found: {path}") @@ -945,7 +915,6 @@ def get_file_content( class ROCrateRemoteZip(ROCrateLocalZip): - def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): super().__init__(path, relative_root_path=relative_root_path, init_zip=False) @@ -960,7 +929,7 @@ def __init_zip_reference__(self): raise ROCrateInvalidURIError(uri=url, message="URI is not available") # Step 1: Fetch the last 22 bytes to find the EOCD record - 
eocd_data = self.__fetch_range__(url, -22, '') + eocd_data = self.__fetch_range__(url, -22, "") # Step 2: Find the EOCD record eocd_offset = self.__find_eocd__(eocd_data) @@ -970,8 +939,9 @@ def __init_zip_reference__(self): central_directory_offset, central_directory_size = self.__parse_eocd__(eocd_full_data) # Step 4: Fetch the central directory - central_directory_data = self.__fetch_range__(url, central_directory_offset, - central_directory_offset + central_directory_size - 1) + central_directory_data = self.__fetch_range__( + url, central_directory_offset, central_directory_offset + central_directory_size - 1 + ) # Step 5: Parse the central directory and return the zip file self._zipref = zipfile.ZipFile(io.BytesIO(central_directory_data)) # pylint: disable=consider-using-with @@ -979,21 +949,21 @@ def __init_zip_reference__(self): def size(self) -> int: response = HttpRequester().head(str(self.uri)) response.raise_for_status() # Check if the request was successful - file_size = response.headers.get('Content-Length') + file_size = response.headers.get("Content-Length") if file_size is not None: return int(file_size) raise ValueError("Could not determine the file size from the headers") @staticmethod def __fetch_range__(uri: str, start, end): - headers = {'Range': f'bytes={start}-{end}'} + headers = {"Range": f"bytes={start}-{end}"} response = HttpRequester().get(uri, headers=headers) response.raise_for_status() return response.content @staticmethod def __find_eocd__(data): - eocd_signature = b'PK\x05\x06' + eocd_signature = b"PK\x05\x06" eocd_offset = data.rfind(eocd_signature) if eocd_offset == -1: raise ValueError("EOCD not found") @@ -1001,15 +971,14 @@ def __find_eocd__(data): @staticmethod def __parse_eocd__(data): - eocd_size = struct.calcsize('<4s4H2LH') - eocd = struct.unpack('<4s4H2LH', data[-eocd_size:]) + eocd_size = struct.calcsize("<4s4H2LH") + eocd = struct.unpack("<4s4H2LH", data[-eocd_size:]) central_directory_size = eocd[5] 
central_directory_offset = eocd[6] return central_directory_offset, central_directory_size class BagitROCrate(ROCrate, ABC): - def __init__(self, uri, relative_root_path=None): super().__init__(uri, relative_root_path) @@ -1035,25 +1004,25 @@ def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: # Check for local directory if uri.is_local_directory(): base_path = uri.as_path() - result = (base_path / 'bagit.txt').is_file() and \ - (base_path / 'data' / 'ro-crate-metadata.json').is_file() + result = (base_path / "bagit.txt").is_file() and ( + base_path / "data" / "ro-crate-metadata.json" + ).is_file() # Check for local zip file elif uri.is_local_file(): path = uri.as_path() - if path.suffix == '.zip': - with zipfile.ZipFile(path, 'r') as zf: + if path.suffix == ".zip": + with zipfile.ZipFile(path, "r") as zf: namelist = zf.namelist() - result = 'bagit.txt' in namelist and \ - 'data/ro-crate-metadata.json' in namelist + result = "bagit.txt" in namelist and "data/ro-crate-metadata.json" in namelist # Check for remote zip file elif uri.is_remote_resource(): # For remote resources, we need to check if both files exist # We'll use HTTP HEAD requests to check without downloading - base_url = str(uri).rstrip('/') + base_url = str(uri).rstrip("/") - if not base_url.endswith('.zip'): + if not base_url.endswith(".zip"): # Check for bagit.txt bagit_response = HttpRequester().head(f"{base_url}/bagit.txt") if bagit_response.status_code == HTTP_STATUS_OK: @@ -1065,9 +1034,9 @@ def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: # Temporarily create instance to check temp_crate = ROCrateRemoteZip(uri) logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) - has_bagit_txt = temp_crate.has_file(Path('bagit.txt')) + has_bagit_txt = temp_crate.has_file(Path("bagit.txt")) logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) - has_ro_crate_metadata = temp_crate.has_file(Path('data/ro-crate-metadata.json')) + has_ro_crate_metadata = 
temp_crate.has_file(Path("data/ro-crate-metadata.json")) logger.debug("Presence of 'data/ro-crate-metadata.json': %s", has_ro_crate_metadata) result = has_bagit_txt and has_ro_crate_metadata del temp_crate @@ -1088,16 +1057,18 @@ def __check_search_path__(self, path): """ search_path, root_path = super().__get_search_path__(path) # Check if the path has the substring 'data/' in it - has_sub_data_path = re.search(r'data/', str(search_path)) - logger.debug("The search path '%s' %s the 'data/' sub-path", search_path, - "contains" if has_sub_data_path else "does not contain") + has_sub_data_path = re.search(r"data/", str(search_path)) + logger.debug( + "The search path '%s' %s the 'data/' sub-path", + search_path, + "contains" if has_sub_data_path else "does not contain", + ) if search_path == "." or not has_sub_data_path: return search_path, root_path return None, None class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): - def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): # initialize the parent classes super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 20d089b59..2462e440e 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -62,9 +62,7 @@ def validate_metadata_as_dict( return validate(settings, subscribers) -def validate( - settings: dict | ValidationSettings, subscribers: list[Subscriber] | None = None -) -> ValidationResult: +def validate(settings: dict | ValidationSettings, subscribers: list[Subscriber] | None = None) -> ValidationResult: """ Validate a RO-Crate against a profile and return the validation result diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py index 51df895e1..b2dceff29 100644 --- a/rocrate_validator/utils/cache_warmup.py +++ b/rocrate_validator/utils/cache_warmup.py @@ -62,6 +62,7 @@ @dataclass class WarmUpResult: 
"""Outcome of a warm-up operation.""" + url: str status: str # "ok", "skipped", "failed" detail: str | None = None @@ -75,8 +76,7 @@ def discover_profile_cacheable_urls(profile: Profile) -> list[str]: """ graph = profile.profile_specification_graph if graph is None: - logger.debug( - "Profile %s has no specification graph loaded", getattr(profile, "identifier", "?")) + logger.debug("Profile %s has no specification graph loaded", getattr(profile, "identifier", "?")) return [] urls: list[str] = [] try: @@ -88,8 +88,7 @@ def discover_profile_cacheable_urls(profile: Profile) -> list[str]: if value.lower().startswith(("http://", "https://")) and value not in urls: urls.append(value) except Exception as e: - logger.debug("Failed to query cacheable URLs for profile %s: %s", - getattr(profile, "identifier", "?"), e) + logger.debug("Failed to query cacheable URLs for profile %s: %s", getattr(profile, "identifier", "?"), e) return urls @@ -176,13 +175,15 @@ def auto_warm_up_for_settings(settings: ValidationSettings) -> list[WarmUpResult requester = HttpRequester() urls_to_fetch = [u for u in urls if not requester.has_cached(u)] if not urls_to_fetch: - logger.debug("Auto warm-up: all %d resources already cached for profile %s", - len(urls), settings.profile_identifier) + logger.debug( + "Auto warm-up: all %d resources already cached for profile %s", len(urls), settings.profile_identifier + ) return [] results = warm_up_urls(urls_to_fetch) ok = sum(1 for r in results if r.status == "ok") - logger.info("Auto warm-up: pre-loaded %d/%d resources for profile %s", - ok, len(urls_to_fetch), settings.profile_identifier) + logger.info( + "Auto warm-up: pre-loaded %d/%d resources for profile %s", ok, len(urls_to_fetch), settings.profile_identifier + ) return results diff --git a/rocrate_validator/utils/collections.py b/rocrate_validator/utils/collections.py index 878a2b281..932ad03cf 100644 --- a/rocrate_validator/utils/collections.py +++ b/rocrate_validator/utils/collections.py @@ 
-16,7 +16,6 @@ class MapIndex: - def __init__(self, name: str, unique: bool = False): self.name = name self.unique = unique diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py index 18a979aec..d87b4aaa3 100644 --- a/rocrate_validator/utils/document_loader.py +++ b/rocrate_validator/utils/document_loader.py @@ -73,7 +73,7 @@ def install_document_loader() -> bool: # time, binding its own reference to the original function. Patching # only `util` would not intercept remote @context resolution, so the # context module must be patched separately. - jsonld_context.source_to_json = _patched_source_to_json # pyright: ignore[reportPrivateImportUsage] + jsonld_context.source_to_json = _patched_source_to_json # pyright: ignore[reportPrivateImportUsage] except Exception as e: logger.error("Failed to install JSON-LD document loader: %s", e) return False @@ -97,7 +97,7 @@ def uninstall_document_loader() -> bool: try: jsonld_util.source_to_json = _original_source_to_json - jsonld_context.source_to_json = _original_source_to_json # pyright: ignore[reportPrivateImportUsage] + jsonld_context.source_to_json = _original_source_to_json # pyright: ignore[reportPrivateImportUsage] except Exception as e: logger.error("Failed to uninstall JSON-LD document loader: %s", e) return False diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 5af8ad541..7b3e44fd6 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -99,6 +99,7 @@ class HttpRequester: supports an offline mode in which only cached responses are served (cache misses yield a 504 response instead of hitting the network). 
""" + _instance = None _lock = threading.Lock() @@ -113,11 +114,13 @@ def __new__(cls, *args, **kwargs) -> Self: logger.debug(f"Instance created: {cls._instance.__class__.__name__}") return cls._instance - def __init__(self, - cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: str | None = None, - offline: bool = False, - no_cache: bool = False): + def __init__( + self, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: str | None = None, + offline: bool = False, + no_cache: bool = False, + ): logger.debug(f"Initializing instance of {self.__class__.__name__} {self}") # check if the instance is already initialized if not hasattr(self, "_initialized"): @@ -162,7 +165,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: str | None = No if not cache_path: # Generate a random path for the cache # to avoid conflicts with other instances - random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + random_suffix = "".join(random.choices(string.ascii_letters + string.digits, k=8)) cache_path = constants.DEFAULT_HTTP_CACHE_PATH_PREFIX + f"_{random_suffix}" logger.debug(f"Using default cache path: {cache_path}") else: @@ -176,9 +179,9 @@ def __initialize_session__(self, cache_max_age: int, cache_path: str | None = No # Cache name with random suffix cache_name=str(cache_path), expire_after=expire_after, # Cache expiration time in seconds - backend='sqlite', # Use SQLite backend - allowable_methods=('GET',), # Cache GET - allowable_codes=(200, 302, 404) # Cache responses with these status codes + backend="sqlite", # Use SQLite backend + allowable_methods=("GET",), # Cache GET + allowable_codes=(200, 302, 404), # Cache responses with these status codes ) # Apply offline policy: only return cached responses. 
if self.offline: @@ -198,8 +201,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: str | None = No if not self.session: if self.offline: logger.warning( - "Offline mode requested but requests_cache is not available: " - "HTTP requests will be blocked." + "Offline mode requested but requests_cache is not available: HTTP requests will be blocked." ) self.session = _OfflineFallbackSession() else: @@ -219,7 +221,7 @@ def cleanup(self): Remove the SQLite cache file when the cache is marked as temporary. """ logger.debug(f"Deleting instance of {self.__class__.__name__}") - if self.session and hasattr(self.session, 'cache') and self.session.cache: + if self.session and hasattr(self.session, "cache") and self.session.cache: try: logger.debug(f"Deleting cache directory: {self.session.cache.cache_name}") cache_path = f"{self.session.cache.cache_name}.sqlite" @@ -351,11 +353,13 @@ def cache_info(self) -> dict[str, Any]: return info @classmethod - def initialize_cache(cls, - cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: str | None = None, - offline: bool = False, - no_cache: bool = False) -> HttpRequester: + def initialize_cache( + cls, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: str | None = None, + offline: bool = False, + no_cache: bool = False, + ) -> HttpRequester: """ Initialize the HttpRequester singleton with cache settings. @@ -369,13 +373,11 @@ def initialize_cache(cls, with cls._lock: instance = cls._instance if instance is None: - return cls(cache_max_age=cache_max_age, cache_path=cache_path, - offline=offline, no_cache=no_cache) + return cls(cache_max_age=cache_max_age, cache_path=cache_path, offline=offline, no_cache=no_cache) # Re-apply the configuration without recreating the instance: # we keep the same singleton in place and only rebuild its underlying session, # rather than dropping and recreating the object (as ``reset`` does). 
- instance._reconfigure(cache_max_age=cache_max_age, cache_path=cache_path, - offline=offline, no_cache=no_cache) + instance._reconfigure(cache_max_age=cache_max_age, cache_path=cache_path, offline=offline, no_cache=no_cache) return instance def _close_session(self) -> None: @@ -392,11 +394,13 @@ def _close_session(self) -> None: except Exception as e: logger.debug("Error cleaning up previous cache: %s", e) - def _reconfigure(self, - cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: str | None = None, - offline: bool = False, - no_cache: bool = False) -> None: + def _reconfigure( + self, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: str | None = None, + offline: bool = False, + no_cache: bool = False, + ) -> None: """ Rebuild the underlying session with new cache settings while preserving the singleton instance (and any attributes set on it, e.g. test patches). diff --git a/rocrate_validator/utils/io_helpers/input.py b/rocrate_validator/utils/io_helpers/input.py index 711303db4..fc98b88ec 100644 --- a/rocrate_validator/utils/io_helpers/input.py +++ b/rocrate_validator/utils/io_helpers/input.py @@ -31,9 +31,9 @@ logger = logging.getLogger(__name__) -def __get_single_char_win32__(console: Console | None = None, end: str = "\n", - message: str | None = None, - choices: list[str] | None = None) -> str: +def __get_single_char_win32__( + console: Console | None = None, end: str = "\n", message: str | None = None, choices: list[str] | None = None +) -> str: """ Get a single character from the console """ @@ -53,9 +53,9 @@ def __get_single_char_win32__(console: Console | None = None, end: str = "\n", return char -def __get_single_char_unix__(console: Console | None = None, end: str = "\n", - message: str | None = None, - choices: list[str] | None = None) -> str: +def __get_single_char_unix__( + console: Console | None = None, end: str = "\n", message: str | None = None, choices: list[str] | None = None +) -> str: """ Get 
a single character from the console """ @@ -82,9 +82,9 @@ def __get_single_char_unix__(console: Console | None = None, end: str = "\n", return char -def get_single_char(console: Console | None = None, end: str = "\n", - message: str | None = None, - choices: list[str] | None = None) -> str: +def get_single_char( + console: Console | None = None, end: str = "\n", message: str | None = None, choices: list[str] | None = None +) -> str: """ Get a single character from the console """ @@ -93,8 +93,7 @@ def get_single_char(console: Console | None = None, end: str = "\n", return __get_single_char_unix__(console, end, message, choices) -def multiple_choice(console: Console, - choices: list[Profile]): +def multiple_choice(console: Console, choices: list[Profile]): """ Display a multiple choice menu """ @@ -107,14 +106,14 @@ def multiple_choice(console: Console, "type": "checkbox", "name": "profiles", "message": prompt_text, - "choices": [Choice(i, f"{choices[i].identifier}: {choices[i].name}") for i in range(len(choices))] + "choices": [Choice(i, f"{choices[i].identifier}: {choices[i].name}") for i in range(len(choices))], } ] console.print("\n") - selected = prompt(question, style={"questionmark": "#ff9d00 bold", - "question": "bold", - "checkbox": "magenta", - "answer": "magenta"}, - style_override=False) + selected = prompt( + question, + style={"questionmark": "#ff9d00 bold", "question": "bold", "checkbox": "magenta", "answer": "magenta"}, + style_override=False, + ) logger.debug("Selected profiles: %s", selected) return selected["profiles"] diff --git a/rocrate_validator/utils/io_helpers/output/__init__.py b/rocrate_validator/utils/io_helpers/output/__init__.py index 5488d5cdc..990d73847 100644 --- a/rocrate_validator/utils/io_helpers/output/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/__init__.py @@ -35,12 +35,10 @@ class OutputFormatter(Protocol): """Protocol for output formatters.""" - def __rich_console__(self, console: Console, options: 
ConsoleOptions) -> RenderResult: - ... + def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: ... class BaseOutputFormatter(OutputFormatter): - def __init__(self, data: Any | None = None): # Formatters are registered as classes (instantiated with the data to # render), so the map values are formatter types, not instances. diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index 52417b2d5..4b012ead7 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -26,8 +26,14 @@ class Console(BaseConsole): """Rich console that can be disabled.""" - def __init__(self, *args, disabled: bool = False, interactive: bool = True, - formatters: dict[type, Any] | None = None, **kwargs): + def __init__( + self, + *args, + disabled: bool = False, + interactive: bool = True, + formatters: dict[type, Any] | None = None, + **kwargs, + ): force_jupyter = kwargs.pop("force_jupyter", None) if force_jupyter is None: force_jupyter = False if self.__jupyter_environment__() else None @@ -43,6 +49,7 @@ def __init__(self, *args, disabled: bool = False, interactive: bool = True, def __jupyter_environment__(self) -> bool: from rocrate_validator.cli.utils import running_in_jupyter # noqa: PLC0415 + return running_in_jupyter() def register_formatter(self, formatter: OutputFormatter, type_: type | None = None): diff --git a/rocrate_validator/utils/io_helpers/output/json/__init__.py b/rocrate_validator/utils/io_helpers/output/json/__init__.py index 9aa1c29b5..0abee5a34 100644 --- a/rocrate_validator/utils/io_helpers/output/json/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/json/__init__.py @@ -28,7 +28,6 @@ class JSONOutputFormatter(BaseOutputFormatter): - def __init__(self, data: Any | None = None): super().__init__(data) self.add_type_formatter(ValidationResult, ValidationResultJSONOutputFormatter) diff --git 
a/rocrate_validator/utils/io_helpers/output/text/__init__.py b/rocrate_validator/utils/io_helpers/output/text/__init__.py index 7090f1be5..ee37dea7b 100644 --- a/rocrate_validator/utils/io_helpers/output/text/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/__init__.py @@ -32,7 +32,6 @@ class TextOutputFormatter(BaseOutputFormatter): - def __init__(self, data: Any | None = None): super().__init__(data) self.add_type_formatter(ValidationResult, ValidationResultTextOutputFormatter) diff --git a/rocrate_validator/utils/io_helpers/output/text/formatters.py b/rocrate_validator/utils/io_helpers/output/text/formatters.py index fabe90196..ce753b75b 100644 --- a/rocrate_validator/utils/io_helpers/output/text/formatters.py +++ b/rocrate_validator/utils/io_helpers/output/text/formatters.py @@ -36,7 +36,6 @@ class ValidationResultTextOutputFormatter(OutputFormatter): - def __init__(self, validation_result: ValidationResult): self._validation_result = validation_result @@ -50,41 +49,42 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR yield Align(f"\n[profile: [magenta bold]{requirement.profile.name}[/magenta bold]]", align="right") yield Padding( - f"[bold][cyan][u][ {requirement.identifier} ]: " - f"{Markdown(requirement.name).markup}[/u][/cyan][/bold]", (0, 5)) + f"[bold][cyan][u][ {requirement.identifier} ]: {Markdown(requirement.name).markup}[/u][/cyan][/bold]", + (0, 5), + ) yield Padding(Markdown(requirement.description), (1, 6)) yield Padding("[white bold u] Failed checks [/white bold u]\n", (0, 8)) - for check in sorted(result.get_failed_checks_by_requirement(requirement), - key=lambda x: (-x.severity.value, x)): + for check in sorted( + result.get_failed_checks_by_requirement(requirement), key=lambda x: (-x.severity.value, x) + ): issue_color = get_severity_color(check.level.severity) yield Padding( f"[bold][{issue_color}][ {check.identifier.center(16)} ][/{issue_color}] " f"[magenta]{check.name}[/magenta][/bold]:", - 
(1, 8, 0, 8) + (1, 8, 0, 8), ) yield Padding(Markdown(check.description), (0, 0, 0, len(check.identifier) + 13)) yield Padding("[u] Detected issues [/u]", (0, 8)) - for issue in sorted(result.get_issues_by_check(check), - key=lambda x: (-x.severity.value, x)): + for issue in sorted(result.get_issues_by_check(check), key=lambda x: (-x.severity.value, x)): path = "" if issue.violatingProperty and issue.violatingPropertyValue: path = f" of [yellow]{issue.violatingProperty}[/yellow]" if issue.violatingPropertyValue: if issue.violatingProperty: path += "=" - path += f"\"[green]{issue.violatingPropertyValue}[/green]\" " # keep the ending space + path += f'"[green]{issue.violatingPropertyValue}[/green]" ' # keep the ending space if issue.violatingEntity: path = f"{path} on [cyan]<{issue.violatingEntity}>[/cyan]" - yield Padding(f"- [[red]Violation[/red]{path}]: " - f"{Markdown(issue.message or '').markup}", (0, 9, 1, 9)) + yield Padding( + f"- [[red]Violation[/red]{path}]: {Markdown(issue.message or '').markup}", (0, 9, 1, 9) + ) if console.no_color: yield Padding("\n", (0, 0)) yield Padding("\n", (0, 0)) class ValidationStatisticsTextOutputFormatter(OutputFormatter): - def __init__(self, validation_statistics: ValidationStatistics): self._validation_statistics = validation_statistics @@ -92,7 +92,7 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR layout = ValidationReportLayout( console=console, settings=self._validation_statistics.validation_settings, - statistics=self._validation_statistics + statistics=self._validation_statistics, ) logger.debug(layout.layout) if layout.layout is not None: diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py b/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py index ff3b52f09..a4a8d2bb3 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/__init__.py @@ -1,4 +1,3 @@ - # 
Copyright (c) 2024-2026 CRS4 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py b/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py index 4e02d81fe..64b42b37d 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/dispatcher.py @@ -47,15 +47,19 @@ class EventDispatcher(Subscriber): EventType.VALIDATION_END: "_on_validation_end", } - _CHECK_EVENTS: ClassVar[frozenset[EventType]] = frozenset({ - EventType.REQUIREMENT_CHECK_VALIDATION_START, - EventType.REQUIREMENT_CHECK_VALIDATION_END, - }) - - _REQUIREMENT_EVENTS: ClassVar[frozenset[EventType]] = frozenset({ - EventType.REQUIREMENT_VALIDATION_START, - EventType.REQUIREMENT_VALIDATION_END, - }) + _CHECK_EVENTS: ClassVar[frozenset[EventType]] = frozenset( + { + EventType.REQUIREMENT_CHECK_VALIDATION_START, + EventType.REQUIREMENT_CHECK_VALIDATION_END, + } + ) + + _REQUIREMENT_EVENTS: ClassVar[frozenset[EventType]] = frozenset( + { + EventType.REQUIREMENT_VALIDATION_START, + EventType.REQUIREMENT_VALIDATION_END, + } + ) def __init__(self, name: str | None = None): super().__init__(name or type(self).__name__) @@ -84,44 +88,39 @@ def _should_dispatch(self, event: Event, ctx: ValidationContext | None) -> bool: def _on_validation_start(self, event: Event, ctx: ValidationContext | None) -> None: pass - def _on_profile_validation_start(self, event: ProfileValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_profile_validation_start(self, event: ProfileValidationEvent, ctx: ValidationContext | None) -> None: pass - def _on_requirement_validation_start(self, event: RequirementValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_validation_start( + self, event: RequirementValidationEvent, ctx: ValidationContext | None + ) -> None: pass - def _on_requirement_check_validation_start(self, 
event: RequirementCheckValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_check_validation_start( + self, event: RequirementCheckValidationEvent, ctx: ValidationContext | None + ) -> None: pass - def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_check_validation_end( + self, event: RequirementCheckValidationEvent, ctx: ValidationContext | None + ) -> None: pass - def _on_requirement_validation_end(self, event: RequirementValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_validation_end(self, event: RequirementValidationEvent, ctx: ValidationContext | None) -> None: pass - def _on_profile_validation_end(self, event: ProfileValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_profile_validation_end(self, event: ProfileValidationEvent, ctx: ValidationContext | None) -> None: pass - def _on_validation_end(self, event: ValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_validation_end(self, event: ValidationEvent, ctx: ValidationContext | None) -> None: pass @staticmethod - def _is_check_actionable(event: RequirementCheckValidationEvent, - ctx: ValidationContext | None) -> bool: + def _is_check_actionable(event: RequirementCheckValidationEvent, ctx: ValidationContext | None) -> bool: """Return ``True`` if the check is neither hidden nor overridden.""" assert ctx is not None, "Validation context must be provided" if event.requirement_check.requirement.hidden: return False if event.requirement_check.overridden: - return ( - ctx.target_validation_profile.identifier - == event.requirement_check.requirement.profile.identifier - ) + return ctx.target_validation_profile.identifier == event.requirement_check.requirement.profile.identifier return True diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py 
b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py index 40cbbf9c7..873eb4e8e 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/progress.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/progress.py @@ -31,13 +31,11 @@ class ProgressMonitor(EventDispatcher): - PROFILE_VALIDATION = "Profiles" REQUIREMENT_VALIDATION = "Requirements" REQUIREMENT_CHECK_VALIDATION = "Requirements Checks" - def __init__(self, settings: dict | ValidationSettings, - stats: ValidationStatistics | None = None): + def __init__(self, settings: dict | ValidationSettings, stats: ValidationStatistics | None = None): # Initialize the Subscriber super().__init__("ProgressMonitor") self.__progress = Progress( @@ -45,26 +43,25 @@ def __init__(self, settings: dict | ValidationSettings, BarColumn(), TextColumn("{task.completed}/{task.total}"), TimeElapsedColumn(), - expand=True) + expand=True, + ) # Initialize statistics stats = stats or ValidationStatistics(settings) self.initial_state = stats # Store settings self.settings = settings # Initialize progress tasks - self.profile_validation = self.__progress.add_task( - self.PROFILE_VALIDATION, total=len(stats.profiles)) + self.profile_validation = self.__progress.add_task(self.PROFILE_VALIDATION, total=len(stats.profiles)) self.requirement_validation = self.__progress.add_task( - self.REQUIREMENT_VALIDATION, total=stats.total_requirements) + self.REQUIREMENT_VALIDATION, total=stats.total_requirements + ) self.requirement_check_validation = self.__progress.add_task( - self.REQUIREMENT_CHECK_VALIDATION, total=stats.total_checks) + self.REQUIREMENT_CHECK_VALIDATION, total=stats.total_checks + ) # Initialize progress according to current statistics - self.__progress.update(task_id=self.profile_validation, - advance=len(stats.validated_profiles)) - self.__progress.update(task_id=self.requirement_validation, - advance=len(stats.validated_requirements)) - 
self.__progress.update(task_id=self.requirement_check_validation, - advance=len(stats.validated_checks)) + self.__progress.update(task_id=self.profile_validation, advance=len(stats.validated_profiles)) + self.__progress.update(task_id=self.requirement_validation, advance=len(stats.validated_requirements)) + self.__progress.update(task_id=self.requirement_check_validation, advance=len(stats.validated_checks)) def start(self): self.__progress.start() @@ -76,14 +73,13 @@ def stop(self): def progress(self) -> Progress: return self.__progress - def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_check_validation_end( + self, event: RequirementCheckValidationEvent, ctx: ValidationContext | None + ) -> None: self.__progress.update(task_id=self.requirement_check_validation, advance=1) - def _on_requirement_validation_end(self, event: RequirementValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_validation_end(self, event: RequirementValidationEvent, ctx: ValidationContext | None) -> None: self.__progress.update(task_id=self.requirement_validation, advance=1) - def _on_profile_validation_end(self, event: ProfileValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_profile_validation_end(self, event: ProfileValidationEvent, ctx: ValidationContext | None) -> None: self.__progress.update(task_id=self.profile_validation, advance=1) diff --git a/rocrate_validator/utils/io_helpers/output/text/layout/report.py b/rocrate_validator/utils/io_helpers/output/text/layout/report.py index 97ac3a5ff..e31b13cf4 100644 --- a/rocrate_validator/utils/io_helpers/output/text/layout/report.py +++ b/rocrate_validator/utils/io_helpers/output/text/layout/report.py @@ -55,11 +55,13 @@ class ValidationReportLayout(Layout): - - def __init__(self, console: Console, - settings: ValidationSettings, - statistics: ValidationStatistics | None = None, - 
profile_autodetected: bool = False): + def __init__( + self, + console: Console, + settings: ValidationSettings, + statistics: ValidationStatistics | None = None, + profile_autodetected: bool = False, + ): super().__init__() self.console = console self.validation_settings = settings @@ -121,55 +123,72 @@ def _init_layout(self): f"{'[italic](autodetected)[/italic]' if self.profile_autodetected else ''}" f"\n[bold cyan]Validation Severity:[/bold cyan] " f"[bold {severity_color}]{settings.requirement_severity}[/bold {severity_color}]", - style="white", align="left"), - name="Base Info", size=5) + style="white", + align="left", + ), + name="Base Info", + size=5, + ) self.passed_checks = Layout(name="PASSED") self.failed_checks = Layout(name="FAILED") # Create the layout of the requirement checks section validated_checks_container = Layout(name="Requirement Checks Validated") - validated_checks_container.split_row( - self.passed_checks, - self.failed_checks - ) + validated_checks_container.split_row(self.passed_checks, self.failed_checks) # Create the layout of the requirement checks section self.requirement_checks_by_severity_container_layout = Layout(name="Requirement Checks Validation", size=5) self.requirement_checks_by_severity_container_layout.split_row( - Layout(name="required"), - Layout(name="recommended"), - Layout(name="optional") + Layout(name="required"), Layout(name="recommended"), Layout(name="optional") ) # Create the layout of the requirement checks section requirement_checks_container_layout = Layout(name="Requirement Checks") requirement_checks_container_layout.split_column( - self.requirement_checks_by_severity_container_layout, - validated_checks_container + self.requirement_checks_by_severity_container_layout, validated_checks_container ) # Create the layout of the validation checks progress self._validation_checks_progress = Layout( - Panel(Align(self.progress_monitor.progress, align="center"), - border_style="white", padding=(0, 1), 
title="Overall Progress"), - name="Validation Progress", size=5) + Panel( + Align(self.progress_monitor.progress, align="center"), + border_style="white", + padding=(0, 1), + title="Overall Progress", + ), + name="Validation Progress", + size=5, + ) # Create the layout of the report container report_container_layout = Layout(name="Report Container Layout") report_container_layout.split_column( base_info_layout, - Layout(Panel(requirement_checks_container_layout, - title="[bold]Requirements Checks Validation[/bold]", border_style="white", padding=(1, 1))), - self._validation_checks_progress + Layout( + Panel( + requirement_checks_container_layout, + title="[bold]Requirements Checks Validation[/bold]", + border_style="white", + padding=(1, 1), + ) + ), + self._validation_checks_progress, ) # Create the main layout self.checks_stats_layout = Layout( - Panel(report_container_layout, title="[bold]- Validation Report -[/bold]", - border_style="cyan", title_align="center", padding=(1, 2))) + Panel( + report_container_layout, + title="[bold]- Validation Report -[/bold]", + border_style="cyan", + title_align="center", + padding=(1, 2), + ) + ) # Create the overall result layout self.overall_result = Layout( - Padding(Rule("\n[italic][cyan]Validating ROCrate...[/cyan][/italic]"), (1, 1)), size=3) + Padding(Rule("\n[italic][cyan]Validating ROCrate...[/cyan][/italic]"), (1, 1)), size=3 + ) group_layout = Layout() group_layout.add_split(self.checks_stats_layout) @@ -178,9 +197,7 @@ def _init_layout(self): self._layout = Padding(group_layout, (1, 1)) # Update the layout with the profile stats - self.update_stats( - self.statistics or ValidationStatistics(self.validation_settings) - ) + self.update_stats(self.statistics or ValidationStatistics(self.validation_settings)) # Extract the result if available result = self.result or (self.statistics.validation_result) if self.statistics else None @@ -195,62 +212,56 @@ def update_stats(self, profile_stats: ValidationStatistics | None = 
None): Panel( Align( str(profile_stats.check_count_by_severity[Severity.REQUIRED]) if profile_stats else "0", - align="center" + align="center", ), padding=(1, 1), title="Severity: REQUIRED", title_align="center", - border_style="RED" + border_style="RED", ) ) self.requirement_checks_by_severity_container_layout["recommended"].update( Panel( Align( str(profile_stats.check_count_by_severity[Severity.RECOMMENDED]) if profile_stats else "0", - align="center" + align="center", ), padding=(1, 1), title="Severity: RECOMMENDED", title_align="center", - border_style="orange1" + border_style="orange1", ) ) self.requirement_checks_by_severity_container_layout["optional"].update( Panel( Align( str(profile_stats.check_count_by_severity[Severity.OPTIONAL]) if profile_stats else "0", - align="center" + align="center", ), padding=(1, 1), title="Severity: OPTIONAL", title_align="center", - border_style="yellow" + border_style="yellow", ) ) self.passed_checks.update( Panel( - Align( - str(len(profile_stats.passed_checks)), - align="center" - ), + Align(str(len(profile_stats.passed_checks)), align="center"), padding=(1, 1), title="PASSED Checks", title_align="center", - border_style="green" + border_style="green", ) ) self.failed_checks.update( Panel( - Align( - str(len(profile_stats.failed_checks)), - align="center" - ), + Align(str(len(profile_stats.failed_checks)), align="center"), padding=(1, 1), title="FAILED Checks", title_align="center", - border_style="red" + border_style="red", ) ) @@ -261,15 +272,27 @@ def show_overall_result(self, result: ValidationResult | None): if result.passed(): icon = "[OK]" if not self.console.interactive else "✅" self.overall_result.update( - Padding(Rule(f"[bold]{icon} RO-Crate is a [green]valid[/green] " - f"[magenta]{result.context.target_profile.identifier}[/magenta] !!![/bold]\n\n", - style="bold green"), (1, 1))) + Padding( + Rule( + f"[bold]{icon} RO-Crate is a [green]valid[/green] " + 
f"[magenta]{result.context.target_profile.identifier}[/magenta] !!![/bold]\n\n", + style="bold green", + ), + (1, 1), + ) + ) else: icon = "[FAILED]" if not self.console.interactive else "❌" self.overall_result.update( - Padding(Rule(f"[bold]{icon} RO-Crate is [red]not[/red] a [red]valid[/red] " - f"[magenta]{result.context.target_profile.identifier}[/magenta] !!![/bold]\n", - style="bold red"), (1, 1))) + Padding( + Rule( + f"[bold]{icon} RO-Crate is [red]not[/red] a [red]valid[/red] " + f"[magenta]{result.context.target_profile.identifier}[/magenta] !!![/bold]\n", + style="bold red", + ), + (1, 1), + ) + ) class _ReportLayoutSubscriber(EventDispatcher): @@ -279,33 +302,43 @@ def __init__(self, layout: ValidationReportLayout): super().__init__("ValidationReportLayout") self._layout = layout - def _on_requirement_check_validation_end(self, event: RequirementCheckValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_check_validation_end( + self, event: RequirementCheckValidationEvent, ctx: ValidationContext | None + ) -> None: if event.validation_result is not None: assert ctx is not None self._layout.update_stats(ctx.result.statistics) - def _on_requirement_validation_end(self, event: RequirementValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_requirement_validation_end(self, event: RequirementValidationEvent, ctx: ValidationContext | None) -> None: assert ctx is not None, "Validation context must be provided" self._layout.update_stats(ctx.result.statistics) - def _on_validation_end(self, event: ValidationEvent, - ctx: ValidationContext | None) -> None: + def _on_validation_end(self, event: ValidationEvent, ctx: ValidationContext | None) -> None: self._layout.show_overall_result(event.validation_result) def get_app_header_rule() -> Padding: - return Padding(Rule(f"\n[bold][cyan]ROCrate Validator[/cyan] (ver. 
[magenta]{get_version()}[/magenta])[/bold]", - style="bold cyan"), (1, 2)) + return Padding( + Rule( + f"\n[bold][cyan]ROCrate Validator[/cyan] (ver. [magenta]{get_version()}[/magenta])[/bold]", + style="bold cyan", + ), + (1, 2), + ) class LiveReportLayout(ValidationReportLayout): """Context manager for live validation report rendering.""" - def __init__(self, console: Console, validation_settings: dict, - result: ValidationResult, profile_autodetected: bool = False, - refresh_per_second: int = 10, transient: bool = False): + def __init__( + self, + console: Console, + validation_settings: dict, + result: ValidationResult, + profile_autodetected: bool = False, + refresh_per_second: int = 10, + transient: bool = False, + ): """ Initialize the live report layout context manager. @@ -325,10 +358,7 @@ def __init__(self, console: Console, validation_settings: dict, def __enter__(self): """Enter the context and start live rendering.""" self._live = Live( - self, - console=self.console, - refresh_per_second=self.refresh_per_second, - transient=self.transient + self, console=self.console, refresh_per_second=self.refresh_per_second, transient=self.transient ) self._live.__enter__() return self @@ -343,11 +373,15 @@ def __exit__(self, exc_type, exc_val, exc_tb): class LiveTextProgressLayout: """Context manager for live validation report rendering with text progress.""" - def __init__(self, console: Console, - profile_identifier: str, - validation_settings: dict, - callable_service: Callable, - refresh_per_second: int = 10, transient: bool = False): + def __init__( + self, + console: Console, + profile_identifier: str, + validation_settings: dict, + callable_service: Callable, + refresh_per_second: int = 10, + transient: bool = False, + ): """ Initialize the live text progress layout context manager. 
Args: @@ -368,7 +402,7 @@ def __enter__(self): # Create initial message message = Text() - message.append(f"\n{' '*2}🔍 ", style="") + message.append(f"\n{' ' * 2}🔍 ", style="") message.append("Validating RO-Crate against profile: ", style="bold") message.append(f"{self.profile_identifier}", style="cyan") message.append("... ", style="bold") @@ -394,7 +428,7 @@ def run_validation(): while validation_thread.is_alive(): dot_count += 1 message = Text() - message.append(f"\n{' '*2}🔍 ", style="") + message.append(f"\n{' ' * 2}🔍 ", style="") message.append("Validating RO-Crate against profile: ", style="bold") message.append(f"{self.profile_identifier}", style="cyan") message.append("." * dot_count, style="bold") diff --git a/rocrate_validator/utils/log.py b/rocrate_validator/utils/log.py index 0c91ec1a4..4c0c66a8b 100644 --- a/rocrate_validator/utils/log.py +++ b/rocrate_validator/utils/log.py @@ -33,22 +33,18 @@ def get_log_format(level: int): """Get the log format based on the log level""" log_format = ( - '[%(log_color)s%(asctime)s%(reset)s] %(levelname)s in %(yellow)s%(module)s%(reset)s: ' - '%(light_white)s%(message)s%(reset)s' + "[%(log_color)s%(asctime)s%(reset)s] %(levelname)s in %(yellow)s%(module)s%(reset)s: " + "%(light_white)s%(message)s%(reset)s" ) if level == DEBUG: log_format = ( - '%(log_color)s%(levelname)s%(reset)s:%(yellow)s%(name)s:%(module)s::%(funcName)s%(reset)s ' - '@ %(light_green)sline: %(lineno)s%(reset)s - %(light_black)s%(message)s%(reset)s' + "%(log_color)s%(levelname)s%(reset)s:%(yellow)s%(name)s:%(module)s::%(funcName)s%(reset)s " + "@ %(light_green)sline: %(lineno)s%(reset)s - %(light_black)s%(message)s%(reset)s" ) return log_format -DEFAULT_SETTINGS : dict[str, Any] = { - 'enabled': True, - 'level': WARNING, - 'format': get_log_format(WARNING) -} +DEFAULT_SETTINGS: dict[str, Any] = {"enabled": True, "level": WARNING, "format": get_log_format(WARNING)} # _lock is used to serialize access to shared data structures in this module. 
@@ -102,9 +98,9 @@ def __setup_logger__(logger: Logger): settings = __settings__.get(logger.name, __settings__) # parse the log level - level = settings.get('level', __settings__['level']) + level = settings.get("level", __settings__["level"]) if not isinstance(level, int): - level = getattr(__module__, settings['level'].upper(), WARNING) + level = getattr(__module__, settings["level"].upper(), WARNING) # set the log level logger.setLevel(level) @@ -118,7 +114,7 @@ def __setup_logger__(logger: Logger): logger.addHandler(ch) # enable/disable the logger - if settings.get('enabled', __settings__['enabled']): + if settings.get("enabled", __settings__["enabled"]): logger.disabled = False else: logger.disabled = True @@ -126,7 +122,7 @@ def __setup_logger__(logger: Logger): def __create_logger__(name: str) -> Logger: if not isinstance(name, str): - raise TypeError('A logger name must be a string') + raise TypeError("A logger name must be a string") with _lock: # Return the cached logger if it already exists, otherwise create it. 
logger = __loggers__.get(name) @@ -148,8 +144,8 @@ def basicConfig(level: int, modules_config: dict | None = None): level = getattr(__module__, level.upper(), None) # set the default log level and format - __settings__['level'] = level - __settings__['format'] = get_log_format(level) + __settings__["level"] = level + __settings__["format"] = get_log_format(level) # set the log level for the modules if modules_config: @@ -157,16 +153,16 @@ def basicConfig(level: int, modules_config: dict | None = None): # initialize the logging module colorlog.basicConfig( - level=__settings__['level'], - format=__settings__['format'], + level=__settings__["level"], + format=__settings__["format"], log_colors={ - 'DEBUG': 'cyan', - 'INFO': 'green', - 'WARNING': 'yellow', - 'ERROR': 'red', - 'CRITICAL': 'red,bg_white', + "DEBUG": "cyan", + "INFO": "green", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "red,bg_white", }, - handlers=[StreamHandler(__log_stream__)] + handlers=[StreamHandler(__log_stream__)], ) # reconfigure existing loggers @@ -179,7 +175,6 @@ def getLogger(name: str) -> "LoggerProxy": class LoggerProxy: - """Define a proxy class for the logger to allow lazy initialization of the logger instance""" def __init__(self, name: str): @@ -196,8 +191,7 @@ def __getattr__(self, name): return getattr(self._instance, name) -__export__ = [get_log_format, DEFAULT_SETTINGS, Logger, - CRITICAL, DEBUG, ERROR, INFO, WARNING, StreamHandler, Optional] +__export__ = [get_log_format, DEFAULT_SETTINGS, Logger, CRITICAL, DEBUG, ERROR, INFO, WARNING, StreamHandler, Optional] """ diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index aeb30dfc2..3a63e4597 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -100,8 +100,8 @@ def get_default_http_cache_path() -> Path: def list_matching_file_paths( - directory: str = '.', - serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle") -> list[str]: + 
directory: str = ".", serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle" +) -> list[str]: """ Get all the files in the directory matching the format. @@ -121,8 +121,9 @@ def list_matching_file_paths( return file_paths -def list_graph_paths(graphs_dir: str = CURRENT_DIR, - serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle") -> list[str]: +def list_graph_paths( + graphs_dir: str = CURRENT_DIR, serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle" +) -> list[str]: """ Get the paths to all the graphs in the directory @@ -134,7 +135,7 @@ def list_graph_paths(graphs_dir: str = CURRENT_DIR, def shorten_path(p: Path) -> str: - """" + """ " Shorten the path to a relative path if possible, otherwise return the absolute path. :param p: The path to shorten diff --git a/rocrate_validator/utils/rdf.py b/rocrate_validator/utils/rdf.py index 7b10ebb8c..6e4f1af12 100644 --- a/rocrate_validator/utils/rdf.py +++ b/rocrate_validator/utils/rdf.py @@ -23,9 +23,8 @@ def get_full_graph( - graphs_dir: str, - serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle", - publicID: str = ".") -> Graph: + graphs_dir: str, serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle", publicID: str = "." 
+) -> Graph: """ Get the full graph from the directory @@ -53,19 +52,19 @@ def extract_base_from_jsonld(json_data: dict) -> str | None: :param json_data: The JSON-LD data as a dictionary :return: The @base value if found, None otherwise """ - context = json_data.get('@context') + context = json_data.get("@context") if not context: return None # If @context is a dictionary, look for @base directly if isinstance(context, dict): - return context.get('@base') + return context.get("@base") # If @context is a list, look for @base in each context item if isinstance(context, list): for ctx in context: - if isinstance(ctx, dict) and '@base' in ctx: - return ctx['@base'] + if isinstance(ctx, dict) and "@base" in ctx: + return ctx["@base"] return None diff --git a/tests/conftest.py b/tests/conftest.py index 7d57a770e..4f4ad1e36 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ level=logging.WARNING, modules_config={ # "rocrate_validator.models": {"level": logging.DEBUG} - } + }, ) CURRENT_PATH = str(Path(__file__).resolve().parent) @@ -43,10 +43,8 @@ rocrate_profile = services.get_profile("ro-crate") if not rocrate_profile: raise RuntimeError("Unable to load the RO-Crate profile") -check_local_data_entity_existence = \ - rocrate_profile.get_requirement_check("Data Entity: REQUIRED resource availability") -assert check_local_data_entity_existence, \ - "Unable to find the requirement 'Data Entity: REQUIRED resource availability'" +check_local_data_entity_existence = rocrate_profile.get_requirement_check("Data Entity: REQUIRED resource availability") +assert check_local_data_entity_existence, "Unable to find the requirement 'Data Entity: REQUIRED resource availability'" SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier @@ -194,37 +192,41 @@ def ro_crate_profile_may_path(ro_crate_profile_path): return str(Path(ro_crate_profile_path) / "may") -@fixture(params=[ - "2024 01 01", - "2024 Jan 01", - "2021-13-01", - 
"2021-00-10", - "2021-01-32", - "2021-01-01T25:00", - "2021-01-01T23:60", - "2021-01-01T23:59:60", - "T23:59:59", -]) +@fixture( + params=[ + "2024 01 01", + "2024 Jan 01", + "2021-13-01", + "2021-00-10", + "2021-01-32", + "2021-01-01T25:00", + "2021-01-01T23:60", + "2021-01-01T23:59:60", + "T23:59:59", + ] +) def invalid_datetime(request): return request.param -@fixture(params=[ - "2024", - "2024-01", - "202401", - "2024-01-01", - "20240101", - "2024-001", - "2024-W01", - "2024-W01-1", - "2024-01-01T00:00", - "2024-01-01T00:00:00", - "2024-01-01T00:00:00Z", - "2024-01-01T00:00:00+00:00", - "2024-01-01T00:00:00.000", - "2024-01-01T00:00:00.000Z", - "2024-01-01T00:00:00.000+00:00", -]) +@fixture( + params=[ + "2024", + "2024-01", + "202401", + "2024-01-01", + "20240101", + "2024-001", + "2024-W01", + "2024-W01-1", + "2024-01-01T00:00", + "2024-01-01T00:00:00", + "2024-01-01T00:00:00Z", + "2024-01-01T00:00:00+00:00", + "2024-01-01T00:00:00.000", + "2024-01-01T00:00:00.000Z", + "2024-01-01T00:00:00.000+00:00", + ] +) def valid_datetime(request): return request.param diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_conformsto/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_conformsto/ro-crate-metadata.json index 0f454e80d..c0eaa4dcc 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_conformsto/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_conformsto/ro-crate-metadata.json @@ -134,4 +134,4 @@ "@type": "Dataset" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_type/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_type/ro-crate-metadata.json index b6d3d9cb9..f5cb4a7a5 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_type/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/0_main_workflow/main_workflow_bad_type/ro-crate-metadata.json @@ -110,4 +110,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_bad_type/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_bad_type/ro-crate-metadata.json index 51cee9625..274a1f4eb 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_bad_type/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_bad_type/ro-crate-metadata.json @@ -108,4 +108,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_no_lang/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_no_lang/ro-crate-metadata.json index 513e57ad0..4bc0af3ae 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_no_lang/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_cwl_desc_no_lang/ro-crate-metadata.json @@ -95,4 +95,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_cwl_desc/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_cwl_desc/ro-crate-metadata.json index 60a60937a..d433a707c 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_cwl_desc/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_cwl_desc/ro-crate-metadata.json @@ -83,4 +83,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_image/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_image/ro-crate-metadata.json index b746084b5..c5f585b21 100644 
--- a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_image/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_image/ro-crate-metadata.json @@ -100,4 +100,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_lang/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_lang/ro-crate-metadata.json index f9ca8cb37..bd54ffaa5 100644 --- a/tests/data/crates/invalid/0_main_workflow/main_workflow_no_lang/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/main_workflow_no_lang/ro-crate-metadata.json @@ -96,4 +96,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_main_workflow/no_files/ro-crate-metadata.json b/tests/data/crates/invalid/0_main_workflow/no_files/ro-crate-metadata.json index 4bb00a12b..1c4180e7e 100644 --- a/tests/data/crates/invalid/0_main_workflow/no_files/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_main_workflow/no_files/ro-crate-metadata.json @@ -110,4 +110,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/0_multi_profile_crate/primary-job.json b/tests/data/crates/invalid/0_multi_profile_crate/primary-job.json index 2ef06945f..364e4e90f 100644 --- a/tests/data/crates/invalid/0_multi_profile_crate/primary-job.json +++ b/tests/data/crates/invalid/0_multi_profile_crate/primary-job.json @@ -10,4 +10,4 @@ "checksum": "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376" }, "reverse_sort": true -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/1_wroc_crate/no_license/ro-crate-metadata.json b/tests/data/crates/invalid/1_wroc_crate/no_license/ro-crate-metadata.json index 7f490d697..77afbde7e 100644 --- a/tests/data/crates/invalid/1_wroc_crate/no_license/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/1_wroc_crate/no_license/ro-crate-metadata.json @@ -109,4 +109,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/1_wroc_crate/no_mainentity/ro-crate-metadata.json b/tests/data/crates/invalid/1_wroc_crate/no_mainentity/ro-crate-metadata.json index 6169df3ab..b0558f219 100644 --- a/tests/data/crates/invalid/1_wroc_crate/no_mainentity/ro-crate-metadata.json +++ b/tests/data/crates/invalid/1_wroc_crate/no_mainentity/ro-crate-metadata.json @@ -131,4 +131,4 @@ "@type": "Dataset" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/1_wroc_crate/readme_not_about_crate/ro-crate-metadata.json b/tests/data/crates/invalid/1_wroc_crate/readme_not_about_crate/ro-crate-metadata.json index ad81e806a..83b6f6b3e 100644 --- a/tests/data/crates/invalid/1_wroc_crate/readme_not_about_crate/ro-crate-metadata.json +++ b/tests/data/crates/invalid/1_wroc_crate/readme_not_about_crate/ro-crate-metadata.json @@ -107,4 +107,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/1_wroc_crate/readme_wrong_encoding_format/ro-crate-metadata.json b/tests/data/crates/invalid/1_wroc_crate/readme_wrong_encoding_format/ro-crate-metadata.json index 2669fb9fb..cd92d5db5 100644 --- a/tests/data/crates/invalid/1_wroc_crate/readme_wrong_encoding_format/ro-crate-metadata.json +++ b/tests/data/crates/invalid/1_wroc_crate/readme_wrong_encoding_format/ro-crate-metadata.json @@ -110,4 +110,4 @@ "encodingFormat": "text/csv" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/2_wroc_descriptor/wroc_descriptor_bad_conforms_to/ro-crate-metadata.json b/tests/data/crates/invalid/2_wroc_descriptor/wroc_descriptor_bad_conforms_to/ro-crate-metadata.json index 70a0f622e..b0cb73258 100644 --- a/tests/data/crates/invalid/2_wroc_descriptor/wroc_descriptor_bad_conforms_to/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/2_wroc_descriptor/wroc_descriptor_bad_conforms_to/ro-crate-metadata.json @@ -105,4 +105,4 @@ "encodingFormat": "text/markdown" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/4_data_entity_metadata/invalid_missing_hasPart_reference/ro-crate-metadata.json b/tests/data/crates/invalid/4_data_entity_metadata/invalid_missing_hasPart_reference/ro-crate-metadata.json index 16bb00f96..b7ae4c623 100644 --- a/tests/data/crates/invalid/4_data_entity_metadata/invalid_missing_hasPart_reference/ro-crate-metadata.json +++ b/tests/data/crates/invalid/4_data_entity_metadata/invalid_missing_hasPart_reference/ro-crate-metadata.json @@ -91,12 +91,12 @@ { "@id": "foo/", "@type": "Dataset" - + }, { "@id": "foo/xxx", "@type": "File" - + }, { "@id": "blank.png", diff --git a/tests/data/crates/valid/minimal-isa-ro-crate/ro-crate-metadata.json b/tests/data/crates/valid/minimal-isa-ro-crate/ro-crate-metadata.json index 3a9578d1a..13427694d 100644 --- a/tests/data/crates/valid/minimal-isa-ro-crate/ro-crate-metadata.json +++ b/tests/data/crates/valid/minimal-isa-ro-crate/ro-crate-metadata.json @@ -209,4 +209,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/provenance-run-crate/primary-job.json b/tests/data/crates/valid/provenance-run-crate/primary-job.json index 2ef06945f..364e4e90f 100644 --- a/tests/data/crates/valid/provenance-run-crate/primary-job.json +++ b/tests/data/crates/valid/provenance-run-crate/primary-job.json @@ -10,4 +10,4 @@ "checksum": "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376" }, "reverse_sort": true -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/rocrate-with-custom-terms/ro-crate-metadata.json b/tests/data/crates/valid/rocrate-with-custom-terms/ro-crate-metadata.json index e4e0c953c..59c535d72 100644 --- a/tests/data/crates/valid/rocrate-with-custom-terms/ro-crate-metadata.json +++ b/tests/data/crates/valid/rocrate-with-custom-terms/ro-crate-metadata.json @@ 
-2,7 +2,7 @@ "@context": [ "https://w3id.org/ro/crate/1.1/context", "https://w3id.org/ro/terms/workflow-run/context", - { + { "myCustomTerm": "https://example.org#myCustomTerm", "rdfs": "http://www.w3.org/2000/01/rdf-schema#" } diff --git a/tests/data/crates/valid/wrroc-paper/index.html b/tests/data/crates/valid/wrroc-paper/index.html index 7a2fc2a8f..da40ef862 100644 --- a/tests/data/crates/valid/wrroc-paper/index.html +++ b/tests/data/crates/valid/wrroc-paper/index.html @@ -4,7 +4,7 @@ - - + @@ -1465,12 +1465,12 @@

Recording provenance of workflow runs with RO-Crate (RO-Cr

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)

- - - - + + + +
- + @@ -1491,39 +1491,39 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<

@@ -1539,11 +1539,11 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<

@@ -1563,12 +1563,12 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<


Go to: Simone Leo

- - - - + + + +
- +

@@ -1596,12 +1596,12 @@

Go to: Simone Leo




Go to: Michael R Crusoe

- - - - + + + +
- +
@@ -1616,9 +1616,9 @@

Go to: Michael R Crusoe

@@ -1635,12 +1635,12 @@

Go to: Michael R Crusoe


Go to: Laura Rodríguez-Navas

- - - - + + + +
- +

@@ -1668,12 +1668,12 @@

Go to: Laura Rodríguez


Go to: Raül Sirvent

- - - - + + + +
- +

@@ -1701,12 +1701,12 @@

Go to: Raül Sirvent

Go to: Alexander Kanitz

- - - - + + + +
- +

@@ -1721,7 +1721,7 @@

Go to: Alexander Kanitz

@@ -1738,12 +1738,12 @@

Go to: Alexander Kanitz


Go to: Paul De Geest

- - - - + + + +
- +

@@ -1771,12 +1771,12 @@

Go to: Paul De Geest

Go to: Rudolf Wittner

- - - - + + + +
- +

@@ -1791,9 +1791,9 @@

Go to: Rudolf Wittneraffiliation [?]

@@ -1810,12 +1810,12 @@

Go to: Rudolf Wittner

Go to: Luca Pireddu

- - - - + + + +
- +

@@ -1840,12 +1840,12 @@

Go to: Luca Pireddu



Go to: Daniel Garijo

- - - - + + + +
- +
@@ -1863,7 +1863,7 @@

Go to: Daniel Garijoauthor [?]

@@ -1877,12 +1877,12 @@

Go to: Daniel Garijo

Go to: José María Fernández,José M. Fernández

- - - - + + + +
- +

@@ -1891,7 +1891,7 @@

Go to: José María Fer

@@ -1914,12 +1914,12 @@

Go to: José María Fer


Go to: Iacopo Colonnelli

- - - - + + + +
- +

@@ -1947,12 +1947,12 @@

Go to: Iacopo Colonnell


Go to: Matej Gallo

- - - - + + + +
- +

@@ -1977,12 +1977,12 @@

Go to: Matej Gallo




Go to: Tazro Ohta

- - - - + + + +
- +
@@ -1997,7 +1997,7 @@

Go to: Tazro Ohta

@@ -2014,12 +2014,12 @@

Go to: Tazro Ohta




Go to: Hirotaka Suetake

- - - - + + + +
- +
@@ -2047,12 +2047,12 @@

Go to: Hirotaka Suetake


Go to: Salvador Capella-Gutierrez

- - - - + + + +
- +

@@ -2077,12 +2077,12 @@

Go to: Salvador Capella


Go to: Renske de Wit

- - - - + + + +
- +

@@ -2110,12 +2110,12 @@

Go to: Renske de Wit

Go to: Bruno P. Kinoshita,Bruno de Paula Kinoshita

- - - - + + + +
- +

@@ -2124,7 +2124,7 @@

Go to: Bruno P. Kinoshi

@@ -2147,12 +2147,12 @@

Go to: Bruno P. Kinoshi


Go to: Stian Soiland-Reyes

- - - - + + + +
- +

@@ -2167,16 +2167,16 @@

Go to: Stian Soiland-Re

@@ -2193,12 +2193,12 @@

Go to: Stian Soiland-Re


Go to: doi

- - - - + + + +
- +

@@ -2229,12 +2229,12 @@

Go to: doi




Go to: Workflow Run Crate task force

- - - - + + + +
- +
@@ -2249,73 +2249,73 @@

Go to: Workflo

@@ -2328,9 +2328,9 @@

Go to: Workflo

@@ -2341,12 +2341,12 @@

Go to: Workflo


Go to: Process Run Crate

- - - - + + + +
- +

@@ -2374,12 +2374,12 @@

Go to: Process Run Crat


Go to: Workflow Run Crate

- - - - + + + +
- +

@@ -2407,12 +2407,12 @@

Go to: Workflow Run Cr


Go to: Provenance Run Crate

- - - - + + + +
- +

@@ -2440,12 +2440,12 @@

Go to: Provenance Ru


PROV mapping to Workflow Run Crate

- - - - + + + +
- +

@@ -2463,19 +2463,19 @@

PROV mapping to Workflow Run Crate

@@ -2489,12 +2489,12 @@

PROV mapping to Workflow Run Crate




Go to: Apache License, Version 2.0

- - - - + + + +
- +
diff --git a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.json b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.json index d8592e213..dbbb5b682 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.json +++ b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.json @@ -607,4 +607,4 @@ "orcid": "https://orcid.org/", "schema": "http://schema.org/" } -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.rdf b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.rdf index 6311784d1..56245ed74 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.rdf +++ b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.rdf @@ -306,5 +306,3 @@ sssom:object_source_version 3e-01 ; sssom:subject_source ; sssom:subject_source_version 20130430 . - - diff --git a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.ttl b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.ttl index 32ac2165f..a4452cbbe 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.ttl +++ b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.ttl @@ -402,5 +402,3 @@ prov:Plan skos:narrowMatch schema1:HowTo, sssom:mapping_justification semapv:ManualMappingCuration ; sssom:object_label "agent" ; sssom:subject_label "was started by" . 
- - diff --git a/tests/data/crates/valid/wrroc-paper/ro-crate-metadata.jsonld b/tests/data/crates/valid/wrroc-paper/ro-crate-metadata.jsonld index 82f7f2457..2cae7fbf7 100644 --- a/tests/data/crates/valid/wrroc-paper/ro-crate-metadata.jsonld +++ b/tests/data/crates/valid/wrroc-paper/ro-crate-metadata.jsonld @@ -807,4 +807,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/wrroc-paper/ro-crate-preview.html b/tests/data/crates/valid/wrroc-paper/ro-crate-preview.html index 7a2fc2a8f..da40ef862 100644 --- a/tests/data/crates/valid/wrroc-paper/ro-crate-preview.html +++ b/tests/data/crates/valid/wrroc-paper/ro-crate-preview.html @@ -4,7 +4,7 @@ - - + @@ -1465,12 +1465,12 @@

Recording provenance of workflow runs with RO-Crate (RO-Cr

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)

- - - - + + + +
- +

@@ -1491,39 +1491,39 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<

@@ -1539,11 +1539,11 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<

@@ -1563,12 +1563,12 @@

Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)<


Go to: Simone Leo

- - - - + + + +
- +

@@ -1596,12 +1596,12 @@

Go to: Simone Leo




Go to: Michael R Crusoe

- - - - + + + +
- +
@@ -1616,9 +1616,9 @@

Go to: Michael R Crusoe

@@ -1635,12 +1635,12 @@

Go to: Michael R Crusoe


Go to: Laura Rodríguez-Navas

- - - - + + + +
- +

@@ -1668,12 +1668,12 @@

Go to: Laura Rodríguez


Go to: Raül Sirvent

- - - - + + + +
- +

@@ -1701,12 +1701,12 @@

Go to: Raül Sirvent

Go to: Alexander Kanitz

- - - - + + + +
- +

@@ -1721,7 +1721,7 @@

Go to: Alexander Kanitz

@@ -1738,12 +1738,12 @@

Go to: Alexander Kanitz


Go to: Paul De Geest

- - - - + + + +
- +

@@ -1771,12 +1771,12 @@

Go to: Paul De Geest

Go to: Rudolf Wittner

- - - - + + + +
- +

@@ -1791,9 +1791,9 @@

Go to: Rudolf Wittneraffiliation [?]

@@ -1810,12 +1810,12 @@

Go to: Rudolf Wittner

Go to: Luca Pireddu

- - - - + + + +
- +

@@ -1840,12 +1840,12 @@

Go to: Luca Pireddu



Go to: Daniel Garijo

- - - - + + + +
- +
@@ -1863,7 +1863,7 @@

Go to: Daniel Garijoauthor [?]

@@ -1877,12 +1877,12 @@

Go to: Daniel Garijo

Go to: José María Fernández,José M. Fernández

- - - - + + + +
- +

@@ -1891,7 +1891,7 @@

Go to: José María Fer

@@ -1914,12 +1914,12 @@

Go to: José María Fer


Go to: Iacopo Colonnelli

- - - - + + + +
- +

@@ -1947,12 +1947,12 @@

Go to: Iacopo Colonnell


Go to: Matej Gallo

- - - - + + + +
- +

@@ -1977,12 +1977,12 @@

Go to: Matej Gallo




Go to: Tazro Ohta

- - - - + + + +
- +
@@ -1997,7 +1997,7 @@

Go to: Tazro Ohta

@@ -2014,12 +2014,12 @@

Go to: Tazro Ohta




Go to: Hirotaka Suetake

- - - - + + + +
- +
@@ -2047,12 +2047,12 @@

Go to: Hirotaka Suetake


Go to: Salvador Capella-Gutierrez

- - - - + + + +
- +

@@ -2077,12 +2077,12 @@

Go to: Salvador Capella


Go to: Renske de Wit

- - - - + + + +
- +

@@ -2110,12 +2110,12 @@

Go to: Renske de Wit

Go to: Bruno P. Kinoshita,Bruno de Paula Kinoshita

- - - - + + + +
- +

@@ -2124,7 +2124,7 @@

Go to: Bruno P. Kinoshi

@@ -2147,12 +2147,12 @@

Go to: Bruno P. Kinoshi


Go to: Stian Soiland-Reyes

- - - - + + + +
- +

@@ -2167,16 +2167,16 @@

Go to: Stian Soiland-Re

@@ -2193,12 +2193,12 @@

Go to: Stian Soiland-Re


Go to: doi

- - - - + + + +
- +

@@ -2229,12 +2229,12 @@

Go to: doi




Go to: Workflow Run Crate task force

- - - - + + + +
- +
@@ -2249,73 +2249,73 @@

Go to: Workflo

@@ -2328,9 +2328,9 @@

Go to: Workflo

@@ -2341,12 +2341,12 @@

Go to: Workflo


Go to: Process Run Crate

- - - - + + + +
- +

@@ -2374,12 +2374,12 @@

Go to: Process Run Crat


Go to: Workflow Run Crate

- - - - + + + +
- +

@@ -2407,12 +2407,12 @@

Go to: Workflow Run Cr


Go to: Provenance Run Crate

- - - - + + + +
- +

@@ -2440,12 +2440,12 @@

Go to: Provenance Ru


PROV mapping to Workflow Run Crate

- - - - + + + +
- +

@@ -2463,19 +2463,19 @@

PROV mapping to Workflow Run Crate

@@ -2489,12 +2489,12 @@

PROV mapping to Workflow Run Crate




Go to: Apache License, Version 2.0

- - - - + + + +
- +
diff --git a/tests/data/profiles/check_overriding/readme.md b/tests/data/profiles/check_overriding/readme.md index 6cb922188..72cf77f8c 100644 --- a/tests/data/profiles/check_overriding/readme.md +++ b/tests/data/profiles/check_overriding/readme.md @@ -4,7 +4,7 @@ The profile hierarchy is as follows: ``` - A + A / \ B C / \ | diff --git a/tests/data/profiles/fake/c/profile.ttl b/tests/data/profiles/fake/c/profile.ttl index 5050fdff2..d5b642fdd 100644 --- a/tests/data/profiles/fake/c/profile.ttl +++ b/tests/data/profiles/fake/c/profile.ttl @@ -27,7 +27,7 @@ # regular metadata, a basic description of the Profile rdfs:comment """Comment for the Profile C."""@en ; - + # the version of the profile schema:version "1.0.0" ; diff --git a/tests/data/profiles/fake_versioned_profiles/a_explicit_version_property/profile.ttl b/tests/data/profiles/fake_versioned_profiles/a_explicit_version_property/profile.ttl index 0b488217b..3878a8335 100644 --- a/tests/data/profiles/fake_versioned_profiles/a_explicit_version_property/profile.ttl +++ b/tests/data/profiles/fake_versioned_profiles/a_explicit_version_property/profile.ttl @@ -33,7 +33,7 @@ # the version of the profile schema:version "1.0.0" ; - + # a short code to refer to the Profile with when a URI can't be used prof:hasToken "a" ; diff --git a/tests/data/profiles/free_folder_structure/nested_c/c/profile.ttl b/tests/data/profiles/free_folder_structure/nested_c/c/profile.ttl index 5050fdff2..d5b642fdd 100644 --- a/tests/data/profiles/free_folder_structure/nested_c/c/profile.ttl +++ b/tests/data/profiles/free_folder_structure/nested_c/c/profile.ttl @@ -27,7 +27,7 @@ # regular metadata, a basic description of the Profile rdfs:comment """Comment for the Profile C."""@en ; - + # the version of the profile schema:version "1.0.0" ; diff --git a/tests/data/profiles/hidden_requirements/xh/a.ttl b/tests/data/profiles/hidden_requirements/xh/a.ttl index 20afc88d1..008f67d55 100644 --- a/tests/data/profiles/hidden_requirements/xh/a.ttl +++ 
b/tests/data/profiles/hidden_requirements/xh/a.ttl @@ -57,4 +57,3 @@ ro:A sh:minCount 1 ; sh:severity sh:Info ; ] . - diff --git a/tests/data/profiles/hidden_requirements/xh/must/a_must.ttl b/tests/data/profiles/hidden_requirements/xh/must/a_must.ttl index 834c8b508..36049d7b2 100644 --- a/tests/data/profiles/hidden_requirements/xh/must/a_must.ttl +++ b/tests/data/profiles/hidden_requirements/xh/must/a_must.ttl @@ -57,4 +57,3 @@ ro:A_MUST sh:minCount 1 ; sh:severity sh:Info ; ] . - diff --git a/tests/data/profiles/requirement_loading/x/a.ttl b/tests/data/profiles/requirement_loading/x/a.ttl index 963558f10..506a643fc 100644 --- a/tests/data/profiles/requirement_loading/x/a.ttl +++ b/tests/data/profiles/requirement_loading/x/a.ttl @@ -57,4 +57,3 @@ ro:A sh:minCount 1 ; sh:severity sh:Info ; ] . - diff --git a/tests/data/profiles/requirement_loading/x/must/a_must.ttl b/tests/data/profiles/requirement_loading/x/must/a_must.ttl index 834c8b508..36049d7b2 100644 --- a/tests/data/profiles/requirement_loading/x/must/a_must.ttl +++ b/tests/data/profiles/requirement_loading/x/must/a_must.ttl @@ -57,4 +57,3 @@ ro:A_MUST sh:minCount 1 ; sh:severity sh:Info ; ] . 
- diff --git a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py index 07040ef97..40e5a531d 100644 --- a/tests/integration/profiles/isa-ro-crate/test_0_investigation.py +++ b/tests/integration/profiles/isa-ro-crate/test_0_investigation.py @@ -48,9 +48,7 @@ def test_isa_additionaltype_not_investigation(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Root Data Entity must be Investigation"], - expected_triggered_issues=[ - "The root data entity must have additionalType of `Investigation`" - ], + expected_triggered_issues=["The root data entity must have additionalType of `Investigation`"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -79,9 +77,7 @@ def test_isa_investigation_no_identifier(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "The root data entity must have a non-empty identifier" - ], + expected_triggered_issues=["The root data entity must have a non-empty identifier"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -113,9 +109,7 @@ def test_isa_investigation_identifier_not_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Investigation MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "The root data entity must have a non-empty identifier" - ], + expected_triggered_issues=["The root data entity must have a non-empty identifier"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 49f98cef3..95b3b20ed 100644 --- 
a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -48,9 +48,7 @@ def test_isa_defined_term_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "DefinedTerm entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["DefinedTerm entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -81,9 +79,7 @@ def test_isa_defined_term_name_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "DefinedTerm entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["DefinedTerm entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -111,9 +107,7 @@ def test_isa_defined_term_termCode(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "DefinedTerm entity SHOULD have at least one termCode" - ], + expected_triggered_issues=["DefinedTerm entity SHOULD have at least one termCode"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py index 3683c18a8..7d06eb6e3 100644 --- a/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py +++ b/tests/integration/profiles/isa-ro-crate/test_11_propertyvalue.py @@ -47,9 +47,7 @@ def test_isa_property_value_name(): requirement_severity=Severity.REQUIRED, 
expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "PropertyValue entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["PropertyValue entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -81,9 +79,7 @@ def test_isa_property_value_name_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "PropertyValue entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["PropertyValue entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -112,9 +108,7 @@ def test_isa_property_value_value(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "PropertyValue entity SHOULD have at least one value" - ], + expected_triggered_issues=["PropertyValue entity SHOULD have at least one value"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -148,9 +142,7 @@ def test_isa_property_value_value_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "PropertyValue value MUST be of type string, float, or integer" - ], + expected_triggered_issues=["PropertyValue value MUST be of type string, float, or integer"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_1_study.py b/tests/integration/profiles/isa-ro-crate/test_1_study.py index 
91a783f19..1e5ab1ed8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_1_study.py +++ b/tests/integration/profiles/isa-ro-crate/test_1_study.py @@ -50,9 +50,7 @@ def test_isa_study_no_identifier(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Study MUST have base properties"], - expected_triggered_issues=[ - "Study entity MUST have a non-empty identifier of type string" - ], + expected_triggered_issues=["Study entity MUST have a non-empty identifier of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -84,9 +82,7 @@ def test_isa_study_identifier_not_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Study entity MUST have a non-empty identifier of type string" - ], + expected_triggered_issues=["Study entity MUST have a non-empty identifier of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -115,9 +111,7 @@ def test_isa_study_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Study MUST have base properties"], - expected_triggered_issues=[ - "Study entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["Study entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -149,9 +143,7 @@ def test_isa_study_name_not_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Study entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["Study entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", 
rocrate_entity_mod_sparql=sparql, ) @@ -183,9 +175,7 @@ def test_isa_study_correctly_referenced_from_investigation(): rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - expected_triggered_requirements=[ - "Study MUST be directly referenced from Investigation (Root Data Entity)" - ], + expected_triggered_requirements=["Study MUST be directly referenced from Investigation (Root Data Entity)"], expected_triggered_issues=[ "Study MUST be directly referenced in hasPart on the Investigation (Root Data Entity)" ], diff --git a/tests/integration/profiles/isa-ro-crate/test_2_assay.py b/tests/integration/profiles/isa-ro-crate/test_2_assay.py index 0791f70b1..f85a0dcf8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_2_assay.py +++ b/tests/integration/profiles/isa-ro-crate/test_2_assay.py @@ -47,9 +47,7 @@ def test_isa_assay_no_identifier(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Assay MUST have base properties"], - expected_triggered_issues=[ - "Assay entity MUST have a non-empty identifier of type string" - ], + expected_triggered_issues=["Assay entity MUST have a non-empty identifier of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -81,9 +79,7 @@ def test_isa_assay_identifier_not_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Root Data Entity must be Investigation"], # noqa: ERA001 - expected_triggered_issues=[ - "Assay entity MUST have a non-empty identifier of type string" - ], + expected_triggered_issues=["Assay entity MUST have a non-empty identifier of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index 695cb0091..4bed207d8 100644 --- 
a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -49,9 +49,7 @@ def test_isa_process_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Process MUST have name"], - expected_triggered_issues=[ - "Process entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["Process entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -150,9 +148,7 @@ def test_isa_process_object_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Process SHOULD have an object"], - expected_triggered_issues=[ - "Process objects MUST be of type File, Sample or BioSample" - ], + expected_triggered_issues=["Process objects MUST be of type File, Sample or BioSample"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -213,9 +209,7 @@ def test_isa_process_result_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Process SHOULD have a result"], - expected_triggered_issues=[ - "Process results MUST be of type File, Sample or BioSample" - ], + expected_triggered_issues=["Process results MUST be of type File, Sample or BioSample"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -278,9 +272,7 @@ def test_isa_process_value_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Process SHOULD have a parameter value"], - expected_triggered_issues=[ - "Process parameter values MUST be of type PropertyValue" - ], + expected_triggered_issues=["Process parameter values MUST be of type PropertyValue"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git 
a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index 38ef1ecc1..5b810e85c 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -50,9 +50,7 @@ def test_isa_protocol_no_name(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Protocol SHOULD have name"], # noqa: ERA001 - expected_triggered_issues=[ - "Protocol entity SHOULD have a non-empty name of type string" - ], + expected_triggered_issues=["Protocol entity SHOULD have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -114,9 +112,7 @@ def test_isa_protocol_no_description(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Protocol SHOULD have description"], # noqa: ERA001 - expected_triggered_issues=[ - "Protocol entity SHOULD have a non-empty description of type string" - ], + expected_triggered_issues=["Protocol entity SHOULD have a non-empty description of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_5_sample.py b/tests/integration/profiles/isa-ro-crate/test_5_sample.py index aff6addd8..8e3268616 100644 --- a/tests/integration/profiles/isa-ro-crate/test_5_sample.py +++ b/tests/integration/profiles/isa-ro-crate/test_5_sample.py @@ -47,9 +47,7 @@ def test_isa_sample_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Sample entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["Sample entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", 
rocrate_entity_mod_sparql=sparql, ) @@ -117,9 +115,7 @@ def test_isa_sample_name_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Sample entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["Sample entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -150,9 +146,7 @@ def test_isa_sample_no_additional_property(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Sample entity SHOULD have at least one additional property" - ], + expected_triggered_issues=["Sample entity SHOULD have at least one additional property"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -186,9 +180,7 @@ def test_isa_sample_additional_property_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Sample additional property MUST be of type PropertyValue" - ], + expected_triggered_issues=["Sample additional property MUST be of type PropertyValue"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_6_data.py b/tests/integration/profiles/isa-ro-crate/test_6_data.py index 879991a0c..5c46587d8 100644 --- a/tests/integration/profiles/isa-ro-crate/test_6_data.py +++ b/tests/integration/profiles/isa-ro-crate/test_6_data.py @@ -46,9 +46,7 @@ def test_isa_file_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - 
expected_triggered_issues=[ - "File entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["File entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -115,9 +113,7 @@ def test_isa_file_name_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "File entity MUST have a non-empty name of type string" - ], + expected_triggered_issues=["File entity MUST have a non-empty name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_7_person.py b/tests/integration/profiles/isa-ro-crate/test_7_person.py index 65c50f903..66a257bb3 100644 --- a/tests/integration/profiles/isa-ro-crate/test_7_person.py +++ b/tests/integration/profiles/isa-ro-crate/test_7_person.py @@ -46,9 +46,7 @@ def test_isa_person_given_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity MUST have a non-empty given name of type string" - ], + expected_triggered_issues=["Person entity MUST have a non-empty given name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -119,9 +117,7 @@ def test_isa_person_given_name_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity MUST have a non-empty given name of type string" - ], + expected_triggered_issues=["Person entity MUST have a non-empty given name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ 
-149,9 +145,7 @@ def test_isa_person_family_name(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity SHOULD have a non-empty family name of type string" - ], + expected_triggered_issues=["Person entity SHOULD have a non-empty family name of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -210,9 +204,7 @@ def test_isa_person_email(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity SHOULD have a non-empty email of type string" - ], + expected_triggered_issues=["Person entity SHOULD have a non-empty email of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -272,9 +264,7 @@ def test_isa_person_identifier(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity SHOULD have a non-empty identifier of type string" - ], + expected_triggered_issues=["Person entity SHOULD have a non-empty identifier of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -335,9 +325,7 @@ def test_isa_person_affiliation(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Person entity SHOULD have at least one affiliation" - ], + expected_triggered_issues=["Person entity SHOULD have at least one affiliation"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_8_article.py 
b/tests/integration/profiles/isa-ro-crate/test_8_article.py index af4d7f018..d350d1f58 100644 --- a/tests/integration/profiles/isa-ro-crate/test_8_article.py +++ b/tests/integration/profiles/isa-ro-crate/test_8_article.py @@ -46,9 +46,7 @@ def test_isa_article_headline(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Article entity MUST have a non-empty headline of type string" - ], + expected_triggered_issues=["Article entity MUST have a non-empty headline of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -114,9 +112,7 @@ def test_isa_article_headline_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Article entity MUST have a non-empty headline of type string" - ], + expected_triggered_issues=["Article entity MUST have a non-empty headline of type string"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -144,9 +140,7 @@ def test_isa_article_identifier(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Article entity MUST have a non-empty identifier of type string or PropertyValue" - ], + expected_triggered_issues=["Article entity MUST have a non-empty identifier of type string or PropertyValue"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) @@ -177,9 +171,7 @@ def test_isa_article_identifier_of_incorrect_type(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, # expected_triggered_requirements=["Study MUST have base properties"], # noqa: ERA001 - expected_triggered_issues=[ - "Article entity 
MUST have a non-empty identifier of type string or PropertyValue" - ], + expected_triggered_issues=["Article entity MUST have a non-empty identifier of type string or PropertyValue"], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_action.py b/tests/integration/profiles/process-run-crate/test_procrc_action.py index 1297136e3..3e3d38766 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_action.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_action.py @@ -23,7 +23,7 @@ def test_procrc_action_no_instrument(): - """\ + """ Test a Process Run Crate where the action does not have an instrument. """ do_entity_test( @@ -32,12 +32,12 @@ def test_procrc_action_no_instrument(): False, ["Process Run Crate Action"], ["The Action MUST have an instrument property that references the executed tool"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_instrument_bad_type(): - """\ + """ Test a Process Run Crate where the instrument does not point to a SoftwareApplication, SoftwareSourceCode or ComputationalWorkflow. """ @@ -47,12 +47,12 @@ def test_procrc_action_instrument_bad_type(): False, ["Process Run Crate Action"], ["The Action MUST have an instrument property that references the executed tool"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_not_mentioned(): - """\ + """ Test a Process Run Crate where the action is not listed in the Root Data Entity's mentions. 
""" @@ -62,12 +62,12 @@ def test_procrc_action_not_mentioned(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD be referenced from the Root Data Entity via mentions"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_name(): - """\ + """ Test a Process Run Crate where the action does not have an name. """ do_entity_test( @@ -76,12 +76,12 @@ def test_procrc_action_no_name(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have a name"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_description(): - """\ + """ Test a Process Run Crate where the action does not have a description. """ do_entity_test( @@ -90,12 +90,12 @@ def test_procrc_action_no_description(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have a description"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_endtime(): - """\ + """ Test a Process Run Crate where the action does not have an endTime. """ do_entity_test( @@ -104,12 +104,12 @@ def test_procrc_action_no_endtime(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have an endTime in ISO 8601 format"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_endtime(): - """\ + """ Test a Process Run Crate where the action does not have an endTime. """ do_entity_test( @@ -118,12 +118,12 @@ def test_procrc_action_bad_endtime(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have an endTime in ISO 8601 format"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_agent(): - """\ + """ Test a Process Run Crate where the action does not have an agent. 
""" do_entity_test( @@ -132,12 +132,12 @@ def test_procrc_action_no_agent(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have an agent that is a Person or Organization"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_agent(): - """\ + """ Test a Process Run Crate where the agent is neither a Person nor an Organization. """ @@ -147,12 +147,12 @@ def test_procrc_action_bad_agent(): False, ["Process Run Crate Action SHOULD"], ["The Action SHOULD have an agent that is a Person or Organization"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_result(): - """\ + """ Test a Process Run Crate where the CreateAction or UpdateAction does not have a result. """ @@ -162,12 +162,12 @@ def test_procrc_action_no_result(): False, ["Process Run Crate CreateAction UpdateAction SHOULD"], ["The Action SHOULD have a result"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_starttime(): - """\ + """ Test a Process Run Crate where the action does not have an startTime. """ do_entity_test( @@ -176,12 +176,12 @@ def test_procrc_action_no_starttime(): False, ["Process Run Crate Action MAY"], ["The Action MAY have a startTime"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_starttime(): - """\ + """ Test a Process Run Crate where the action does not have an startTime. """ do_entity_test( @@ -190,12 +190,12 @@ def test_procrc_action_bad_starttime(): False, ["Process Run Crate Action SHOULD"], ["If present, the Action startTime SHOULD be in ISO 8601 format"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_error_not_failed_status(): - """\ + """ Test a Process Run Crate where the action has an error even though its actionStatus is not FailedActionStatus. 
""" @@ -205,12 +205,12 @@ def test_procrc_action_error_not_failed_status(): False, ["Process Run Crate Action error"], ["error SHOULD NOT be specified unless actionStatus is set to FailedActionStatus"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_error_no_status(): - """\ + """ Test a Process Run Crate where the action has an error even though it has no actionStatus. """ @@ -220,12 +220,12 @@ def test_procrc_action_error_no_status(): False, ["Process Run Crate Action error"], ["error SHOULD NOT be specified unless actionStatus is set to FailedActionStatus"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_object(): - """\ + """ Test a Process Run Crate where the Action does not have an object. """ do_entity_test( @@ -234,12 +234,12 @@ def test_procrc_action_no_object(): False, ["Process Run Crate Action MAY"], ["The Action MAY have an object"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_actionstatus(): - """\ + """ Test a Process Run Crate where the Action does not have an actionstatus. """ do_entity_test( @@ -248,12 +248,12 @@ def test_procrc_action_no_actionstatus(): False, ["Process Run Crate Action MAY"], ["The Action MAY have an actionStatus"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_actionstatus(): - """\ + """ Test a Process Run Crate where the Action has an invalid actionstatus. 
""" do_entity_test( @@ -261,14 +261,16 @@ def test_procrc_action_bad_actionstatus(): Severity.RECOMMENDED, False, ["Process Run Crate Action SHOULD"], - ["If the Action has an actionStatus, it should be " - "http://schema.org/CompletedActionStatus or http://schema.org/FailedActionStatus"], - profile_identifier="process-run-crate" + [ + "If the Action has an actionStatus, it should be " + "http://schema.org/CompletedActionStatus or http://schema.org/FailedActionStatus" + ], + profile_identifier="process-run-crate", ) def test_procrc_action_no_error(): - """\ + """ Test a Process Run Crate where the Action does not have an error. """ do_entity_test( @@ -277,12 +279,12 @@ def test_procrc_action_no_error(): False, ["Process Run Crate Action MAY have error"], ["error MAY be specified if actionStatus is set to FailedActionStatus"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_obj_res_bad_type(): - """\ + """ Test a Process Run Crate where the Action's object or result does not point to a MediaObject, Dataset, Collection, CreativeWork or PropertyValue. @@ -292,14 +294,16 @@ def test_procrc_action_obj_res_bad_type(): Severity.RECOMMENDED, False, ["Process Run Crate Action object and result types"], - ["object and result SHOULD point to entities of type " - "MediaObject, Dataset, Collection, CreativeWork or PropertyValue"], - profile_identifier="process-run-crate" + [ + "object and result SHOULD point to entities of type " + "MediaObject, Dataset, Collection, CreativeWork or PropertyValue" + ], + profile_identifier="process-run-crate", ) def test_procrc_action_no_environment(): - """\ + """ Test a Process Run Crate where the Action does not have an environment. 
""" do_entity_test( @@ -308,12 +312,12 @@ def test_procrc_action_no_environment(): False, ["Process Run Crate Action MAY"], ["The Action MAY have an environment"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_environment(): - """\ + """ Test a Process Run Crate where the Action has an environment that does not point to PropertyValues. """ @@ -323,12 +327,12 @@ def test_procrc_action_bad_environment(): False, ["Process Run Crate Action SHOULD"], ["If the Action has an environment, it should point to entities of type PropertyValue"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_no_containerimage(): - """\ + """ Test a Process Run Crate where the Action does not have a containerimage. """ do_entity_test( @@ -337,12 +341,12 @@ def test_procrc_action_no_containerimage(): False, ["Process Run Crate Action MAY"], ["The Action MAY have a containerImage"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_action_bad_containerimage(): - """\ + """ Test a Process Run Crate where the Action has a containerImage that does not point to a URL or to a ContainerImage object. 
""" @@ -356,5 +360,5 @@ def test_procrc_action_bad_containerimage(): False, ["Process Run Crate Action SHOULD"], ["If the Action has a containerImage, it should point to a ContainerImage or a URL"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_application.py b/tests/integration/profiles/process-run-crate/test_procrc_application.py index c97ac9914..89cf04abd 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_application.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_application.py @@ -23,7 +23,7 @@ def test_procrc_application_no_name(): - """\ + """ Test a Process Run Crate where the application does not have a name. """ do_entity_test( @@ -32,12 +32,12 @@ def test_procrc_application_no_name(): False, ["ProcRC Application"], ["The Application SHOULD have a name"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_application_no_url(): - """\ + """ Test a Process Run Crate where the application does not have a url. """ do_entity_test( @@ -46,12 +46,12 @@ def test_procrc_application_no_url(): False, ["ProcRC Application"], ["The Application SHOULD have a url"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_application_no_version(): - """\ + """ Test a Process Run Crate where the application does not have a version or SoftwareVersion (SoftwareApplication). """ @@ -61,12 +61,12 @@ def test_procrc_application_no_version(): False, ["ProcRC SoftwareApplication"], ["The SoftwareApplication SHOULD have a version or softwareVersion"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_application_version_softwareversion(): - """\ + """ Test a Process Run Crate where the application has both a version and a SoftwareVersion (SoftwareApplication). 
""" @@ -76,12 +76,12 @@ def test_procrc_application_version_softwareversion(): False, ["ProcRC SoftwareApplication SingleVersion"], ["Process Run Crate SoftwareApplication should not have both version and softwareVersion"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_softwaresourcecode_no_version(): - """\ + """ Test a Process Run Crate where the application does not have a version (SoftwareSourceCode). """ @@ -91,12 +91,12 @@ def test_procrc_softwaresourcecode_no_version(): False, ["ProcRC SoftwareSourceCode or ComputationalWorkflow"], ["The SoftwareSourceCode or ComputationalWorkflow SHOULD have a version"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_application_id_no_absoluteuri(): - """\ + """ Test a Process Run Crate where the id of the application is not an absolute URI. """ @@ -106,12 +106,12 @@ def test_procrc_application_id_no_absoluteuri(): False, ["ProcRC SoftwareApplication ID"], ["The SoftwareApplication id SHOULD be an absolute URI"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_softwareapplication_no_softwarerequirements(): - """\ + """ Test a Process Run Crate where the SoftwareApplication does not have a SoftwareRequirements. """ @@ -121,12 +121,12 @@ def test_procrc_softwareapplication_no_softwarerequirements(): False, ["ProcRC SoftwareApplication MAY"], ["The SoftwareApplication MAY have a softwareRequirements that points to a SoftwareApplication"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_softwareapplication_bad_softwarerequirements(): - """\ + """ Test a Process Run Crate where the SoftwareApplication has a SoftwareRequirements that does not point to a SoftwareApplication. 
""" @@ -136,5 +136,5 @@ def test_procrc_softwareapplication_bad_softwarerequirements(): False, ["ProcRC SoftwareApplication MAY"], ["The SoftwareApplication MAY have a softwareRequirements that points to a SoftwareApplication"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_collection.py b/tests/integration/profiles/process-run-crate/test_procrc_collection.py index e593efbb9..988dcd472 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_collection.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_collection.py @@ -23,7 +23,7 @@ def test_procrc_collection_not_mentioned(): - """\ + """ Test a Process Run Crate where the collection is not listed in the Root Data Entity's mentions. """ @@ -33,12 +33,12 @@ def test_procrc_collection_not_mentioned(): False, ["Process Run Crate Collection SHOULD"], ["The Collection SHOULD be referenced from the Root Data Entity via mentions"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_collection_no_haspart(): - """\ + """ Test a Process Run Crate where the collection does not have a hasPart. """ do_entity_test( @@ -47,12 +47,12 @@ def test_procrc_collection_no_haspart(): False, ["Process Run Crate Collection SHOULD"], ["The Collection SHOULD have a hasPart"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_collection_no_mainentity(): - """\ + """ Test a Process Run Crate where the collection does not have a mainEntity. 
""" do_entity_test( @@ -61,5 +61,5 @@ def test_procrc_collection_no_mainentity(): False, ["Process Run Crate Collection SHOULD"], ["The Collection SHOULD have a mainEntity"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_containerimage.py b/tests/integration/profiles/process-run-crate/test_procrc_containerimage.py index ec267deb8..855321d46 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_containerimage.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_containerimage.py @@ -23,7 +23,7 @@ def test_procrc_containerimage_no_additionaltype(): - """\ + """ Test a Process Run Crate where the ContainerImage has no additionalType. """ do_entity_test( @@ -31,15 +31,17 @@ def test_procrc_containerimage_no_additionaltype(): Severity.RECOMMENDED, False, ["Process Run Crate ContainerImage SHOULD"], - ["The ContainerImage SHOULD have an additionalType pointing " - "to or " - ""], - profile_identifier="process-run-crate" + [ + "The ContainerImage SHOULD have an additionalType pointing " + "to or " + "" + ], + profile_identifier="process-run-crate", ) def test_procrc_containerimage_bad_additionaltype(): - """\ + """ Test a Process Run Crate where the ContainerImage additionalType does not point to one of the allowed values. """ @@ -48,15 +50,17 @@ def test_procrc_containerimage_bad_additionaltype(): Severity.RECOMMENDED, False, ["Process Run Crate ContainerImage SHOULD"], - ["The ContainerImage SHOULD have an additionalType pointing " - "to or " - ""], - profile_identifier="process-run-crate" + [ + "The ContainerImage SHOULD have an additionalType pointing " + "to or " + "" + ], + profile_identifier="process-run-crate", ) def test_procrc_containerimage_no_registry(): - """\ + """ Test a Process Run Crate where the ContainerImage has no registry. 
""" do_entity_test( @@ -65,12 +69,12 @@ def test_procrc_containerimage_no_registry(): False, ["Process Run Crate ContainerImage SHOULD"], ["The ContainerImage SHOULD have a registry"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_containerimage_no_name(): - """\ + """ Test a Process Run Crate where the ContainerImage has no name. """ do_entity_test( @@ -79,12 +83,12 @@ def test_procrc_containerimage_no_name(): False, ["Process Run Crate ContainerImage SHOULD"], ["The ContainerImage SHOULD have a name"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_containerimage_no_tag(): - """\ + """ Test a Process Run Crate where the ContainerImage has no tag. """ do_entity_test( @@ -93,12 +97,12 @@ def test_procrc_containerimage_no_tag(): False, ["Process Run Crate ContainerImage MAY"], ["The ContainerImage MAY have a tag"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) def test_procrc_containerimage_no_sha256(): - """\ + """ Test a Process Run Crate where the ContainerImage has no sha256. """ do_entity_test( @@ -107,5 +111,5 @@ def test_procrc_containerimage_no_sha256(): False, ["Process Run Crate ContainerImage MAY"], ["The ContainerImage MAY have a sha256"], - profile_identifier="process-run-crate" + profile_identifier="process-run-crate", ) diff --git a/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py b/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py index 0fc9d305b..51ee22012 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_root_data_entity.py @@ -23,7 +23,7 @@ def test_procrc_no_conformsto(): - """\ + """ Test a Process Run Crate where the root data entity does not have a conformsTo. 
""" @@ -32,14 +32,16 @@ def test_procrc_no_conformsto(): Severity.REQUIRED, False, ["Root Data Entity Metadata"], - ["The Root Data Entity MUST reference a CreativeWork entity with an @id URI " - "that is consistent with the versioned permalink of the profile"], - profile_identifier="process-run-crate" + [ + "The Root Data Entity MUST reference a CreativeWork entity with an @id URI " + "that is consistent with the versioned permalink of the profile" + ], + profile_identifier="process-run-crate", ) def test_procrc_conformsto_bad_type(): - """\ + """ Test a Process Run Crate where the root data entity does not conformsTo a CreativeWork. """ @@ -48,14 +50,16 @@ def test_procrc_conformsto_bad_type(): Severity.REQUIRED, False, ["Root Data Entity Metadata"], - ["The Root Data Entity MUST reference a CreativeWork entity with an @id URI " - "that is consistent with the versioned permalink of the profile"], - profile_identifier="process-run-crate" + [ + "The Root Data Entity MUST reference a CreativeWork entity with an @id URI " + "that is consistent with the versioned permalink of the profile" + ], + profile_identifier="process-run-crate", ) def test_procrc_conformsto_bad_profile(): - """\ + """ Test a Process Run Crate where the root data entity does not conformsTo a Process Run Crate profile. 
""" @@ -64,7 +68,9 @@ def test_procrc_conformsto_bad_profile(): Severity.REQUIRED, False, ["Root Data Entity Metadata"], - ["The Root Data Entity MUST reference a CreativeWork entity with an @id URI " - "that is consistent with the versioned permalink of the profile"], - profile_identifier="process-run-crate" + [ + "The Root Data Entity MUST reference a CreativeWork entity with an @id URI " + "that is consistent with the versioned permalink of the profile" + ], + profile_identifier="process-run-crate", ) diff --git a/tests/integration/profiles/process-run-crate/test_valid_prc.py b/tests/integration/profiles/process-run-crate/test_valid_prc.py index bc726a9ee..4aa790791 100644 --- a/tests/integration/profiles/process-run-crate/test_valid_prc.py +++ b/tests/integration/profiles/process-run-crate/test_valid_prc.py @@ -29,19 +29,19 @@ def test_valid_process_run_crate_required(): Severity.REQUIRED, True, profile_identifier="process-run-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) do_entity_test( ValidROC().process_run_crate_collections, Severity.REQUIRED, True, profile_identifier="process-run-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) do_entity_test( ValidROC().process_run_crate_containerimage, Severity.REQUIRED, True, profile_identifier="process-run-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py b/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py index 62ef2029a..9d4145c94 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_controlaction.py @@ -23,7 +23,7 @@ def 
test_provrc_controlaction_no_instrument(): - """\ + """ Test a Provenance Run Crate where a ControlAction has no instrument. """ do_entity_test( @@ -31,14 +31,16 @@ def test_provrc_controlaction_no_instrument(): Severity.REQUIRED, False, ["ProvRC ControlAction MUST"], - ["A ControlAction must reference a HowToStep instance representing " - "the corresponding workflow step via instrument"], - profile_identifier="provenance-run-crate" + [ + "A ControlAction must reference a HowToStep instance representing " + "the corresponding workflow step via instrument" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_bad_instrument(): - """\ + """ Test a Provenance Run Crate where a ControlAction instrument does not point to a HowToStep. """ @@ -47,14 +49,16 @@ def test_provrc_controlaction_bad_instrument(): Severity.REQUIRED, False, ["ProvRC ControlAction MUST"], - ["A ControlAction must reference a HowToStep instance representing " - "the corresponding workflow step via instrument"], - profile_identifier="provenance-run-crate" + [ + "A ControlAction must reference a HowToStep instance representing " + "the corresponding workflow step via instrument" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_no_object(): - """\ + """ Test a Provenance Run Crate where a ControlAction has no object. """ do_entity_test( @@ -63,12 +67,12 @@ def test_provrc_controlaction_no_object(): False, ["ProvRC ControlAction MUST"], ["A ControlAction must reference the action representing the corresponding tool run via object"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_bad_object(): - """\ + """ Test a Provenance Run Crate where a ControlAction object does not point to an action. 
""" @@ -78,12 +82,12 @@ def test_provrc_controlaction_bad_object(): False, ["ProvRC ControlAction MUST"], ["A ControlAction must reference the action representing the corresponding tool run via object"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_no_actionstatus(): - """\ + """ Test a Provenance Run Crate where a ControlAction has no actionStatus. """ do_entity_test( @@ -92,12 +96,12 @@ def test_provrc_controlaction_no_actionstatus(): False, ["ProvRC ControlAction and OrganizeAction MAY"], ["The Action MAY have an actionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_bad_actionstatus(): - """\ + """ Test a Provenance Run Crate where a ControlAction has an invalid actionStatus. """ @@ -106,15 +110,17 @@ def test_provrc_controlaction_bad_actionstatus(): Severity.RECOMMENDED, False, ["ProvRC ControlAction and OrganizeAction SHOULD"], - ["If the action has an actionStatus, it should be " - "http://schema.org/CompletedActionStatus or " - "http://schema.org/FailedActionStatus"], - profile_identifier="provenance-run-crate" + [ + "If the action has an actionStatus, it should be " + "http://schema.org/CompletedActionStatus or " + "http://schema.org/FailedActionStatus" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_no_error(): - """\ + """ Test a Provenance Run Crate where a ControlAction with an actionStatus set to FailedActionStatus has no error. 
""" @@ -124,12 +130,12 @@ def test_provrc_controlaction_no_error(): False, ["ProvRC ControlAction and OrganizeAction error"], ["error MAY be specified if actionStatus is set to FailedActionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_controlaction_error_not_failed_status(): - """\ + """ Test a Provenance Run Crate where a ControlAction with an actionStatus different from FailedActionStatus sets the error property """ @@ -139,5 +145,5 @@ def test_provrc_controlaction_error_not_failed_status(): False, ["Provenance Run Crate ControlAction and OrganizeAction error"], ["error SHOULD NOT be specified unless actionStatus is set to FailedActionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_file.py b/tests/integration/profiles/provenance-run-crate/test_provrc_file.py index 96ba8e82b..fe67b965c 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_file.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_file.py @@ -23,7 +23,7 @@ def test_provrc_environment_file_no_encodingformat(): - """\ + """ Test a Provenance Run Crate where an environment file has no encodingFormat. """ @@ -33,12 +33,12 @@ def test_provrc_environment_file_no_encodingformat(): False, ["Provenance Run Crate build instructions file SHOULD"], ["the environment file SHOULD have an encodingFormat"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_environment_file_no_conformsto(): - """\ + """ Test a Provenance Run Crate where an environment file has no conformsTo. 
""" do_entity_test( @@ -47,5 +47,5 @@ def test_provrc_environment_file_no_conformsto(): False, ["Provenance Run Crate build instructions file SHOULD"], ["the environment file SHOULD have a conformsTo"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py index 41654474d..b02749221 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py @@ -23,7 +23,7 @@ def test_provrc_howtostep_no_inv_step(): - """\ + """ Test a Provenance Run Crate where a HowToStep is not referred to via step. """ do_entity_test( @@ -32,12 +32,12 @@ def test_provrc_howtostep_no_inv_step(): False, ["ProvRC HowToStep MUST"], ["A HowToStep must be referred to from a ComputationalWorkflow via step"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_bad_inv_step(): - """\ + """ Test a Provenance Run Crate where a HowToStep is not referred to from a ComputationalWorkflow via step. """ @@ -47,12 +47,12 @@ def test_provrc_howtostep_bad_inv_step(): False, ["ProvRC HowToStep MUST"], ["A HowToStep must be referred to from a ComputationalWorkflow via step"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_no_workexample(): - """\ + """ Test a Provenance Run Crate where a HowToStep has no workExample. 
""" do_entity_test( @@ -61,12 +61,12 @@ def test_provrc_howtostep_no_workexample(): False, ["ProvRC HowToStep MUST"], ["A HowToStep must refer to its corresponding tool via workExample"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_bad_workexample(): - """\ + """ Test a Provenance Run Crate where a HowToStep does not refer to a tool via workExample. """ @@ -76,12 +76,12 @@ def test_provrc_howtostep_bad_workexample(): False, ["ProvRC HowToStep MUST"], ["A HowToStep must refer to its corresponding tool via workExample"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_no_position(): - """\ + """ Test a Provenance Run Crate where a HowToStep has no position. """ do_entity_test( @@ -90,12 +90,12 @@ def test_provrc_howtostep_no_position(): False, ["ProvRC HowToStep MAY"], ["A HowToStep may indicate its position in the execution order via position"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_bad_position(): - """\ + """ Test a Provenance Run Crate where a HowToStep has a position that is not an integer or a string representing an integer. """ @@ -105,12 +105,12 @@ def test_provrc_howtostep_bad_position(): False, ["ProvRC HowToStep MUST"], ["If specified, position must be an integer or a string representing an integer"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_no_connection(): - """\ + """ Test a Provenance Run Crate where a HowToStep has no connection. 
""" do_entity_test( @@ -119,12 +119,12 @@ def test_provrc_howtostep_no_connection(): False, ["ProvRC HowToStep MAY"], ["HowToStep may have a connection property"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_bad_connection(): - """\ + """ Test a Provenance Run Crate where a HowToStep has a connection that does not point to a ParameterConnection. """ @@ -134,12 +134,12 @@ def test_provrc_howtostep_bad_connection(): False, ["ProvRC HowToStep MUST"], ["If the HowToStep has a connection, it must point to a ParameterConnection"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_no_buildinstructions(): - """\ + """ Test a Provenance Run Crate where a HowToStep has no buildInstructions. """ do_entity_test( @@ -148,12 +148,12 @@ def test_provrc_howtostep_no_buildinstructions(): False, ["ProvRC HowToStep MAY"], ["A HowToStep MAY have a buildInstructions pointing to a File"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_howtostep_bad_buildinstructions(): - """\ + """ Test a Provenance Run Crate where a HowToStep has a buildInstructions that does not point to a File. 
""" @@ -163,5 +163,5 @@ def test_provrc_howtostep_bad_buildinstructions(): False, ["ProvRC HowToStep MAY"], ["A HowToStep MAY have a buildInstructions pointing to a File"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_organizeaction.py b/tests/integration/profiles/provenance-run-crate/test_provrc_organizeaction.py index b772a0c25..bec7178d3 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_organizeaction.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_organizeaction.py @@ -23,7 +23,7 @@ def test_provrc_organizeaction_no_instrument(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction has no instrument. """ do_entity_test( @@ -31,14 +31,16 @@ def test_provrc_organizeaction_no_instrument(): Severity.REQUIRED, False, ["ProvRC OrganizeAction MUST"], - ["An OrganizeAction must reference an entity representing the " - "workflow engine (e.g. a SoftwareApplication) via instrument"], - profile_identifier="provenance-run-crate" + [ + "An OrganizeAction must reference an entity representing the " + "workflow engine (e.g. a SoftwareApplication) via instrument" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_bad_instrument(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction does not reference an application via instrument. """ @@ -47,14 +49,16 @@ def test_provrc_organizeaction_bad_instrument(): Severity.REQUIRED, False, ["ProvRC OrganizeAction MUST"], - ["An OrganizeAction must reference an entity representing the " - "workflow engine (e.g. a SoftwareApplication) via instrument"], - profile_identifier="provenance-run-crate" + [ + "An OrganizeAction must reference an entity representing the " + "workflow engine (e.g. 
a SoftwareApplication) via instrument" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_no_result(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction has no result. """ do_entity_test( @@ -63,12 +67,12 @@ def test_provrc_organizeaction_no_result(): False, ["ProvRC OrganizeAction MUST"], ["An OrganizeAction must reference the action representing the workflow run via result"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_bad_result(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction does not reference an action via result. """ @@ -78,12 +82,12 @@ def test_provrc_organizeaction_bad_result(): False, ["ProvRC OrganizeAction MUST"], ["An OrganizeAction must reference the action representing the workflow run via result"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_no_object(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction has no object. """ do_entity_test( @@ -92,12 +96,12 @@ def test_provrc_organizeaction_no_object(): False, ["ProvRC OrganizeAction MUST"], ["An OrganizeAction must reference the ControlAction instances representing the step executions via object"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_bad_object(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction object does not point to a ControlAction. 
""" @@ -107,12 +111,12 @@ def test_provrc_organizeaction_bad_object(): False, ["ProvRC OrganizeAction MUST"], ["An OrganizeAction must reference the ControlAction instances representing the step executions via object"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_no_actionstatus(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction has no actionStatus. """ do_entity_test( @@ -121,12 +125,12 @@ def test_provrc_organizeaction_no_actionstatus(): False, ["ProvRC ControlAction and OrganizeAction MAY"], ["The Action MAY have an actionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_bad_actionstatus(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction has an invalid actionStatus. """ @@ -135,15 +139,17 @@ def test_provrc_organizeaction_bad_actionstatus(): Severity.RECOMMENDED, False, ["ProvRC ControlAction and OrganizeAction SHOULD"], - ["If the action has an actionStatus, it should be " - "http://schema.org/CompletedActionStatus or " - "http://schema.org/FailedActionStatus"], - profile_identifier="provenance-run-crate" + [ + "If the action has an actionStatus, it should be " + "http://schema.org/CompletedActionStatus or " + "http://schema.org/FailedActionStatus" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_no_error(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction with an actionStatus set to FailedActionStatus has no error. 
""" @@ -153,12 +159,12 @@ def test_provrc_organizeaction_no_error(): False, ["ProvRC ControlAction and OrganizeAction error"], ["error MAY be specified if actionStatus is set to FailedActionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_organizeaction_error_not_failed_status(): - """\ + """ Test a Provenance Run Crate where an OrganizeAction with an actionStatus different from FailedActionStatus sets the error property """ @@ -168,5 +174,5 @@ def test_provrc_organizeaction_error_not_failed_status(): False, ["Provenance Run Crate ControlAction and OrganizeAction error"], ["error SHOULD NOT be specified unless actionStatus is set to FailedActionStatus"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_parameterconnection.py b/tests/integration/profiles/provenance-run-crate/test_provrc_parameterconnection.py index ce7862743..3c051ccb3 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_parameterconnection.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_parameterconnection.py @@ -23,7 +23,7 @@ def test_parameterconnection_no_sourceparameter(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection does not have a SourceParameter. """ @@ -33,12 +33,12 @@ def test_parameterconnection_no_sourceparameter(): False, ["ProvRC ParameterConnection MUST"], ["ParameterConnection must have a sourceParameter that references a FormalParameter"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_bad_sourceparameter(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection has a SourceParameter that does not reference a FormalParameter. 
""" @@ -48,12 +48,12 @@ def test_parameterconnection_bad_sourceparameter(): False, ["ProvRC ParameterConnection MUST"], ["ParameterConnection must have a sourceParameter that references a FormalParameter"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_no_targetparameter(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection does not have a TargetParameter. """ @@ -63,12 +63,12 @@ def test_parameterconnection_no_targetparameter(): False, ["ProvRC ParameterConnection MUST"], ["ParameterConnection must have a targetParameter that references a FormalParameter"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_bad_targetparameter(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection has a TargetParameter that does not reference a FormalParameter. """ @@ -78,12 +78,12 @@ def test_parameterconnection_bad_targetparameter(): False, ["ProvRC ParameterConnection MUST"], ["ParameterConnection must have a targetParameter that references a FormalParameter"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_not_referenced(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection is not referenced by any other entity through the connection property. """ @@ -93,12 +93,12 @@ def test_parameterconnection_not_referenced(): False, ["ParameterConnection references"], ["Missing `connection` to this `ParameterConnection` entity"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_not_workflow_referenced(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection is not referenced by any Workflow through the connection property. 
""" @@ -108,12 +108,12 @@ def test_parameterconnection_not_workflow_referenced(): False, ["ParameterConnection references on computational workflows"], ["Missing `ComputationalWorkflow` connection to this `ParameterConnection` entity"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_parameterconnection_not_step_referenced(): - """\ + """ Test a Provenance Run Crate where a ParameterConnection is not referenced by any HowToStep through the connection property. """ @@ -123,5 +123,5 @@ def test_parameterconnection_not_step_referenced(): False, ["ParameterConnection references on HowToStep instances"], ["Missing `HowToStep` connection to this `ParameterConnection` entity"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_propertyvalue.py b/tests/integration/profiles/provenance-run-crate/test_provrc_propertyvalue.py index 6d9930242..7c829c27e 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_propertyvalue.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_propertyvalue.py @@ -23,7 +23,7 @@ def test_provrc_propertyvalue_no_unitcode(): - """\ + """ Test a Provenance Run Crate where a PropertyValue does not have a unitCode. """ @@ -33,12 +33,12 @@ def test_provrc_propertyvalue_no_unitcode(): False, ["Provenance Run Crate resource usage PropertyValue SHOULD"], ["A PropertyValue used to represent resourceUsage SHOULD have a unitCode"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_propertyvalue_no_propertyid(): - """\ + """ Test a Provenance Run Crate where a PropertyValue does not have a propertyID. 
""" @@ -48,5 +48,5 @@ def test_provrc_propertyvalue_no_propertyid(): False, ["Provenance Run Crate resource usage PropertyValue MUST"], ["A PropertyValue used to represent resourceUsage MUST have a propertyID"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_root_data_entity.py b/tests/integration/profiles/provenance-run-crate/test_provrc_root_data_entity.py index 32ab3bf37..345d772be 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_root_data_entity.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_root_data_entity.py @@ -23,7 +23,7 @@ def test_provrc_conformsto_no_provrc(): - """\ + """ Test a Provenance Run Crate where the root data entity does not conformsTo the Provenance Run Crate profile. """ @@ -32,15 +32,17 @@ def test_provrc_conformsto_no_provrc(): Severity.REQUIRED, False, ["Provenance Run Crate Root Data Entity"], - ["The Root Data Entity MUST reference a CreativeWork entity with an " - "@id URI that is consistent with the versioned permalink of the " - "Provenance Run Crate profile"], - profile_identifier="provenance-run-crate" + [ + "The Root Data Entity MUST reference a CreativeWork entity with an " + "@id URI that is consistent with the versioned permalink of the " + "Provenance Run Crate profile" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_conformsto_no_wfrc(): - """\ + """ Test a Provenance Run Crate where the root data entity does not conformsTo the Workflow Run Crate profile. 
""" @@ -49,15 +51,17 @@ def test_provrc_conformsto_no_wfrc(): Severity.RECOMMENDED, False, ["Provenance Run Crate Root Data Entity SHOULD"], - ["The Root Data Entity SHOULD reference CreativeWork entities " - "corresponding to the Process Run Crate, Workflow Run Crate and " - "Workflow RO-Crate profiles"], - profile_identifier="provenance-run-crate" + [ + "The Root Data Entity SHOULD reference CreativeWork entities " + "corresponding to the Process Run Crate, Workflow Run Crate and " + "Workflow RO-Crate profiles" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_conformsto_no_wroc(): - """\ + """ Test a Provenance Run Crate where the root data entity does not conformsTo the Workflow RO-Crate profile. """ @@ -66,15 +70,17 @@ def test_provrc_conformsto_no_wroc(): Severity.RECOMMENDED, False, ["Provenance Run Crate Root Data Entity SHOULD"], - ["The Root Data Entity SHOULD reference CreativeWork entities " - "corresponding to the Process Run Crate, Workflow Run Crate and " - "Workflow RO-Crate profiles"], - profile_identifier="provenance-run-crate" + [ + "The Root Data Entity SHOULD reference CreativeWork entities " + "corresponding to the Process Run Crate, Workflow Run Crate and " + "Workflow RO-Crate profiles" + ], + profile_identifier="provenance-run-crate", ) def test_provrc_conformsto_no_procrc(): - """\ + """ Test a Provenance Run Crate where the root data entity does not conformsTo the Process Run Crate profile. 
""" @@ -83,8 +89,10 @@ def test_provrc_conformsto_no_procrc(): Severity.RECOMMENDED, False, ["Provenance Run Crate Root Data Entity SHOULD"], - ["The Root Data Entity SHOULD reference CreativeWork entities " - "corresponding to the Process Run Crate, Workflow Run Crate and " - "Workflow RO-Crate profiles"], - profile_identifier="provenance-run-crate" + [ + "The Root Data Entity SHOULD reference CreativeWork entities " + "corresponding to the Process Run Crate, Workflow Run Crate and " + "Workflow RO-Crate profiles" + ], + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_tool.py b/tests/integration/profiles/provenance-run-crate/test_provrc_tool.py index e3c6b03c0..dc1b23a8e 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_tool.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_tool.py @@ -23,7 +23,7 @@ def test_provrc_tool_no_input(): - """\ + """ Test a Provenance Run Crate where a tool does not have an input. """ do_entity_test( @@ -32,12 +32,12 @@ def test_provrc_tool_no_input(): False, ["ProvRC tool MAY"], ["A tool MAY have an input"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_no_output(): - """\ + """ Test a Provenance Run Crate where a tool does not have an output. """ do_entity_test( @@ -46,12 +46,12 @@ def test_provrc_tool_no_output(): False, ["ProvRC tool MAY"], ["A tool MAY have an output"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_no_environment(): - """\ + """ Test a Provenance Run Crate where a tool does not have an environment. 
""" do_entity_test( @@ -60,12 +60,12 @@ def test_provrc_tool_no_environment(): False, ["ProvRC tool MAY"], ["A tool MAY have an environment"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_input(): - """\ + """ Test a Provenance Run Crate where a tool has an input that does not point to a FormalParameter. """ @@ -75,12 +75,12 @@ def test_provrc_tool_bad_input(): False, ["ProvRC tool MUST"], ["Tool input and output MUST point to FormalParameter entities"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_output(): - """\ + """ Test a Provenance Run Crate where a tool has an output that does not point to a FormalParameter. """ @@ -90,12 +90,12 @@ def test_provrc_tool_bad_output(): False, ["ProvRC tool MUST"], ["Tool input and output MUST point to FormalParameter entities"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_environment(): - """\ + """ Test a Provenance Run Crate where a tool has an environment that does not point to a FormalParameter. """ @@ -105,12 +105,12 @@ def test_provrc_tool_bad_environment(): False, ["ProvRC tool SHOULD"], ["If the tool has an environment, it SHOULD point to entities of type FormalParameter"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_no_inv_instrument(): - """\ + """ Test a Provenance Run Crate where a tool is not referred to from an action via instrument. 
""" @@ -120,12 +120,12 @@ def test_provrc_tool_no_inv_instrument(): False, ["ProvRC tool MUST"], ["A tool must be referred to from an action via instrument"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_inv_instrument(): - """\ + """ Test a Provenance Run Crate where a tool is referred to via instrument by an entity that is not an action. """ @@ -135,12 +135,12 @@ def test_provrc_tool_bad_inv_instrument(): False, ["ProvRC tool MUST"], ["A tool must be referred to from an action via instrument"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_no_softwarerequirements(): - """\ + """ Test a Provenance Run Crate where a tool does not have a softwarerequirements. """ @@ -150,12 +150,12 @@ def test_provrc_tool_no_softwarerequirements(): False, ["ProvRC tool MAY"], ["The tool MAY have a softwareRequirements that points to a SoftwareApplication"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_softwarerequirements(): - """\ + """ Test a Provenance Run Crate where a tool has a softwarerequirements that does not point to a SoftwareApplication. """ @@ -165,12 +165,12 @@ def test_provrc_tool_bad_softwarerequirements(): False, ["ProvRC tool MAY"], ["The tool MAY have a softwareRequirements that points to a SoftwareApplication"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_no_mainentity(): - """\ + """ Test a Provenance Run Crate where a tool does not have a mainEntity. 
""" @@ -180,12 +180,12 @@ def test_provrc_tool_no_mainentity(): False, ["ProvRC tool MAY"], ["The tool MAY have a mainEntity that points to a SoftwareApplication"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_tool_bad_mainentity(): - """\ + """ Test a Provenance Run Crate where a tool has a mainEntity that does not point to a SoftwareApplication. """ @@ -195,5 +195,5 @@ def test_provrc_tool_bad_mainentity(): False, ["ProvRC tool MAY"], ["The tool MAY have a mainEntity that points to a SoftwareApplication"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_tool_action.py b/tests/integration/profiles/provenance-run-crate/test_provrc_tool_action.py index fd89a23bb..29aa5c872 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_tool_action.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_tool_action.py @@ -23,7 +23,7 @@ def test_provrc_action_no_resourceusage(): - """\ + """ Test a Provenance Run Crate where a tool action has no resourceUsage. """ do_entity_test( @@ -32,12 +32,12 @@ def test_provrc_action_no_resourceusage(): False, ["Provenance Run Crate tool action MAY"], ["A tool action MAY have a resourceUsage"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_action_bad_resourceusage(): - """\ + """ Test a Provenance Run Crate where a tool action has a resourceUsage that does not point to PropertyValue. 
""" @@ -47,5 +47,5 @@ def test_provrc_action_bad_resourceusage(): False, ["Provenance Run Crate tool action MUST"], ["If present, resourceUsage MUST point to PropertyValue"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_workflow.py b/tests/integration/profiles/provenance-run-crate/test_provrc_workflow.py index 62eb3cfaf..0c49a7f8f 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_workflow.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_workflow.py @@ -23,7 +23,7 @@ def test_provrc_workflow_no_haspart(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow does not have the hasPart property. """ @@ -33,12 +33,12 @@ def test_provrc_workflow_no_haspart(): False, ["Provenance Run Crate ComputationalWorkflow MUST"], ["ComputationalWorkflow MUST refer to orchestrated tools via hasPart"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_bad_haspart(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow does not point to the orchestrated tools via hasPart. """ @@ -48,12 +48,12 @@ def test_provrc_workflow_bad_haspart(): False, ["Provenance Run Crate ComputationalWorkflow MUST"], ["ComputationalWorkflow MUST refer to orchestrated tools via hasPart"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_type_no_howto(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow that points to steps does not have the HowTo type. 
""" @@ -63,12 +63,12 @@ def test_provrc_workflow_type_no_howto(): False, ["Provenance Run Crate ComputationalWorkflow with steps MUST"], ["A ComputationalWorkflow that links to steps MUST have the HowTo type"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_no_step(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow does not have the step property. """ @@ -78,12 +78,12 @@ def test_provrc_workflow_no_step(): False, ["Provenance Run Crate ComputationalWorkflow SHOULD"], ["ComputationalWorkflow SHOULD refer to HowToStep instances via step"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_bad_step(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow does not have the step property. """ @@ -93,12 +93,12 @@ def test_provrc_workflow_bad_step(): False, ["Provenance Run Crate ComputationalWorkflow SHOULD"], ["ComputationalWorkflow SHOULD refer to HowToStep instances via step"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_no_connection(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow does not have the connection property. """ @@ -108,12 +108,12 @@ def test_provrc_workflow_no_connection(): False, ["Provenance Run Crate ComputationalWorkflow MAY"], ["ComputationalWorkflow may have a connection property"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_bad_connection(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow has a connection that does not point to a ParameterConnection. 
""" @@ -123,12 +123,12 @@ def test_provrc_workflow_bad_connection(): False, ["Provenance Run Crate ComputationalWorkflow MUST"], ["If the ComputationalWorkflow has a connection, it must point to a ParameterConnection"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_no_buildinstructions(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow / HowTo does not have the buildInstructions property. """ @@ -138,12 +138,12 @@ def test_provrc_workflow_no_buildinstructions(): False, ["Provenance Run Crate ComputationalWorkflow / HowTo MAY"], ["A ComputationalWorkflow / HowTo MAY have a buildInstructions pointing to a File"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) def test_provrc_workflow_bad_buildinstructions(): - """\ + """ Test a Provenance Run Crate where a ComputationalWorkflow / HowTo has a buildInstructions that does not point to a File. """ @@ -153,5 +153,5 @@ def test_provrc_workflow_bad_buildinstructions(): False, ["Provenance Run Crate ComputationalWorkflow / HowTo MAY"], ["A ComputationalWorkflow / HowTo MAY have a buildInstructions pointing to a File"], - profile_identifier="provenance-run-crate" + profile_identifier="provenance-run-crate", ) diff --git a/tests/integration/profiles/provenance-run-crate/test_valid_provrc.py b/tests/integration/profiles/provenance-run-crate/test_valid_provrc.py index 3c199bb21..78908a97c 100644 --- a/tests/integration/profiles/provenance-run-crate/test_valid_provrc.py +++ b/tests/integration/profiles/provenance-run-crate/test_valid_provrc.py @@ -29,5 +29,5 @@ def test_valid_provenance_run_crate_required(): Severity.REQUIRED, True, profile_identifier="provenance-run-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) diff --git a/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py 
b/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py index d619300b2..bb6bfcb0e 100644 --- a/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py +++ b/tests/integration/profiles/ro-crate/test_file_descriptor_entity.py @@ -34,7 +34,7 @@ def test_missing_entity(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor entity existence"], - ["The root of the document MUST have an entity with @id `ro-crate-metadata.json`"] + ["The root of the document MUST have an entity with @id `ro-crate-metadata.json`"], ) @@ -45,7 +45,7 @@ def test_invalid_entity_type(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file MUST be a CreativeWork, as per schema.org"] + ["The RO-Crate metadata file MUST be a CreativeWork, as per schema.org"], ) @@ -56,8 +56,10 @@ def test_missing_entity_about(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file MUST be a CreativeWork, as per schema.org", - "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity"] + [ + "The RO-Crate metadata file MUST be a CreativeWork, as per schema.org", + "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity", + ], ) @@ -69,7 +71,7 @@ def test_invalid_entity_about(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity"] + ["The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity"], ) @@ -80,7 +82,7 @@ def test_invalid_entity_about_type(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity"] + ["The RO-Crate 
metadata file descriptor MUST have an `about` property referencing the Root Data Entity"], ) @@ -91,8 +93,10 @@ def test_missing_conforms_to(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file descriptor MUST have a `conformsTo` " - "property with the RO-Crate specification version"] + [ + "The RO-Crate metadata file descriptor MUST have a `conformsTo` " + "property with the RO-Crate specification version" + ], ) @@ -103,6 +107,8 @@ def test_invalid_conforms_to(): models.Severity.REQUIRED, False, ["RO-Crate Metadata File Descriptor REQUIRED properties"], - ["The RO-Crate metadata file descriptor MUST have a `conformsTo` " - "property with the RO-Crate specification version"] + [ + "The RO-Crate metadata file descriptor MUST have a `conformsTo` " + "property with the RO-Crate specification version" + ], ) diff --git a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py index a85d0cc6a..62aff3598 100644 --- a/tests/integration/profiles/ro-crate/test_file_descriptor_format.py +++ b/tests/integration/profiles/ro-crate/test_file_descriptor_format.py @@ -28,24 +28,12 @@ def test_missing_file_descriptor(): """Test a RO-Crate without a file descriptor.""" rocrate_path = paths.missing_file_descriptor - do_entity_test( - rocrate_path, - models.Severity.REQUIRED, - False, - ["File Descriptor existence"], - [] - ) + do_entity_test(rocrate_path, models.Severity.REQUIRED, False, ["File Descriptor existence"], []) def test_not_valid_json_format(): """Test a RO-Crate with an invalid JSON file descriptor format.""" - do_entity_test( - paths.invalid_json_format, - models.Severity.REQUIRED, - False, - ["File Descriptor JSON format"], - [] - ) + do_entity_test(paths.invalid_json_format, models.Severity.REQUIRED, False, ["File Descriptor JSON format"], []) def test_not_valid_jsonld_format_missing_context(): @@ -55,7 +43,7 @@ def 
test_not_valid_jsonld_format_missing_context(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - [] + [], ) @@ -68,7 +56,7 @@ def test_not_valid_jsonld_format_not_flattened(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ["RO-Crate file descriptor \"ro-crate-metadata.json\" is not fully flattened"] + ['RO-Crate file descriptor "ro-crate-metadata.json" is not fully flattened'], ) @@ -81,11 +69,12 @@ def test_not_valid_jsonld_format_not_valid_value_object(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ["entity \"nested-file.txt\" contains both @id and @value", - "is not a valid value object: @language and @type cannot coexist", - "entity \"invalidNestedReference\" is not a valid node object reference", - "entity \"{'@language': 'en', '@value': 12345}\" is not a valid value object" - ] + [ + 'entity "nested-file.txt" contains both @id and @value', + "is not a valid value object: @language and @type cannot coexist", + 'entity "invalidNestedReference" is not a valid node object reference', + "entity \"{'@language': 'en', '@value': 12345}\" is not a valid value object", + ], ) @@ -99,7 +88,7 @@ def test_not_valid_jsonld_format_missing_ids(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ["file descriptor does not contain the @id attribute"] + ["file descriptor does not contain the @id attribute"], ) @@ -113,7 +102,7 @@ def test_not_valid_jsonld_format_missing_types(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ["file descriptor does not contain the @type attribute"] + ["file descriptor does not contain the @type attribute"], ) @@ -129,7 +118,7 @@ def test_invalid_jsonld_context(): ["File Descriptor JSON-LD format"], ["Unable to retrieve the JSON-LD context 'https://w3id.org/ro/terms/invalid/context'"], profile_identifier="ro-crate", - abort_on_first=True + abort_on_first=True, ) @@ -143,7 +132,7 @@ def test_invalid_jsonld_not_compacted(): 
models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ['The 1 occurrence of the "https://schema.org/name" URI cannot be used as a key'] + ['The 1 occurrence of the "https://schema.org/name" URI cannot be used as a key'], ) @@ -157,8 +146,10 @@ def test_invalid_jsonld_unexpected_key(): models.Severity.REQUIRED, False, ["File Descriptor JSON-LD format"], - ['The 1 occurrence of the JSON-LD key "hasPartx" is not allowed in the compacted format', - 'The 2 occurrences of the JSON-LD key "namex" are not allowed in the compacted format'] + [ + 'The 1 occurrence of the JSON-LD key "hasPartx" is not allowed in the compacted format', + 'The 2 occurrences of the JSON-LD key "namex" are not allowed in the compacted format', + ], ) @@ -167,10 +158,4 @@ def test_valid_jsonld_custom_term(): Test a RO-Crate with a valid JSON-LD file descriptor format which contains custom terms. """ - do_entity_test( - ValidROC().rocrate_with_custom_terms, - models.Severity.REQUIRED, - True, - [], - [] - ) + do_entity_test(ValidROC().rocrate_with_custom_terms, models.Severity.REQUIRED, True, [], []) diff --git a/tests/integration/profiles/ro-crate/test_root_data_entity.py b/tests/integration/profiles/ro-crate/test_root_data_entity.py index 770bb3224..1e0329661 100644 --- a/tests/integration/profiles/ro-crate/test_root_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_root_data_entity.py @@ -34,7 +34,7 @@ def test_missing_root_data_entity(): models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity type"], - ["The Root Data Entity MUST be a `Dataset` (as per `schema.org`)"] + ["The Root Data Entity MUST be a `Dataset` (as per `schema.org`)"], ) @@ -45,7 +45,7 @@ def test_invalid_root_data_entity_value(): models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity value restriction"], - ["The Root Data Entity URI MUST end with `/`"] + ["The Root Data Entity URI MUST end with `/`"], ) @@ -56,7 +56,7 @@ def test_missing_root_data_entity_name(): 
models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity REQUIRED properties"], - ["The Root Data Entity MUST have a `name` property (as specified by schema.org)"] + ["The Root Data Entity MUST have a `name` property (as specified by schema.org)"], ) @@ -67,7 +67,7 @@ def test_missing_root_data_entity_description(): models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity REQUIRED properties"], - ["The Root Data Entity MUST have a `description` property (as specified by schema.org)"] + ["The Root Data Entity MUST have a `description` property (as specified by schema.org)"], ) @@ -78,7 +78,7 @@ def test_missing_root_data_entity_license(): models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity REQUIRED properties"], - ["The Root Data Entity MUST have a `license` property (as specified by schema.org)"] + ["The Root Data Entity MUST have a `license` property (as specified by schema.org)"], ) @@ -89,7 +89,7 @@ def test_recommended_root_data_entity_value(): models.Severity.RECOMMENDED, False, ["RO-Crate Root Data Entity RECOMMENDED value"], - ["Root Data Entity URI is not denoted by the string `./`"] + ["Root Data Entity URI is not denoted by the string `./`"], ) @@ -100,9 +100,11 @@ def test_invalid_required_root_date(invalid_datetime): models.Severity.REQUIRED, False, ["RO-Crate Root Data Entity REQUIRED properties"], - ["The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) " - "with a valid ISO 8601 date"], - rocrate_entity_patch={"./": {"datePublished": invalid_datetime}} + [ + "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) " + "with a valid ISO 8601 date" + ], + rocrate_entity_patch={"./": {"datePublished": invalid_datetime}}, ) @@ -113,7 +115,7 @@ def test_valid_required_root_date(valid_datetime): models.Severity.REQUIRED, True, rocrate_entity_patch={"./": {"datePublished": valid_datetime}}, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + 
skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -124,8 +126,10 @@ def test_invalid_recommended_root_date(): models.Severity.RECOMMENDED, False, ["RO-Crate Root Data Entity RECOMMENDED properties"], - ["The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) " - "with a valid ISO 8601 date and the precision of at least the day level"] + [ + "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) " + "with a valid ISO 8601 date and the precision of at least the day level" + ], ) @@ -135,7 +139,7 @@ def test_valid_referenced_generic_data_entities(): paths.valid_referenced_generic_data_entities, models.Severity.REQUIRED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -146,7 +150,7 @@ def test_missing_root_license_contextual_entity(): models.Severity.RECOMMENDED, False, ["RO-Crate Root Data Entity RECOMMENDED properties"], - ["The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type"] + ["The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type"], ) @@ -157,7 +161,7 @@ def test_missing_root_license_name(): models.Severity.OPTIONAL, False, ["License definition"], - ["Missing license name"] + ["Missing license name"], ) @@ -168,14 +172,10 @@ def test_missing_root_license_description(): models.Severity.OPTIONAL, False, ["License definition"], - ["Missing license description"] + ["Missing license description"], ) def test_availability_of_root_entity_rocrate_archive(): """Test a RO-Crate without a root data entity license description.""" - do_entity_test( - ValidROC().sort_and_change_archive, - models.Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().sort_and_change_archive, models.Severity.REQUIRED, True) diff --git a/tests/integration/profiles/ro-crate/test_valid_ro-crate.py 
b/tests/integration/profiles/ro-crate/test_valid_ro-crate.py index 2f86f3894..cdf2d13e3 100644 --- a/tests/integration/profiles/ro-crate/test_valid_ro-crate.py +++ b/tests/integration/profiles/ro-crate/test_valid_ro-crate.py @@ -24,38 +24,22 @@ def test_valid_roc_required(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().wrroc_paper, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().wrroc_paper, Severity.REQUIRED, True) def test_valid_roc_recommended(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().wrroc_paper, - Severity.RECOMMENDED, - True - ) + do_entity_test(ValidROC().wrroc_paper, Severity.RECOMMENDED, True) def test_valid_roc_required_with_long_datetime(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().wrroc_paper_long_date, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().wrroc_paper_long_date, Severity.REQUIRED, True) def test_valid_roc_required_with_value_objects(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().rocrate_with_value_objects, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().rocrate_with_value_objects, Severity.REQUIRED, True) def test_valid_roc_with_relative_root_required(): @@ -64,44 +48,28 @@ def test_valid_roc_with_relative_root_required(): ValidROC().rocrate_with_relative_root, Severity.REQUIRED, True, - rocrate_relative_root_path="custom-relative-root/" + rocrate_relative_root_path="custom-relative-root/", ) def test_valid_roc_remote_required(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().sort_and_change_remote, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().sort_and_change_remote, Severity.REQUIRED, True) def test_valid_roc_bagit_required(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().bagit, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().bagit, Severity.REQUIRED, True) def test_valid_roc_bagit_zip_required(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().bagit_zip, - 
Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().bagit_zip, Severity.REQUIRED, True) def test_valid_roc_remote_bagit_required(): """Test a valid RO-Crate.""" - do_entity_test( - ValidROC().bagit_remote_zip, - Severity.REQUIRED, - True - ) + do_entity_test(ValidROC().bagit_remote_zip, Severity.REQUIRED, True) def test_valid_roc_with_at_base_set(): @@ -111,5 +79,5 @@ def test_valid_roc_with_at_base_set(): Severity.REQUIRED, True, # Skipping check: Root Data Entity URI ending with / is not required in RO-Crate 1.2 - skip_checks=["ro-crate-1.1_10.1"] + skip_checks=["ro-crate-1.1_10.1"], ) diff --git a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py index 0770873c0..fead44786 100644 --- a/tests/integration/profiles/ro-crate/test_web_based_data_entity.py +++ b/tests/integration/profiles/ro-crate/test_web_based_data_entity.py @@ -33,8 +33,10 @@ def test_no_recommended_sdDatePublished(): models.Severity.RECOMMENDED, False, ["Web-based Data Entity: RECOMMENDED properties"], - ["Web-based Data Entities SHOULD have " - "a `sdDatePublished` property to indicate when the absolute URL was accessed"] + [ + "Web-based Data Entities SHOULD have " + "a `sdDatePublished` property to indicate when the absolute URL was accessed" + ], ) @@ -45,7 +47,9 @@ def test_invalid_recommended_sdDatePublished(invalid_datetime): models.Severity.RECOMMENDED, False, ["Web-based Data Entity: RECOMMENDED properties"], - ["Web-based Data Entities SHOULD have " - "a `sdDatePublished` property to indicate when the absolute URL was accessed"], - rocrate_entity_patch={"https://sort-and-change-case.cwl": {"datePublished": invalid_datetime}} + [ + "Web-based Data Entities SHOULD have " + "a `sdDatePublished` property to indicate when the absolute URL was accessed" + ], + rocrate_entity_patch={"https://sort-and-change-case.cwl": {"datePublished": invalid_datetime}}, ) diff --git 
a/tests/integration/profiles/test_metadata_only.py b/tests/integration/profiles/test_metadata_only.py index e9e19513c..47d2c827d 100644 --- a/tests/integration/profiles/test_metadata_only.py +++ b/tests/integration/profiles/test_metadata_only.py @@ -34,14 +34,17 @@ def valid_roc_paths(): return [ value for attr in dir(valid_roc) - if not attr.startswith('_') - and not any(excluded in attr for excluded in ( - 'bagit', - 'multi_profile_crate', - 'rocrate_with_relative_root', - 'rocrate_with_at_base_set' # Excluded: has dedicated test with skip_checks - )) - and not str(value := getattr(valid_roc, attr)).endswith('.zip') + if not attr.startswith("_") + and not any( + excluded in attr + for excluded in ( + "bagit", + "multi_profile_crate", + "rocrate_with_relative_root", + "rocrate_with_at_base_set", # Excluded: has dedicated test with skip_checks + ) + ) + and not str(value := getattr(valid_roc, attr)).endswith(".zip") ] @@ -52,14 +55,7 @@ def test_valid_ro_crates_from_folder(valid_roc_path): temp_path = Path(tmpdirname) / valid_roc_path.name shutil.copytree(valid_roc_path, temp_path) valid_roc_path = temp_path - do_entity_test( - valid_roc_path, - models.Severity.REQUIRED, - True, - [], - [], - metadata_only=True - ) + do_entity_test(valid_roc_path, models.Severity.REQUIRED, True, [], [], metadata_only=True) @pytest.mark.parametrize("valid_roc_path", valid_roc_paths()) @@ -73,11 +69,5 @@ def test_valid_ro_crates_from_metadata_dict(valid_roc_path): assert metadata_dict is not None, "Failed to load metadata dict" assert isinstance(metadata_dict, dict), "Metadata dict is not a dictionary" do_entity_test( - valid_roc_path, - models.Severity.REQUIRED, - True, - [], - [], - metadata_dict=metadata_dict, - metadata_only=True + valid_roc_path, models.Severity.REQUIRED, True, [], [], metadata_dict=metadata_dict, metadata_only=True ) diff --git a/tests/integration/profiles/workflow-ro-crate/test_main_workflow.py 
b/tests/integration/profiles/workflow-ro-crate/test_main_workflow.py index d99eb128d..5c98c1f59 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_main_workflow.py +++ b/tests/integration/profiles/workflow-ro-crate/test_main_workflow.py @@ -23,7 +23,7 @@ def test_main_workflow_bad_type(): - """\ + """ Test a Workflow RO-Crate where the main workflow has an incorrect type. """ do_entity_test( @@ -32,12 +32,12 @@ def test_main_workflow_bad_type(): False, ["Main Workflow definition"], ["The Main Workflow must have types File, SoftwareSourceCode, ComputationalWorkflow"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_no_lang(): - """\ + """ Test a Workflow RO-Crate where the main workflow does not have a programmingLanguage property. """ @@ -47,12 +47,12 @@ def test_main_workflow_no_lang(): False, ["Main Workflow definition"], ["The Main Workflow must refer to its language via programmingLanguage"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_no_image(): - """\ + """ Test a Workflow RO-Crate where the main workflow does not have an image property. """ @@ -62,12 +62,12 @@ def test_main_workflow_no_image(): False, ["Main Workflow optional properties"], ["The Crate MAY contain a Main Workflow Diagram; if present it MUST be referred to via 'image'"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_no_cwl_desc(): - """\ + """ Test a Workflow RO-Crate where the main workflow does not have an CWL description. 
""" @@ -77,12 +77,12 @@ def test_main_workflow_no_cwl_desc(): False, ["Main Workflow optional properties"], ["The Crate MAY contain a Main Workflow CWL Description; if present it MUST be referred to via 'subjectOf'"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_cwl_desc_bad_type(): - """\ + """ Test a Workflow RO-Crate where the main workflow has a CWL description but of the wrong type. """ @@ -92,12 +92,12 @@ def test_main_workflow_cwl_desc_bad_type(): False, ["Main Workflow optional properties"], ["The CWL Description type must be File, SoftwareSourceCode, HowTo"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_cwl_desc_no_lang(): - """\ + """ Test a Workflow RO-Crate where the main workflow has a CWL description but the description has no programmingLanguage. """ @@ -107,12 +107,12 @@ def test_main_workflow_cwl_desc_no_lang(): False, ["Main Workflow optional properties"], ["The CWL Description SHOULD have a language of https://w3id.org/workflowhub/workflow-ro-crate#cwl"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_file_existence(): - """\ + """ Test a Workflow RO-Crate where the main workflow file is not in the crate. """ do_entity_test( @@ -121,12 +121,12 @@ def test_main_workflow_file_existence(): False, ["Main Workflow file existence"], ["Main Workflow", "not found in crate"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_workflow_diagram_file_existence(): - """\ + """ Test a Workflow RO-Crate where the workflow diagram file is not in the crate. 
""" @@ -136,12 +136,12 @@ def test_workflow_diagram_file_existence(): False, ["Workflow-related files existence"], ["Workflow diagram", "not found in crate"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_workflow_description_file_existence(): - """\ + """ Test a Workflow RO-Crate where the workflow CWL description file is not in the crate. """ @@ -151,12 +151,12 @@ def test_workflow_description_file_existence(): False, ["Workflow-related files existence"], ["Workflow CWL description", "not found in crate"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_main_workflow_bad_conformsto(): - """\ + """ Test a Workflow RO-Crate where the main workflow does not conform to the bioschemas computational workflow 1.0 or later. """ @@ -166,5 +166,5 @@ def test_main_workflow_bad_conformsto(): False, ["Main Workflow recommended properties"], ["The Main Workflow SHOULD comply with Bioschemas ComputationalWorkflow profile version 1.0 or later"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) diff --git a/tests/integration/profiles/workflow-ro-crate/test_valid_wroc.py b/tests/integration/profiles/workflow-ro-crate/test_valid_wroc.py index 1a1c758b3..256400230 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_valid_wroc.py +++ b/tests/integration/profiles/workflow-ro-crate/test_valid_wroc.py @@ -29,12 +29,12 @@ def test_valid_workflow_roc_required(): Severity.REQUIRED, True, profile_identifier="workflow-ro-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) do_entity_test( ValidROC().workflow_roc_string_license, Severity.REQUIRED, True, profile_identifier="workflow-ro-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) diff --git 
a/tests/integration/profiles/workflow-ro-crate/test_wroc_crate.py b/tests/integration/profiles/workflow-ro-crate/test_wroc_crate.py index ff9ea0b2e..cfc43460f 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_wroc_crate.py +++ b/tests/integration/profiles/workflow-ro-crate/test_wroc_crate.py @@ -22,7 +22,7 @@ def test_wroc_no_tests(): - """\ + """ Test a Workflow RO-Crate with no test/ Dataset. """ do_entity_test( @@ -31,12 +31,12 @@ def test_wroc_no_tests(): False, ["test directory"], ["The test/ dir should be a Dataset"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_wroc_no_examples(): - """\ + """ Test a Workflow RO-Crate with no examples/ Dataset. """ do_entity_test( @@ -45,5 +45,5 @@ def test_wroc_no_examples(): False, ["examples directory"], ["The examples/ dir should be a Dataset"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) diff --git a/tests/integration/profiles/workflow-ro-crate/test_wroc_descriptor.py b/tests/integration/profiles/workflow-ro-crate/test_wroc_descriptor.py index 076ebbea2..a3b633f17 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_wroc_descriptor.py +++ b/tests/integration/profiles/workflow-ro-crate/test_wroc_descriptor.py @@ -23,7 +23,7 @@ def test_wroc_descriptor_bad_conforms_to(): - """\ + """ Test a Workflow RO-Crate where the metadata file descriptor does not contain the required URIs. 
""" @@ -32,7 +32,9 @@ def test_wroc_descriptor_bad_conforms_to(): Severity.RECOMMENDED, False, ["WROC Metadata File Descriptor properties"], - ["The Metadata File Descriptor conformsTo SHOULD contain https://w3id.org/ro/crate/1.1 " - "and https://w3id.org/workflowhub/workflow-ro-crate/1.0"], - profile_identifier="workflow-ro-crate" + [ + "The Metadata File Descriptor conformsTo SHOULD contain https://w3id.org/ro/crate/1.1 " + "and https://w3id.org/workflowhub/workflow-ro-crate/1.0" + ], + profile_identifier="workflow-ro-crate", ) diff --git a/tests/integration/profiles/workflow-ro-crate/test_wroc_readme.py b/tests/integration/profiles/workflow-ro-crate/test_wroc_readme.py index 0b079b638..e314643b7 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_wroc_readme.py +++ b/tests/integration/profiles/workflow-ro-crate/test_wroc_readme.py @@ -22,7 +22,7 @@ def test_wroc_readme_not_about_crate(): - """\ + """ Test a Workflow RO-Crate where the README.md is not about the crate. """ do_entity_test( @@ -31,12 +31,12 @@ def test_wroc_readme_not_about_crate(): False, ["README.md properties"], ["The README.md SHOULD be about the crate"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_wroc_readme_wrong_encoding_format(): - """\ + """ Test a Workflow RO-Crate where the README.md has the wrong encodingFormat.. 
""" do_entity_test( @@ -45,5 +45,5 @@ def test_wroc_readme_wrong_encoding_format(): False, ["README.md properties"], ["The README.md SHOULD have text/markdown as its encodingFormat"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) diff --git a/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py b/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py index c7885cec0..008db720f 100644 --- a/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py +++ b/tests/integration/profiles/workflow-ro-crate/test_wroc_root_metadata.py @@ -22,7 +22,7 @@ def test_wroc_no_license(): - """\ + """ Test a Workflow RO-Crate where the root data entity has no license. """ do_entity_test( @@ -31,12 +31,12 @@ def test_wroc_no_license(): False, ["WROC Root Data Entity Required Properties"], ["The Crate (Root Data Entity) must specify a license, which should be a URL but can also be a string"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) def test_wroc_no_mainentity(): - """\ + """ Test a Workflow RO-Crate where the root data entity has no mainEntity. 
""" do_entity_test( @@ -45,5 +45,5 @@ def test_wroc_no_mainentity(): False, ["Main Workflow entity existence"], ["The Main Workflow must be specified through a `mainEntity` property in the root data entity"], - profile_identifier="workflow-ro-crate" + profile_identifier="workflow-ro-crate", ) diff --git a/tests/integration/profiles/workflow-run-crate/test_valid_wfrc.py b/tests/integration/profiles/workflow-run-crate/test_valid_wfrc.py index 90809229c..3f18c1322 100644 --- a/tests/integration/profiles/workflow-run-crate/test_valid_wfrc.py +++ b/tests/integration/profiles/workflow-run-crate/test_valid_wfrc.py @@ -29,5 +29,5 @@ def test_valid_workflow_run_crate_required(): Severity.REQUIRED, True, profile_identifier="workflow-run-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) diff --git a/tests/integration/profiles/workflow-run-crate/test_wfrc_computational_workflow.py b/tests/integration/profiles/workflow-run-crate/test_wfrc_computational_workflow.py index 0e2c22c1c..201b0ec44 100644 --- a/tests/integration/profiles/workflow-run-crate/test_wfrc_computational_workflow.py +++ b/tests/integration/profiles/workflow-run-crate/test_wfrc_computational_workflow.py @@ -23,7 +23,7 @@ def test_wfrc_workflow_no_input(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow has no input. """ do_entity_test( @@ -32,12 +32,12 @@ def test_wfrc_workflow_no_input(): False, ["Workflow Run Crate ComputationalWorkflow MAY"], ["A ComputationalWorkflow MAY have an input"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_wfrc_workflow_no_output(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow has no output. 
""" do_entity_test( @@ -46,12 +46,12 @@ def test_wfrc_workflow_no_output(): False, ["Workflow Run Crate ComputationalWorkflow MAY"], ["A ComputationalWorkflow MAY have an output"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_wfrc_workflow_input_no_formalparam(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow input does not point to FormalParameter instances. """ @@ -61,12 +61,12 @@ def test_wfrc_workflow_input_no_formalparam(): False, ["Workflow Run Crate ComputationalWorkflow MUST"], ["ComputationalWorkflow input and output MUST point to FormalParameter entities"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_wfrc_workflow_output_no_formalparam(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow output does not point to FormalParameter instances. """ @@ -76,12 +76,12 @@ def test_wfrc_workflow_output_no_formalparam(): False, ["Workflow Run Crate ComputationalWorkflow MUST"], ["ComputationalWorkflow input and output MUST point to FormalParameter entities"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_wfrc_workflow_no_environment(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow does not have an environment. """ @@ -91,12 +91,12 @@ def test_wfrc_workflow_no_environment(): False, ["Workflow Run Crate ComputationalWorkflow MAY"], ["The Workflow MAY have an environment"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_wfrc_workflow_bad_environment(): - """\ + """ Test a Workflow Run Crate where a ComputationalWorkflow has an environment that does not point to FormalParameter entities. 
""" @@ -106,5 +106,5 @@ def test_wfrc_workflow_bad_environment(): False, ["Workflow Run Crate ComputationalWorkflow SHOULD"], ["If the Workflow has an environment, it SHOULD point to entities of type FormalParameter"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) diff --git a/tests/integration/profiles/workflow-run-crate/test_wfrc_formal_parameter.py b/tests/integration/profiles/workflow-run-crate/test_wfrc_formal_parameter.py index 89ea4f059..7bfb85798 100644 --- a/tests/integration/profiles/workflow-run-crate/test_wfrc_formal_parameter.py +++ b/tests/integration/profiles/workflow-run-crate/test_wfrc_formal_parameter.py @@ -23,7 +23,7 @@ def test_formalparam_no_inv_exampleofwork(): - """\ + """ Test a Workflow Run Crate where a FormalParameter is not referenced via exampleOfWork. """ @@ -33,12 +33,12 @@ def test_formalparam_no_inv_exampleofwork(): False, ["Workflow Run Crate FormalParameter SHOULD"], ["FormalParameter SHOULD be referenced from a data entity or PropertyValue via exampleOfWork"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_bad_inv_exampleofwork(): - """\ + """ Test a Workflow Run Crate where a FormalParameter is referenced via exampleOfWork by an entity that is not a data entity or PropertyValue. """ @@ -48,12 +48,12 @@ def test_formalparam_bad_inv_exampleofwork(): False, ["Workflow Run Crate FormalParameter SHOULD"], ["FormalParameter SHOULD be referenced from a data entity or PropertyValue via exampleOfWork"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_no_workexample(): - """\ + """ Test a Workflow Run Crate where a FormalParameter does not have a workExample property. 
""" @@ -63,12 +63,12 @@ def test_formalparam_no_workexample(): False, ["Workflow Run Crate FormalParameter MAY"], ["FormalParameter MAY have a workExample"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_bad_workexample(): - """\ + """ Test a Workflow Run Crate where a FormalParameter references via workExample an entity that is not a data entity or PropertyValue. """ @@ -78,12 +78,12 @@ def test_formalparam_bad_workexample(): False, ["Workflow Run Crate FormalParameter MUST"], ["FormalParameter MUST refer to a data entity or PropertyValue via workExample"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_no_additionaltype(): - """\ + """ Test a Workflow Run Crate where a FormalParameter does not have an additionalType. """ @@ -93,12 +93,12 @@ def test_formalparam_no_additionaltype(): False, ["Workflow Run Crate FormalParameter MUST"], ["FormalParameter MUST have an additionalType"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_maps_pv_bad_additionaltype(): - """\ + """ Test a Workflow Run Crate where a FormalParameter that maps to a PropertyValue does not have PropertyValue or a subclass of DataType as its additionalType. 
@@ -108,14 +108,16 @@ def test_formalparam_maps_pv_bad_additionaltype(): Severity.RECOMMENDED, False, ["Workflow Run Crate FormalParameter that maps to a PropertyValue"], - ["A FormalParameter that maps to a PropertyValue SHOULD have " - "PropertyValue or a subclass of DataType as its additionalType"], - profile_identifier="workflow-run-crate" + [ + "A FormalParameter that maps to a PropertyValue SHOULD have " + "PropertyValue or a subclass of DataType as its additionalType" + ], + profile_identifier="workflow-run-crate", ) def test_formalparam_maps_file_bad_additionaltype(): - """\ + """ Test a Workflow Run Crate where a FormalParameter that maps to a File does not have File as its additionalType. """ @@ -125,12 +127,12 @@ def test_formalparam_maps_file_bad_additionaltype(): False, ["Workflow Run Crate FormalParameter that maps to a File"], ["A FormalParameter that maps to a File SHOULD have File as its additionalType"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_maps_dataset_bad_additionaltype(): - """\ + """ Test a Workflow Run Crate where a FormalParameter that maps to a Dataset does not have Dataset as its additionalType. """ @@ -140,12 +142,12 @@ def test_formalparam_maps_dataset_bad_additionaltype(): False, ["Workflow Run Crate FormalParameter that maps to a Dataset"], ["A FormalParameter that maps to a Dataset SHOULD have Dataset as its additionalType"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_maps_collection_bad_additionaltype(): - """\ + """ Test a Workflow Run Crate where a FormalParameter that maps to a Collection does not have Collection as its additionalType. 
""" @@ -155,12 +157,12 @@ def test_formalparam_maps_collection_bad_additionaltype(): False, ["Workflow Run Crate FormalParameter that maps to a Collection"], ["A FormalParameter that maps to a Collection SHOULD have Collection as its additionalType"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_no_name(): - """\ + """ Test a Workflow Run Crate where a FormalParameter does not have a name property. """ @@ -170,12 +172,12 @@ def test_formalparam_no_name(): False, ["Workflow Run Crate FormalParameter SHOULD"], ["FormalParameter SHOULD have a name"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_no_description(): - """\ + """ Test a Workflow Run Crate where a FormalParameter does not have a description property. """ @@ -185,12 +187,12 @@ def test_formalparam_no_description(): False, ["Workflow Run Crate FormalParameter MAY"], ["FormalParameter MAY have a description"], - profile_identifier="workflow-run-crate" + profile_identifier="workflow-run-crate", ) def test_formalparam_env_bad_exampleofwork(): - """\ + """ Test a Workflow Run Crate where a FormalParameter referenced from a ComputationalWorkflow via environment is not referenced from a PropertyValue via exampleOfWork @@ -200,7 +202,9 @@ def test_formalparam_env_bad_exampleofwork(): Severity.RECOMMENDED, False, ["Workflow Run Crate FormalParameter referenced from a ComputationalWorkflow environment"], - ["A FormalParameter referenced from a ComputationalWorkflow via " - "environment SHOULD be referenced from a PropertyValue via exampleOfWork"], - profile_identifier="workflow-run-crate" + [ + "A FormalParameter referenced from a ComputationalWorkflow via " + "environment SHOULD be referenced from a PropertyValue via exampleOfWork" + ], + profile_identifier="workflow-run-crate", ) diff --git a/tests/integration/profiles/workflow-run-crate/test_wfrc_root_data_entity.py 
b/tests/integration/profiles/workflow-run-crate/test_wfrc_root_data_entity.py index ae4a0dc4b..d942b841b 100644 --- a/tests/integration/profiles/workflow-run-crate/test_wfrc_root_data_entity.py +++ b/tests/integration/profiles/workflow-run-crate/test_wfrc_root_data_entity.py @@ -23,7 +23,7 @@ def test_wfrc_conformsto_no_wfrc(): - """\ + """ Test a Workflow Run Crate where the root data entity does not conformsTo the Workflow Run Crate profile. """ @@ -32,14 +32,16 @@ def test_wfrc_conformsto_no_wfrc(): Severity.REQUIRED, False, ["Root Data Entity Metadata"], - ["The Root Data Entity MUST reference a CreativeWork entity " - "with an @id URI that is consistent with the versioned permalink of the Workflow Run Crate profile"], - profile_identifier="workflow-run-crate" + [ + "The Root Data Entity MUST reference a CreativeWork entity " + "with an @id URI that is consistent with the versioned permalink of the Workflow Run Crate profile" + ], + profile_identifier="workflow-run-crate", ) def test_wfrc_conformsto_no_wroc(): - """\ + """ Test a Workflow Run Crate where the root data entity does not conformsTo the Workflow RO-Crate profile. """ @@ -48,14 +50,16 @@ def test_wfrc_conformsto_no_wroc(): Severity.RECOMMENDED, False, ["Root Data Entity Metadata SHOULD"], - ["The Root Data Entity SHOULD reference CreativeWork entities " - "corresponding to the Process Run Crate and Workflow RO-Crate profiles"], - profile_identifier="workflow-run-crate" + [ + "The Root Data Entity SHOULD reference CreativeWork entities " + "corresponding to the Process Run Crate and Workflow RO-Crate profiles" + ], + profile_identifier="workflow-run-crate", ) def test_wfrc_conformsto_no_procrc(): - """\ + """ Test a Workflow Run Crate where the root data entity does not conformsTo the Process Run Crate profile. 
""" @@ -64,7 +68,9 @@ def test_wfrc_conformsto_no_procrc(): Severity.RECOMMENDED, False, ["Root Data Entity Metadata SHOULD"], - ["The Root Data Entity SHOULD reference CreativeWork entities " - "corresponding to the Process Run Crate and Workflow RO-Crate profiles"], - profile_identifier="workflow-run-crate" + [ + "The Root Data Entity SHOULD reference CreativeWork entities " + "corresponding to the Process Run Crate and Workflow RO-Crate profiles" + ], + profile_identifier="workflow-run-crate", ) diff --git a/tests/integration/profiles/workflow-testing-ro-crate/test_valid_wtroc.py b/tests/integration/profiles/workflow-testing-ro-crate/test_valid_wtroc.py index 4eaee589a..f1c114c8e 100644 --- a/tests/integration/profiles/workflow-testing-ro-crate/test_valid_wtroc.py +++ b/tests/integration/profiles/workflow-testing-ro-crate/test_valid_wtroc.py @@ -29,5 +29,5 @@ def test_valid_workflow_roc_required(): Severity.REQUIRED, True, profile_identifier="workflow-testing-ro-crate", - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) diff --git a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_root_data_entity.py b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_root_data_entity.py index 8e97c4c05..a7ff4b1d8 100644 --- a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_root_data_entity.py +++ b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_root_data_entity.py @@ -23,7 +23,7 @@ def test_wtroc_no_suites(): - """\ + """ Test a Workflow Testing RO-Crate where the root data entity does not refer to any TestSuite via mentions. 
""" @@ -33,5 +33,5 @@ def test_wtroc_no_suites(): False, ["Root Data Entity Metadata"], ["The Root Data Entity MUST refer to one or more test suites via mentions"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) diff --git a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testdefinition.py b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testdefinition.py index 543726f1b..a40279780 100644 --- a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testdefinition.py +++ b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testdefinition.py @@ -23,7 +23,7 @@ def test_wtroc_testdefinition_bad_type(): - """\ + """ Test a Workflow Testing RO-Crate where a TestDefinition does not have the File (MediaObject) and TestDefinition types. """ @@ -33,12 +33,12 @@ def test_wtroc_testdefinition_bad_type(): False, ["Workflow Testing RO-Crate TestDefinition MUST"], ["The TestDefinition MUST have types TestDefinition and File"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testdefinition_no_engine(): - """\ + """ Test a Workflow Testing RO-Crate where a TestDefinition does not refer to the test engine SoftwareApplication via conformsTo. """ @@ -48,12 +48,12 @@ def test_wtroc_testdefinition_no_engine(): False, ["Workflow Testing RO-Crate TestDefinition MUST"], ["The TestDefinition MUST refer to the test engine it is written for via conformsTo"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testdefinition_no_engineversion(): - """\ + """ Test a Workflow Testing RO-Crate where a TestDefinition does not refer to the test engine's version via engineVersion. 
""" @@ -63,12 +63,12 @@ def test_wtroc_testdefinition_no_engineversion(): False, ["Workflow Testing RO-Crate TestDefinition MUST"], ["The TestDefinition MUST refer to the test engine version via engineVersion"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testdefinition_bad_conformsto(): - """\ + """ Test a Workflow Testing RO-Crate where a TestDefinition does not refer to the test engine SoftwareApplication via conformsTo. """ @@ -78,12 +78,12 @@ def test_wtroc_testdefinition_bad_conformsto(): False, ["Workflow Testing RO-Crate TestDefinition MUST"], ["The TestDefinition MUST refer to the test engine it is written for via conformsTo"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testdefinition_bad_engineversion(): - """\ + """ Test a Workflow Testing RO-Crate where a TestDefinition does not refer to the test engine's version as a string. """ @@ -93,5 +93,5 @@ def test_wtroc_testdefinition_bad_engineversion(): False, ["Workflow Testing RO-Crate TestDefinition MUST"], ["The TestDefinition MUST refer to the test engine version via engineVersion"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) diff --git a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testinstance.py b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testinstance.py index b1238613a..e2ed0d080 100644 --- a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testinstance.py +++ b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testinstance.py @@ -23,7 +23,7 @@ def test_wtroc_testinstance_no_service(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance does not refer to a TestService. 
""" @@ -33,12 +33,12 @@ def test_wtroc_testinstance_no_service(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to a TestService via runsOn"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testinstance_no_url(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance does not refer to the test service base URL. """ @@ -48,12 +48,12 @@ def test_wtroc_testinstance_no_url(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to the test service base URL via url"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testinstance_no_resource(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance does not refer to the relative URL of the test project via resource. """ @@ -63,12 +63,12 @@ def test_wtroc_testinstance_no_resource(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to the relative URL of the test project via resource"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testinstance_bad_runson(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance has a runsOn property that does not refer to a TestService. """ @@ -78,12 +78,12 @@ def test_wtroc_testinstance_bad_runson(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to a TestService via runsOn"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testinstance_bad_url(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance has a url property that does not refer to a string with a URL pattern. 
""" @@ -93,12 +93,12 @@ def test_wtroc_testinstance_bad_url(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to the test service base URL via url"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testinstance_bad_resource(): - """\ + """ Test a Workflow Testing RO-Crate where a TestInstance has a resource property that does not refer to a string. """ @@ -108,5 +108,5 @@ def test_wtroc_testinstance_bad_resource(): False, ["Workflow Testing RO-Crate TestInstance MUST"], ["The TestInstance MUST refer to the relative URL of the test project via resource"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) diff --git a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testsuite.py b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testsuite.py index 791bab235..d441e74b7 100644 --- a/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testsuite.py +++ b/tests/integration/profiles/workflow-testing-ro-crate/test_wtroc_testsuite.py @@ -23,7 +23,7 @@ def test_wtroc_testsuite_not_mentioned(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite is not listed in the Root Data Entity's mentions. """ @@ -33,12 +33,12 @@ def test_wtroc_testsuite_not_mentioned(): False, ["Workflow Testing RO-Crate TestSuite MUST"], ["The TestSuite MUST be referenced from the Root Data Entity via mentions"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testsuite_no_instance_no_def(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite does not refer to either a TestSuite or a TestDefinition. 
""" @@ -48,12 +48,12 @@ def test_wtroc_testsuite_no_instance_no_def(): False, ["TestSuite instance or definition"], ["The TestSuite MUST refer to a TestInstance or TestDefinition"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testsuite_no_mainentity(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite does not refer to the tested workflow via mainEntity. """ @@ -63,12 +63,12 @@ def test_wtroc_testsuite_no_mainentity(): False, ["Workflow Testing RO-Crate TestSuite SHOULD"], ["The TestSuite SHOULD refer to the tested workflow via mainEntity"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testsuite_bad_instance(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite has an instance property that does not refer to a TestInstance. """ @@ -78,12 +78,12 @@ def test_wtroc_testsuite_bad_instance(): False, ["TestSuite instance or definition"], ["The TestSuite MUST refer to a TestInstance or TestDefinition"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testsuite_bad_definition(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite has a definition property that does not refer to a TestDefinition. """ @@ -93,12 +93,12 @@ def test_wtroc_testsuite_bad_definition(): False, ["TestSuite instance or definition"], ["The TestSuite MUST refer to a TestInstance or TestDefinition"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) def test_wtroc_testsuite_bad_mainentity(): - """\ + """ Test a Workflow Testing RO-Crate where a TestSuite has a mainEntity property that does not refer to a workflow. 
""" @@ -108,5 +108,5 @@ def test_wtroc_testsuite_bad_mainentity(): False, ["Workflow Testing RO-Crate TestSuite SHOULD"], ["The TestSuite SHOULD refer to the tested workflow via mainEntity"], - profile_identifier="workflow-testing-ro-crate" + profile_identifier="workflow-testing-ro-crate", ) diff --git a/tests/integration/test_offline_mode.py b/tests/integration/test_offline_mode.py index 3d90750a2..76039acac 100644 --- a/tests/integration/test_offline_mode.py +++ b/tests/integration/test_offline_mode.py @@ -29,9 +29,9 @@ from tests.ro_crates import ValidROC -def _urllib3_response(payload: bytes = b'{"@context": {}}', - status: int = 200, - content_type: str = "application/ld+json") -> urllib3.HTTPResponse: +def _urllib3_response( + payload: bytes = b'{"@context": {}}', status: int = 200, content_type: str = "application/ld+json" +) -> urllib3.HTTPResponse: return urllib3.HTTPResponse( body=io.BytesIO(payload), headers={ @@ -242,7 +242,8 @@ def test_cli_no_cache_disables_cache_backend(cli_runner, tmp_path, network_inter str(ValidROC().wrroc_paper_long_date), "--no-paging", "--no-cache", - "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + "--skip-checks", + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, ], ) # The validation itself may pass or fail depending on upstream checks; we @@ -306,14 +307,18 @@ def test_cli_cache_warm_populates_profile_urls(cli_runner, tmp_path, network_int cli, [ "-y", - "cache", "warm", - "--cache-path", str(cache_path), - "--profile-identifier", "ro-crate-1.1", + "cache", + "warm", + "--cache-path", + str(cache_path), + "--profile-identifier", + "ro-crate-1.1", ], ) assert result.exit_code == 0, result.output - assert any("w3id.org" in c for c in network_interceptor["calls"]), \ + assert any("w3id.org" in c for c in network_interceptor["calls"]), ( f"No expected URL fetched. Calls: {network_interceptor['calls']}" + ) # The URL must now be cached for offline use. 
HttpRequester.reset() HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600, offline=True) @@ -327,9 +332,12 @@ def test_cli_cache_warm_crate_caches_remote_archive(cli_runner, tmp_path, networ cli, [ "-y", - "cache", "warm", - "--cache-path", str(cache_path), - "--crate", crate_url, + "cache", + "warm", + "--cache-path", + str(cache_path), + "--crate", + crate_url, ], ) assert result.exit_code == 0, result.output @@ -357,11 +365,13 @@ def test_cli_validate_offline_warns_when_remote(cli_runner, tmp_path, network_in "https://example.org/fake-crate.zip", "--no-paging", "--offline", - "--cache-path", str(cache_path), + "--cache-path", + str(cache_path), ], ) - assert "offline mode is enabled" in result.output.lower() \ - or "cached version" in result.output.lower(), result.output + assert "offline mode is enabled" in result.output.lower() or "cached version" in result.output.lower(), ( + result.output + ) def test_cli_validate_offline_on_local_crate_succeeds(cli_runner, tmp_path): @@ -375,8 +385,10 @@ def test_cli_validate_offline_on_local_crate_succeeds(cli_runner, tmp_path): str(ValidROC().wrroc_paper_long_date), "--no-paging", "--offline", - "--cache-path", str(cache_path), - "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + "--cache-path", + str(cache_path), + "--skip-checks", + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, ], ) # The validation may report issues for locally missing contexts; what we diff --git a/tests/integration/test_sparql_constraints.py b/tests/integration/test_sparql_constraints.py index d4730b3c2..b448cad84 100644 --- a/tests/integration/test_sparql_constraints.py +++ b/tests/integration/test_sparql_constraints.py @@ -97,11 +97,7 @@ def test_sparql_profile_shape_loaded_correctly(sparql_test_profiles_path): # Find the test shape (AlwaysFailShape or similar name) test_shape = None for shape in shapes: - if ( - "Always" in shape.name - or "Test" in shape.name - or "test" in shape.name.lower() - ): 
+ if "Always" in shape.name or "Test" in shape.name or "test" in shape.name.lower(): test_shape = shape break @@ -110,9 +106,7 @@ def test_sparql_profile_shape_loaded_correctly(sparql_test_profiles_path): assert len(test_shape.description) > 0 -def test_sparql_constraint_with_bnode_sourceShape( - sparql_test_profiles_path, sparql_test_rocrate -): +def test_sparql_constraint_with_bnode_sourceShape(sparql_test_profiles_path, sparql_test_rocrate): """ Test that SPARQL constraint violations with BNode sourceShape are handled gracefully by the validation pipeline. @@ -139,10 +133,9 @@ def test_sparql_constraint_with_bnode_sourceShape( assert issues[0].check.description is not None, "Check should have a description" assert issues[0].message is not None, "Issue should have a message" assert len(issues[0].message) > 0, "Issue message should not be empty" - assert ( - "SPARQL constraint violation" in issues[0].message - or "SPARQL" in issues[0].check.description - ), "Check description should reference parent shape" + assert "SPARQL constraint violation" in issues[0].message or "SPARQL" in issues[0].check.description, ( + "Check description should reference parent shape" + ) def test_resolve_parent_shape_with_sparql_bnode(): diff --git a/tests/ro_crates.py b/tests/ro_crates.py index efed37470..ce793564a 100644 --- a/tests/ro_crates.py +++ b/tests/ro_crates.py @@ -30,7 +30,6 @@ def ro_crates_path() -> Path: class ValidROC: - @property def rocrate_with_data_entities(self) -> Path: return VALID_CRATES_DATA_PATH / "rocrate-with-data-entities" @@ -124,7 +123,6 @@ def multi_profile_crate(self) -> Path: class InvalidFileDescriptor: - base_path = INVALID_CRATES_DATA_PATH / "0_file_descriptor_format" @property @@ -153,7 +151,6 @@ def invalid_jsonld_unexpected_key(self) -> Path: class InvalidRootDataEntity: - base_path = INVALID_CRATES_DATA_PATH / "2_root_data_entity_metadata" @property @@ -206,7 +203,6 @@ def valid_referenced_generic_data_entities(self) -> Path: class 
InvalidFileDescriptorEntity: - base_path = INVALID_CRATES_DATA_PATH / "1_file_descriptor_metadata" @property @@ -239,7 +235,6 @@ def invalid_conforms_to(self) -> Path: class InvalidDataEntity: - base_path = INVALID_CRATES_DATA_PATH / "4_data_entity_metadata" @property @@ -320,7 +315,6 @@ def missing_file_data_entity_with_absolute_path(self) -> Path: class InvalidMainWorkflow: - base_path = INVALID_CRATES_DATA_PATH / "0_main_workflow" @property @@ -357,7 +351,6 @@ def main_workflow_bad_conformsto(self) -> Path: class WROCInvalidConformsTo: - base_path = INVALID_CRATES_DATA_PATH / "2_wroc_descriptor" @property @@ -366,7 +359,6 @@ def wroc_descriptor_bad_conforms_to(self) -> Path: class WROCInvalidReadme: - base_path = INVALID_CRATES_DATA_PATH / "1_wroc_crate/" @property @@ -379,7 +371,6 @@ def wroc_readme_wrong_encoding_format(self) -> Path: class WROCNoLicense: - base_path = INVALID_CRATES_DATA_PATH / "1_wroc_crate/" @property @@ -388,7 +379,6 @@ def wroc_no_license(self) -> Path: class WROCMainEntity: - base_path = INVALID_CRATES_DATA_PATH / "1_wroc_crate/" @property @@ -397,7 +387,6 @@ def wroc_no_mainentity(self) -> Path: class InvalidProcRC: - base_path = INVALID_CRATES_DATA_PATH / "3_process_run_crate/" @property @@ -574,7 +563,6 @@ def softwareapplication_bad_softwarerequirements(self) -> Path: class InvalidWTROC: - base_path = INVALID_CRATES_DATA_PATH / "5_workflow_testing_ro_crate/" @property @@ -647,7 +635,6 @@ def testdefinition_bad_engineversion(self) -> Path: class InvalidWfRC: - base_path = INVALID_CRATES_DATA_PATH / "4_workflow_run_crate/" @property @@ -736,7 +723,6 @@ def formalparam_env_bad_exampleofwork(self) -> Path: class InvalidProvRC: - base_path = INVALID_CRATES_DATA_PATH / "5_provenance_run_crate/" @property @@ -1006,7 +992,6 @@ def propertyvalue_no_unitcode(self) -> Path: class InvalidMultiProfileROC: - @property def invalid_multi_profile_crate(self) -> Path: return INVALID_CRATES_DATA_PATH / "0_multi_profile_crate" diff --git 
a/tests/shared.py b/tests/shared.py index 83491a1b0..41a7298d3 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -153,8 +153,11 @@ def do_entity_test( rocrate_path = Path(rocrate_path) temp_rocrate_path = None - if (any([rocrate_entity_patch, rocrate_entity_mod_sparql]) - and isinstance(rocrate_path, Path) and rocrate_path.is_dir()): + if ( + any([rocrate_entity_patch, rocrate_entity_mod_sparql]) + and isinstance(rocrate_path, Path) + and rocrate_path.is_dir() + ): temp_rocrate_path = _prepare_temp_rocrate(rocrate_path, rocrate_entity_patch, rocrate_entity_mod_sparql) rocrate_path = temp_rocrate_path @@ -169,9 +172,7 @@ def do_entity_test( logger.debug("Checks to skip: %s", skip_checks) # validate RO-Crate - relative_root_path = ( - Path(rocrate_relative_root_path) if rocrate_relative_root_path is not None else None - ) + relative_root_path = Path(rocrate_relative_root_path) if rocrate_relative_root_path is not None else None result: models.ValidationResult = services.validate( models.ValidationSettings( rocrate_uri=models.URI(rocrate_path), @@ -212,9 +213,7 @@ def do_entity_test( logger.debug("Expected issues: %s", expected_triggered_issues) for expected_issue in expected_triggered_issues: if not any(expected_issue in issue for issue in detected_issues): # support partial match - raise AssertionError( - f'The expected issue "{expected_issue}" was not found in the detected issues' - ) + raise AssertionError(f'The expected issue "{expected_issue}" was not found in the detected issues') except Exception: if logger.isEnabledFor(logging.DEBUG): logger.exception("Failed to validate RO-Crate @ path: %s", rocrate_path) diff --git a/tests/test_cli.py b/tests/test_cli.py index 99fb66b8e..2895bb7ab 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -48,33 +48,50 @@ def test_version(cli_runner: CliRunner): def test_validate_subcmd_invalid_rocrate1(cli_runner: CliRunner): - result = cli_runner.invoke(cli, ['validate', str( - 
InvalidFileDescriptor().invalid_json_format), '--verbose', '--no-paging', '-p', 'ro-crate']) + result = cli_runner.invoke( + cli, + ["validate", str(InvalidFileDescriptor().invalid_json_format), "--verbose", "--no-paging", "-p", "ro-crate"], + ) logger.error(result.output) assert result.exit_code == 1 def test_validate_subcmd_valid_local_folder_rocrate(cli_runner: CliRunner): - result = cli_runner.invoke(cli, ['validate', str(ValidROC().wrroc_paper_long_date), '--verbose', '--no-paging']) + result = cli_runner.invoke(cli, ["validate", str(ValidROC().wrroc_paper_long_date), "--verbose", "--no-paging"]) assert result.exit_code == 0 - assert re.search(r'RO-Crate.*is a valid', result.output) + assert re.search(r"RO-Crate.*is a valid", result.output) def test_validate_subcmd_valid_remote_rocrate(cli_runner: CliRunner): result = cli_runner.invoke( - cli, ['validate', str(ValidROC().sort_and_change_remote), - '--verbose', '--no-paging', - '--skip-checks', SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER]) + cli, + [ + "validate", + str(ValidROC().sort_and_change_remote), + "--verbose", + "--no-paging", + "--skip-checks", + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) assert result.exit_code == 0 - assert re.search(r'RO-Crate.*is a valid', result.output) + assert re.search(r"RO-Crate.*is a valid", result.output) def test_validate_subcmd_invalid_local_archive_rocrate(cli_runner: CliRunner): - result = cli_runner.invoke(cli, ['validate', str(ValidROC().sort_and_change_archive), - '--verbose', '--no-paging', - '--skip-checks', SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER]) + result = cli_runner.invoke( + cli, + [ + "validate", + str(ValidROC().sort_and_change_archive), + "--verbose", + "--no-paging", + "--skip-checks", + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) assert result.exit_code == 0 - assert re.search(r'RO-Crate.*is a valid', result.output) + assert re.search(r"RO-Crate.*is a valid", result.output) def 
test_validate_skip_checks_option(cli_runner: CliRunner): @@ -93,20 +110,24 @@ def mock_validate(*args, **kwargs): logger.debug(f"Called args: {called_args}") logger.debug(f"Called kwargs: {called_kwargs}") - with patch('rocrate_validator.cli.commands.validate.services.validate') as mock_validate_rocrate: + with patch("rocrate_validator.cli.commands.validate.services.validate") as mock_validate_rocrate: mock_validate_rocrate.return_value = None mock_validate_rocrate.side_effect = mock_validate skip_checks_1 = ("a", "b", "c") skip_checks_2 = ("d", "e", "f") result = cli_runner.invoke( - cli, [ - '--no-interactive', - 'validate', str(ValidROC().sort_and_change_remote), - '--skip-checks', ','.join(skip_checks_1), - '--skip-checks', ','.join(skip_checks_2), - '--no-paging' - ] + cli, + [ + "--no-interactive", + "validate", + str(ValidROC().sort_and_change_remote), + "--skip-checks", + ",".join(skip_checks_1), + "--skip-checks", + ",".join(skip_checks_2), + "--no-paging", + ], ) # Check the exit code which should be 2 @@ -120,8 +141,9 @@ def mock_validate(*args, **kwargs): assert settings["skip_checks"] is not None, "skip_checks should not be None" # Check if the skip_checks value matches the expected value - assert list(skip_checks_1 + skip_checks_2) == settings["skip_checks"], \ + assert list(skip_checks_1 + skip_checks_2) == settings["skip_checks"], ( f"Expected skip_checks to be {list(skip_checks_1 + skip_checks_2)}, but got {settings['skip_checks']}" + ) def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): @@ -132,10 +154,11 @@ def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): [ "validate", str(ValidROC().wrroc_paper_long_date), - "--profiles-path", dummy_profiles_path, + "--profiles-path", + dummy_profiles_path, "--verbose", - "--no-paging" - ] + "--no-paging", + ], ) assert result.exit_code == 2 # On narrow terminals the Rich error panel wraps the message across lines @@ -255,9 +278,7 @@ def 
test_describe_check_verbose_python(cli_runner: CliRunner): """Verbose single-check view on a Python-backed check shows the function source.""" _, requirement, check = _first_visible_check() relative = f"{requirement.order_number}.{check.order_number}" - result = cli_runner.invoke( - cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] - ) + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"]) assert result.exit_code == 0, result.output assert "Source" in result.output # The decorated check function is what gets serialized @@ -268,9 +289,7 @@ def test_describe_check_verbose_shacl(cli_runner: CliRunner): """Verbose single-check view on a SHACL-backed check shows turtle source.""" _, requirement, check = _first_shacl_check() relative = f"{requirement.order_number}.{check.order_number}" - result = cli_runner.invoke( - cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] - ) + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"]) assert result.exit_code == 0, result.output assert "Source" in result.output # SHACL serialized as turtle should contain a sh: prefix and a NodeShape/PropertyShape declaration @@ -295,12 +314,11 @@ def test_describe_check_verbose_shacl_includes_target(cli_runner: CliRunner): return requirement, check = nested relative = f"{requirement.order_number}.{check.order_number}" - result = cli_runner.invoke( - cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] - ) + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"]) assert result.exit_code == 0, result.output # The snippet must surface the owning shape's target declaration so the user can see # what the property check applies to. 
- assert any(t in result.output for t in ("sh:targetClass", "sh:targetNode", - "sh:targetSubjectsOf", "sh:targetObjectsOf", - "sh:target ")) + assert any( + t in result.output + for t in ("sh:targetClass", "sh:targetNode", "sh:targetSubjectsOf", "sh:targetObjectsOf", "sh:target ") + ) diff --git a/tests/test_models.py b/tests/test_models.py index 3c4802842..cd580e07b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -31,27 +31,27 @@ def test_severity_ordering(): def test_level_ordering(): - may = RequirementLevel('MAY', Severity.OPTIONAL) - should = RequirementLevel('SHOULD', Severity.RECOMMENDED) + may = RequirementLevel("MAY", Severity.OPTIONAL) + should = RequirementLevel("SHOULD", Severity.RECOMMENDED) assert may < should assert should > may assert should != may - assert RequirementLevel('MAY', Severity.OPTIONAL) == may + assert RequirementLevel("MAY", Severity.OPTIONAL) == may assert may != 1 - assert may != RequirementLevel('OPTIONAL', Severity.OPTIONAL) + assert may != RequirementLevel("OPTIONAL", Severity.OPTIONAL) with pytest.raises(TypeError): _ = may > 1 def test_level_basics(): - may = RequirementLevel('MAY', Severity.OPTIONAL) + may = RequirementLevel("MAY", Severity.OPTIONAL) assert str(may) == "MAY" assert int(may) == Severity.OPTIONAL.value assert hash(may) != 0 # should be find as long as it runs def test_level_collection(): - assert LevelCollection.get('may') == LevelCollection.MAY + assert LevelCollection.get("may") == LevelCollection.MAY # Test ordering assert LevelCollection.MAY < LevelCollection.SHOULD @@ -63,19 +63,15 @@ def test_level_collection(): assert len(all_levels) == 10 level_names = [level.name for level in all_levels] # Test a few of the keys - assert 'MAY' in level_names - assert 'SHOULD_NOT' in level_names - assert 'RECOMMENDED' in level_names - assert 'REQUIRED' in level_names + assert "MAY" in level_names + assert "SHOULD_NOT" in level_names + assert "RECOMMENDED" in level_names + assert "REQUIRED" in level_names 
@pytest.fixture def validation_settings(): - return ValidationSettings( - rocrate_uri=URI("file:///"), - requirement_severity=Severity.OPTIONAL, - abort_on_first=False - ) + return ValidationSettings(rocrate_uri=URI("file:///"), requirement_severity=Severity.OPTIONAL, abort_on_first=False) # @pytest.mark.skip(reason="Temporarily disabled: we need an RO-Crate with multiple failed requirements to test this.") diff --git a/tests/unit/requirements/test_load_requirements.py b/tests/unit/requirements/test_load_requirements.py index bf0952ade..577bae5a8 100644 --- a/tests/unit/requirements/test_load_requirements.py +++ b/tests/unit/requirements/test_load_requirements.py @@ -47,10 +47,7 @@ def test_requirements_loading(profiles_requirement_loading: str): number_of_checks_per_requirement = 4 # Define the settings - settings: dict[str, Any] = { - "profiles_path": profiles_requirement_loading, - "severity": Severity.OPTIONAL - } + settings: dict[str, Any] = {"profiles_path": profiles_requirement_loading, "severity": Severity.OPTIONAL} # Load the profiles profiles = Profile.load_profiles(**settings) @@ -87,16 +84,18 @@ def test_requirements_loading(profiles_requirement_loading: str): if requirement_name in ["A", "B"]: assert requirement.severity_from_path is None, "The severity of the requirement should be None" elif requirement_name in ["A_MUST", "B_MUST"]: - assert requirement.severity_from_path == Severity.REQUIRED, \ + assert requirement.severity_from_path == Severity.REQUIRED, ( "The severity of the requirement should be REQUIRED" + ) offset = 1 if isinstance(requirement, SHACLRequirement) else 0 - assert len(requirement.get_checks()) == number_of_checks_per_requirement + offset, \ + assert len(requirement.get_checks()) == number_of_checks_per_requirement + offset, ( "The number of requirement checks is incorrect" + ) for i in range(number_of_checks_per_requirement): logger.debug("The requirement check: %r", f"{requirement_name}_{i}") - check = 
requirement.get_checks()[i+offset] + check = requirement.get_checks()[i + offset] assert check.name == f"{requirement_name}_{i}", "The name of the requirement check is incorrect" assert check.level.severity == levels[i].severity, "The level of the requirement check is incorrect" @@ -119,8 +118,13 @@ def test_order_of_loaded_profile_requirements(profiles_path: str): requirements = profile.get_requirements() assert len(requirements) > 0 for requirement in requirements: - logger.debug("%r The requirement: %r -> severity: %r (path: %s)", requirement.order_number, - requirement.name, requirement.severity_from_path, requirement.path) + logger.debug( + "%r The requirement: %r -> severity: %r (path: %s)", + requirement.order_number, + requirement.name, + requirement.severity_from_path, + requirement.path, + ) # Sort requirements by their order requirements = sorted( @@ -150,11 +154,13 @@ def test_order_of_loaded_profile_requirements(profiles_path: str): # Inspect the first requirement check requirement_check = r_checks[0] - assert requirement_check.name == "Root Data Entity: RECOMMENDED value", \ + assert requirement_check.name == "Root Data Entity: RECOMMENDED value", ( "The name of the requirement check is incorrect" - assert requirement_check.description == \ - "Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD", \ - "The description of the requirement check is incorrect" + ) + assert ( + requirement_check.description + == "Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD" + ), "The description of the requirement check is incorrect" assert requirement_check.severity == Severity.RECOMMENDED, "The severity of the requirement check is incorrect" @@ -164,10 +170,7 @@ def test_hidden_requirements(profiles_loading_hidden_requirements: str): requirements_names = ["A", "B", "A_MUST", "B_MUST"] # Define the settings - settings: dict[str, Any] = { - "profiles_path": 
profiles_loading_hidden_requirements, - "severity": Severity.OPTIONAL - } + settings: dict[str, Any] = {"profiles_path": profiles_loading_hidden_requirements, "severity": Severity.OPTIONAL} # Load the profiles profiles = Profile.load_profiles(**settings) diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 4a5920950..8b7986d31 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -128,9 +128,9 @@ def test_profile_spec_properties(fake_profiles_path: str): assert profile.comment == "Comment for the Profile C.", "The profile comment should be 'Comment for the Profile C.'" assert profile.version == "1.0.0", "The profile version should be 1.0.0" assert profile.is_profile_of == ["https://w3id.org/a"], "The profileOf property should be ['a']" - assert profile.is_transitive_profile_of == [ - "https://w3id.org/a" - ], "The transitiveProfileOf property should be ['a']" + assert profile.is_transitive_profile_of == ["https://w3id.org/a"], ( + "The transitiveProfileOf property should be ['a']" + ) def test_profiles_loading_free_folder_structure(profiles_with_free_folder_structure_path: str): @@ -204,9 +204,9 @@ def __perform_test__(profile_identifier: str, expected_inherited_profiles: list[ # The number of profiles should be 1 profiles_names = [_.token for _ in profile.inherited_profiles] - assert ( - profiles_names == expected_inherited_profiles - ), f"The number of profiles should be {expected_inherited_profiles}" + assert profiles_names == expected_inherited_profiles, ( + f"The number of profiles should be {expected_inherited_profiles}" + ) # Test the inheritance mode with 1 profile __perform_test__("a", []) @@ -383,24 +383,24 @@ def test_profile_check_overriding(check_overriding_profiles_path: str): def check_profile(profile, check, inherited_profiles, overridden_by, override): # Check inherited profiles - assert len(profile.inherited_profiles) == len( - inherited_profiles - 
), f"The number of inherited profiles should be {len(inherited_profiles)}" + assert len(profile.inherited_profiles) == len(inherited_profiles), ( + f"The number of inherited profiles should be {len(inherited_profiles)}" + ) inherited_profiles_tokens = [_.token for _ in profile.inherited_profiles] - assert set(inherited_profiles_tokens) == set( - inherited_profiles - ), f"The inherited profiles should be {inherited_profiles}" + assert set(inherited_profiles_tokens) == set(inherited_profiles), ( + f"The inherited profiles should be {inherited_profiles}" + ) # Check overridden status logger.debug( "%r overridden by: %r", check.identifier, [_.requirement.profile.identifier for _ in check.overridden_by] ) - assert check.overridden == ( - len(overridden_by) > 0 - ), f"The check overridden status should be {len(overridden_by) > 0}" - assert len(check.overridden_by) == len( - overridden_by - ), f"The number of overridden checks should be {len(overridden_by)}" + assert check.overridden == (len(overridden_by) > 0), ( + f"The check overridden status should be {len(overridden_by) > 0}" + ) + assert len(check.overridden_by) == len(overridden_by), ( + f"The number of overridden checks should be {len(overridden_by)}" + ) overridden_by_tokens = [_.requirement.profile.identifier for _ in check.overridden_by] assert set(overridden_by_tokens) == set(overridden_by), f"The overridden checks should be {overridden_by}" @@ -505,9 +505,9 @@ def test_shacl_shape_with_deactivated_marks_check_skipped(fake_profiles_path: st target_property_check = target.requirements[0].get_checks()[1] parent_property_check = parent_c.requirements[0].get_checks()[1] - assert ( - target_property_check.deactivated is True - ), "The deactivated property should reflect sh:deactivated true on the PropertyShape" + assert target_property_check.deactivated is True, ( + "The deactivated property should reflect sh:deactivated true on the PropertyShape" + ) # The parent property check is overridden by the child's (same 
sh:name). overridden_by_tokens = [c.requirement.profile.token for c in parent_property_check.overridden_by] @@ -555,9 +555,9 @@ def test_shacl_check_deactivated_via_cross_profile_triple(fake_profiles_path: st sh = Namespace(SHACL_NS) target_registry._shapes_graph.add((parent_shape_check.shape.node, sh.deactivated, Literal(True))) - assert ( - parent_shape_check.deactivated is True - ), "The parent's check must read sh:deactivated true from the child's shapes graph" + assert parent_shape_check.deactivated is True, ( + "The parent's check must read sh:deactivated true from the child's shapes graph" + ) def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str): @@ -599,6 +599,6 @@ def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str) (parent_shape_check.shape.node, sh.deactivated, Literal(True)) ) - assert ( - parent_shape_check.deactivated is False - ), "An unrelated profile's deactivation triple must not affect the check" + assert parent_shape_check.deactivated is False, ( + "An unrelated profile's deactivation triple must not affect the check" + ) diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index a25b656ae..f4de84cc5 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -70,9 +70,7 @@ def test_description_fallback_parent_description(): shape = Shape(URIRef("http://example.org/shape"), g) shape._name = "ChildShape" shape._description = None - shape._parent = cast("Shape", MockParentShape( - name="ParentShape", description="Parent Description" - )) + shape._parent = cast("Shape", MockParentShape(name="ParentShape", description="Parent Description")) req = cast("Requirement", MockRequirement()) check = SHACLCheck(req, shape) @@ -302,9 +300,7 @@ def test_path_based_level_takes_precedence_over_derivation(): shape = NodeShape(URIRef("http://example.org/NodeShape"), g) shape.add_property(_make_property(g, 
f"{SHACL_NS}Info")) - check = SHACLCheck( - cast("Requirement", MockRequirement(requirement_level_from_path=LevelCollection.SHOULD)), shape - ) + check = SHACLCheck(cast("Requirement", MockRequirement(requirement_level_from_path=LevelCollection.SHOULD)), shape) assert check.level == LevelCollection.SHOULD diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py index 0fbc09d18..ca22e5c0f 100644 --- a/tests/unit/test_cache_warmup.py +++ b/tests/unit/test_cache_warmup.py @@ -108,10 +108,9 @@ def test_discover_urls_on_multiple_profiles_deduplicates(sample_profile, tmp_pat other_dir = tmp_path / "sample_other" other_dir.mkdir() (other_dir / "profile.ttl").write_text( - PROFILE_TTL_TEMPLATE - .replace("", - "") - .replace('prof:hasToken "sample"', 'prof:hasToken "other"') + PROFILE_TTL_TEMPLATE.replace( + "", "" + ).replace('prof:hasToken "sample"', 'prof:hasToken "other"') ) other_profile = Profile(profiles_base_path=tmp_path, profile_path=other_dir) aggregated = discover_cacheable_urls_from_profiles([sample_profile, other_profile]) diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index 365a6de41..049a59b00 100644 --- a/tests/unit/test_cli_internals.py +++ b/tests/unit/test_cli_internals.py @@ -32,13 +32,15 @@ def test_compute_stats(fake_profiles_path): - settings = ValidationSettings.parse({ - "profiles_path": fake_profiles_path, - "profile_identifier": "a", - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - "requirement_severity": "REQUIRED", - }) + settings = ValidationSettings.parse( + { + "profiles_path": fake_profiles_path, + "profile_identifier": "a", + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "requirement_severity": "REQUIRED", + } + ) profiles_path = settings.profiles_path or DEFAULT_PROFILES_PATH logger.debug("The profiles path: %r", profiles_path) @@ -68,8 +70,7 @@ def test_compute_stats(fake_profiles_path): assert 
len(stats["profiles"]) == 1 # check if the requirements match - assert stats["requirements"] == requirements, \ - "The requirements in stats do not match the profile requirements" + assert stats["requirements"] == requirements, "The requirements in stats do not match the profile requirements" # Check if the number of requirements is greater than 0 assert len(stats["requirements"]) > 0, "There should be at least one requirement" @@ -82,8 +83,9 @@ def test_compute_stats(fake_profiles_path): # check the number of requirement checks requirements_list = list(requirements) - assert stats["checks"] == {_ for _ in requirements_list[0].get_checks() if not _.overridden or - _.requirement.profile.identifier == "a"} + assert stats["checks"] == { + _ for _ in requirements_list[0].get_checks() if not _.overridden or _.requirement.profile.identifier == "a" + } logger.error(stats) @@ -92,16 +94,19 @@ def test_compute_stats_resolves_profile_from_extra_profiles_path(fake_profiles_p # ValidationStatistics.__initialise__ used to call Profile.load_profiles # without forwarding extra_profiles_path, so any profile that lived only # under --extra-profiles-path raised ProfileNotFound. 
- settings = ValidationSettings.parse({ - "profiles_path": DEFAULT_PROFILES_PATH, - "extra_profiles_path": fake_profiles_path, - "profile_identifier": "a", - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - "requirement_severity": "REQUIRED", - }) + settings = ValidationSettings.parse( + { + "profiles_path": DEFAULT_PROFILES_PATH, + "extra_profiles_path": fake_profiles_path, + "profile_identifier": "a", + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "requirement_severity": "REQUIRED", + } + ) stats = ValidationStatistics.__initialise__(validation_settings=settings) - assert any(p.identifier == "a" for p in stats["profiles"]), \ + assert any(p.identifier == "a" for p in stats["profiles"]), ( "Profile 'a' from extra_profiles_path was not resolved by ValidationStatistics" + ) diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py index cbe1ae8eb..86b4b713b 100644 --- a/tests/unit/test_document_loader.py +++ b/tests/unit/test_document_loader.py @@ -30,8 +30,9 @@ from rocrate_validator.utils.http import HttpRequester, OfflineCacheMissError -def _urllib3_response(payload: bytes = b'{"@context": {"name": "https://schema.org/name"}}', - status: int = 200) -> urllib3.HTTPResponse: +def _urllib3_response( + payload: bytes = b'{"@context": {"name": "https://schema.org/name"}}', status: int = 200 +) -> urllib3.HTTPResponse: return urllib3.HTTPResponse( body=io.BytesIO(payload), headers={ @@ -125,6 +126,7 @@ def test_patched_source_to_json_routes_http_urls(tmp_path, mock_network): HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) install_document_loader() from rdflib.plugins.shared.jsonld import util as jsonld_util + doc, _ = jsonld_util.source_to_json("https://example.org/context") assert doc == {"@context": {"name": "https://schema.org/name"}} @@ -133,6 +135,7 @@ def test_patched_source_to_json_ignores_non_http(tmp_path): 
HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) install_document_loader() from rdflib.plugins.shared.jsonld import util as jsonld_util + file_path = tmp_path / "context.jsonld" file_path.write_text('{"@context": {"foo": "https://example.org/foo"}}') doc, _ = jsonld_util.source_to_json(str(file_path)) @@ -170,7 +173,7 @@ def test_install_patches_both_util_and_context(tmp_path): install_document_loader() assert jsonld_util.source_to_json is document_loader._patched_source_to_json - assert jsonld_context.source_to_json is document_loader._patched_source_to_json # pyright: ignore[reportPrivateImportUsage] + assert jsonld_context.source_to_json is document_loader._patched_source_to_json # pyright: ignore[reportPrivateImportUsage] def test_uninstall_restores_both_util_and_context(tmp_path): @@ -182,7 +185,7 @@ def test_uninstall_restores_both_util_and_context(tmp_path): uninstall_document_loader() assert jsonld_util.source_to_json is document_loader._original_source_to_json - assert jsonld_context.source_to_json is document_loader._original_source_to_json # pyright: ignore[reportPrivateImportUsage] + assert jsonld_context.source_to_json is document_loader._original_source_to_json # pyright: ignore[reportPrivateImportUsage] def test_context_module_resolution_routes_through_http(tmp_path, mock_network): @@ -192,7 +195,7 @@ def test_context_module_resolution_routes_through_http(tmp_path, mock_network): install_document_loader() from rdflib.plugins.shared.jsonld import context as jsonld_context - doc, _ = jsonld_context.source_to_json("https://example.org/context") # pyright: ignore[reportPrivateImportUsage] + doc, _ = jsonld_context.source_to_json("https://example.org/context") # pyright: ignore[reportPrivateImportUsage] assert doc == {"@context": {"name": "https://schema.org/name"}} assert HttpRequester().has_cached("https://example.org/context") is True diff --git a/tests/unit/test_http_requester_offline.py 
b/tests/unit/test_http_requester_offline.py index 60bf9e863..2765fcf40 100644 --- a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -26,9 +26,9 @@ from rocrate_validator.utils.http import OFFLINE_CACHE_MISS_STATUS, HttpRequester -def _build_urllib3_response(body: bytes = b'{"ok": true}', - status: int = 200, - content_type: str = "application/json") -> urllib3.HTTPResponse: +def _build_urllib3_response( + body: bytes = b'{"ok": true}', status: int = 200, content_type: str = "application/json" +) -> urllib3.HTTPResponse: return urllib3.HTTPResponse( body=io.BytesIO(body), headers={"Content-Type": content_type, "Content-Length": str(len(body))}, @@ -190,6 +190,7 @@ def __enter__(self): import logging as _logging from rocrate_validator.utils import http as http_module + self.records.clear() self.handler = _logging.Handler() self.handler.setLevel(_logging.DEBUG) diff --git a/tests/unit/test_remote_context_retrieval.py b/tests/unit/test_remote_context_retrieval.py index 1c903e2b8..274c98c50 100644 --- a/tests/unit/test_remote_context_retrieval.py +++ b/tests/unit/test_remote_context_retrieval.py @@ -23,8 +23,7 @@ def fd_format(): """Load the module with numeric prefix.""" spec = importlib.util.spec_from_file_location( - "fd_format", - "rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py" + "fd_format", "rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py" ) assert spec is not None and spec.loader is not None module = importlib.util.module_from_spec(spec) @@ -59,7 +58,7 @@ def test_fallback_to_alternate_link(self, fd_format): mock_response.status_code = 200 mock_response.headers = { "Content-Type": "text/html", - "Link": '; rel="alternate"; type="application/ld+json"' + "Link": '; rel="alternate"; type="application/ld+json"', } mock_alternate_response = MagicMock() @@ -80,6 +79,7 @@ def get(self, url, headers=None): return mock_response # Return the alternate response for the second call 
return mock_alternate_response + return MockHttpRequester() fd_format.HttpRequester = mock_requester @@ -97,7 +97,7 @@ def test_relative_alternate_url_resolution(self, fd_format): mock_response.status_code = 200 mock_response.headers = { "Content-Type": "text/html", - "Link": '<./alternate-context.json>; rel="alternate"; type="application/ld+json"' + "Link": '<./alternate-context.json>; rel="alternate"; type="application/ld+json"', } mock_alternate_response = MagicMock() @@ -117,6 +117,7 @@ def get(self, url, headers=None): if call_count[0] == 1: return mock_response return mock_alternate_response + return MockHttpRequester() fd_format.HttpRequester = mock_requester @@ -162,10 +163,7 @@ def test_invalid_alternate_link_format_raises_error(self, fd_format): """Test error when alternate Link header format is invalid.""" mock_response = MagicMock() mock_response.status_code = 200 - mock_response.headers = { - "Content-Type": "text/html", - "Link": "invalid-link-format" - } + mock_response.headers = {"Content-Type": "text/html", "Link": "invalid-link-format"} original_requester = fd_format.HttpRequester fd_format.HttpRequester = lambda: MagicMock(get=lambda url, headers=None: mock_response) @@ -200,7 +198,7 @@ def test_alternate_request_failed_raises_error(self, fd_format): mock_response.status_code = 200 mock_response.headers = { "Content-Type": "text/html", - "Link": '; rel="alternate"; type="application/ld+json"' + "Link": '; rel="alternate"; type="application/ld+json"', } mock_alternate_response = MagicMock() @@ -216,6 +214,7 @@ def get(self, url, headers=None): if call_count[0] == 1: return mock_response return mock_alternate_response + return MockHttpRequester() fd_format.HttpRequester = mock_requester @@ -234,7 +233,7 @@ def test_alternate_wrong_content_type_raises_error(self, fd_format): mock_response.status_code = 200 mock_response.headers = { "Content-Type": "text/html", - "Link": '; rel="alternate"; type="application/ld+json"' + "Link": '; rel="alternate"; 
type="application/ld+json"', } mock_alternate_response = MagicMock() @@ -251,6 +250,7 @@ def get(self, url, headers=None): if call_count[0] == 1: return mock_response return mock_alternate_response + return MockHttpRequester() fd_format.HttpRequester = mock_requester @@ -265,7 +265,6 @@ def get(self, url, headers=None): class TestCheckRemoteContext: - def test_check_remote_context_valid(self, fd_format): """Test successful remote context validation.""" mock_response = MagicMock() @@ -300,7 +299,6 @@ def test_check_remote_context_invalid(self, fd_format): class TestGetContextKeys: - def test_get_context_keys_from_string(self, fd_format): """Test getting context keys from a remote URI string.""" mock_response = MagicMock() diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index c46d4db21..d4eacb969 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -50,23 +50,19 @@ def test_invalid_local_ro_crate(): def test_is_bagit_rocrate(): - assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit), \ - "Should be a BagIt RO-Crate" + assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit), "Should be a BagIt RO-Crate" - assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit_zip), \ - "Should be a BagIt Zip RO-Crate" + assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit_zip), "Should be a BagIt Zip RO-Crate" - assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit_remote_zip), \ - "Should be a BagIt Remote Zip RO-Crate" + assert BagitROCrate.is_bagit_wrapping_crate(ValidROC().bagit_remote_zip), "Should be a BagIt Remote Zip RO-Crate" - assert not BagitROCrate.is_bagit_wrapping_crate(ValidROC().wrroc_paper), \ - "Should not be a BagIt RO-Crate" + assert not BagitROCrate.is_bagit_wrapping_crate(ValidROC().wrroc_paper), "Should not be a BagIt RO-Crate" - assert not BagitROCrate.is_bagit_wrapping_crate(ValidROC().sort_and_change_archive), \ + assert not 
BagitROCrate.is_bagit_wrapping_crate(ValidROC().sort_and_change_archive), ( "Should not be a BagIt RO-Crate" + ) - assert not BagitROCrate.is_bagit_wrapping_crate(ValidROC().sort_and_change_remote), \ - "Should not be a BagIt RO-Crate" + assert not BagitROCrate.is_bagit_wrapping_crate(ValidROC().sort_and_change_remote), "Should not be a BagIt RO-Crate" def test_abstract_bagit_rocrate_instantiation(): @@ -256,8 +252,9 @@ def test_valid_local_rocrate(): assert isinstance(root_data_entity, ROCrateEntity), "Entity should be ROCrateEntity" assert root_data_entity.id == "./", "Id should be ./" assert root_data_entity.type == "Dataset", "Type should be Dataset" - assert root_data_entity.name == "Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)", \ + assert root_data_entity.name == "Recording provenance of workflow runs with RO-Crate (RO-Crate and mapping)", ( "Name should be wrroc-paper" + ) # check metadata consistency assert root_data_entity.metadata == metadata, "Metadata should be the same" @@ -273,8 +270,7 @@ def test_valid_local_folder_rocrate_with_relative_root(): # set relative root path relative_root_path = Path("custom-relative-root") # create ROCrateLocalFolder with relative root path - roc = ROCrateLocalFolder(ValidROC().rocrate_with_relative_root, - relative_root_path=relative_root_path) + roc = ROCrateLocalFolder(ValidROC().rocrate_with_relative_root, relative_root_path=relative_root_path) assert isinstance(roc, ROCrateLocalFolder) logger.debug("Testing bagit with relative root path: %s", relative_root_path) @@ -287,8 +283,7 @@ def test_valid_local_folder_rocrate_with_relative_root(): # test has_file assert roc.has_file(Path("ro-crate-metadata.json")), "Should have ro-crate-metadata.json file" - assert roc.has_file(Path("pics/2017-06-11%252012.56.14.jpg")), \ - "Should have pics/2017-06-11%252012.56.14.jpg file" + assert roc.has_file(Path("pics/2017-06-11%252012.56.14.jpg")), "Should have pics/2017-06-11%252012.56.14.jpg file" # 
test get_file_content content = roc.get_file_content(Path("ro-crate-metadata.json")) @@ -375,6 +370,7 @@ def test_valid_zip_rocrate(): # ROCrate Local Bagit Zip ################################ + def test_paths_valid_bagit_rocrate(): roc = ROCrate(ValidROC().bagit_zip) # type: ignore[abstract] assert isinstance(roc, ROCrateLocalZip) @@ -467,7 +463,6 @@ def test_valid_bagit_zip_rocrate(): # check availability of 'pics/2017-06-11%2012.56.14.jpg' - ################################ # ROCrateRemoteZip ################################ @@ -643,9 +638,7 @@ def test_local_file_uri_data_entity_is_not_remote(entity_id): crate = ROCrate.from_metadata_dict(_metadata_dict_with_id(entity_id)) entity = crate.metadata.get_entity(entity_id) assert entity is not None, "Entity should be present in the metadata" - assert not entity.is_remote(), ( - f"Entity with local file URI '{entity_id}' should NOT be classified as remote" - ) + assert not entity.is_remote(), f"Entity with local file URI '{entity_id}' should NOT be classified as remote" assert entity not in crate.metadata.get_web_data_entities(), ( f"Entity '{entity_id}' should not be listed as a web data entity" ) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index ae6ed477c..009d8223a 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -53,17 +53,17 @@ def test_extra_profiles_list(fake_profiles_path: Path): all_profiles = get_profiles(extra_profiles_path=fake_profiles_path) logger.error("All profiles: %s", all_profiles) # Check the number of all profiles - assert len(all_profiles) > len(default_profiles), \ + assert len(all_profiles) > len(default_profiles), ( "Expected more profiles with extra profiles added than the default ones" - assert len(all_profiles) == len(extra_profiles) + len(default_profiles), \ + ) + assert len(all_profiles) == len(extra_profiles) + len(default_profiles), ( "Expected the number of all profiles to be the sum of default and extra profiles" + ) def 
test_valid_local_rocrate(): logger.debug("Validating a local RO-Crate: %s", ValidROC().wrroc_paper) - profiles = detect_profiles(ValidationSettings( - rocrate_uri=URI(ValidROC().wrroc_paper) - )) + profiles = detect_profiles(ValidationSettings(rocrate_uri=URI(ValidROC().wrroc_paper))) logger.debug("Candidate profiles: %s", profiles) # Check the number of detected profiles @@ -76,9 +76,7 @@ def test_valid_local_workflow_rocrate(): # Set the rocrate_uri to the workflow RO-Crate crate_path = ValidROC().workflow_roc logger.debug("Validating a local RO-Crate: %s", crate_path) - profiles = detect_profiles(ValidationSettings( - rocrate_uri=URI(crate_path) - )) + profiles = detect_profiles(ValidationSettings(rocrate_uri=URI(crate_path))) assert len(profiles) == 1, "Expected a single profile" assert profiles[0].identifier == "workflow-ro-crate-1.0", "Expected the 'workflow-ro-crate-1.0' profile" @@ -87,9 +85,7 @@ def test_valid_local_process_run_crate(): # Set the rocrate_uri to the process run RO-Crate crate_path = ValidROC().process_run_crate logger.debug("Validating a local RO-Crate: %s", crate_path) - profiles = detect_profiles(ValidationSettings( - rocrate_uri=URI(crate_path) - )) + profiles = detect_profiles(ValidationSettings(rocrate_uri=URI(crate_path))) assert len(profiles) == 1, "Expected a single profile" assert profiles[0].identifier == "process-run-crate-0.5", "Expected the 'process-run-crate-0.5' profile" @@ -98,12 +94,11 @@ def test_valid_local_workflow_testing_ro_crate(): # Set the rocrate_uri to the workflow testing RO-Crate crate_path = ValidROC().workflow_testing_ro_crate logger.debug("Validating a local RO-Crate: %s", crate_path) - profiles = detect_profiles(ValidationSettings( - rocrate_uri=URI(crate_path) - )) + profiles = detect_profiles(ValidationSettings(rocrate_uri=URI(crate_path))) assert len(profiles) == 1, "Expected a single profile" - assert profiles[0].identifier == "workflow-testing-ro-crate-0.1", \ + assert profiles[0].identifier == 
"workflow-testing-ro-crate-0.1", ( "Expected the 'workflow-testing-ro-crate-0.1' profile" + ) def test_disable_inherited_profiles_issue_reporting(): @@ -112,10 +107,7 @@ def test_disable_inherited_profiles_issue_reporting(): logger.debug("Validating a local RO-Crate: %s", crate_path) # First, validate with inherited profiles issue reporting enabled - settings = ValidationSettings( - rocrate_uri=URI(crate_path), - disable_inherited_profiles_issue_reporting=False - ) + settings = ValidationSettings(rocrate_uri=URI(crate_path), disable_inherited_profiles_issue_reporting=False) result = validate(settings) total_issues_with_inheritance = len(result.get_issues()) logger.debug("Total issues with inherited profiles issue reporting enabled: %d", total_issues_with_inheritance) @@ -127,14 +119,16 @@ def test_disable_inherited_profiles_issue_reporting(): logger.debug("Total issues with inherited profiles issue reporting disabled: %d", total_issues_without_inheritance) # Check that disabling inherited profiles issue reporting reduces the number of reported issues - assert total_issues_without_inheritance <= total_issues_with_inheritance, \ + assert total_issues_without_inheritance <= total_issues_with_inheritance, ( "Disabling inherited profiles issue reporting should not increase the number of reported issues" + ) # Check that all reported issues are from the main profile main_profile_identifier = "workflow-testing-ro-crate-0.1" for issue in result.get_issues(): - assert issue.check.requirement.profile.identifier == main_profile_identifier, \ + assert issue.check.requirement.profile.identifier == main_profile_identifier, ( "All reported issues should belong to the main profile when inherited profiles issue reporting is disabled" + ) def test_skip_pycheck_on_workflow_ro_crate(): @@ -143,19 +137,23 @@ def test_skip_pycheck_on_workflow_ro_crate(): logger.debug("Validating a local RO-Crate: %s", crate_path) settings = ValidationSettings(rocrate_uri=URI(crate_path)) result = 
validate(settings) - assert not result.passed(), \ + assert not result.passed(), ( "The RO-Crate is expected to be invalid because of an incorrect conformsTo field and missing resources" + ) assert len(result.failed_checks) == 2, "No failed checks expected when skipping the problematic check" - assert any(check.identifier == "ro-crate-1.1_5.3" for check in result.failed_checks), \ + assert any(check.identifier == "ro-crate-1.1_5.3" for check in result.failed_checks), ( "Expected the check 'ro-crate-1.1_5.3' to fail" - assert any(check.identifier == "ro-crate-1.1_12.1" for check in result.failed_checks), \ + ) + assert any(check.identifier == "ro-crate-1.1_12.1" for check in result.failed_checks), ( "Expected the check 'ro-crate-1.1_12.1' to fail" + ) # Perform a new validation skipping specific checks settings.skip_checks = ["ro-crate-1.1_5.3", "ro-crate-1.1_12.1"] result = validate(settings) - assert result.passed(), \ + assert result.passed(), ( "The RO-Crate should be valid when skipping the checks related to the invalid file descriptor entity" + ) # Ensure that the skipped checks are indeed skipped skipped_check_ids = {check.identifier for check in result.skipped_checks} @@ -167,16 +165,13 @@ def test_valid_local_multi_profile_crate(): # Set the rocrate_uri to the multi-profile RO-Crate crate_path = InvalidMultiProfileROC().invalid_multi_profile_crate logger.debug("Validating a local RO-Crate: %s", crate_path) - profiles = detect_profiles(ValidationSettings( - rocrate_uri=URI(crate_path) - )) + profiles = detect_profiles(ValidationSettings(rocrate_uri=URI(crate_path))) assert len(profiles) == 2, "Expected two profiles" # Extract profiles identifiers profiles_ids = [profile.identifier for profile in profiles] assert "provenance-run-crate-0.5" in profiles_ids, "Expected the 'provenance-run-crate' profile" - assert "workflow-testing-ro-crate-0.1" in profiles_ids, \ - "Expected the 'workflow-testing-ro-crate-0.1' profile" + assert "workflow-testing-ro-crate-0.1" in 
profiles_ids, "Expected the 'workflow-testing-ro-crate-0.1' profile" def test_valid_crate_folder_with_metadata_only(): @@ -191,10 +186,7 @@ def test_valid_crate_folder_with_metadata_only(): shutil.copy(metadata_src, metadata_dst) # Define shared settings object - settings = ValidationSettings( - rocrate_uri=URI(Path(tmpdirname)), - metadata_only=True - ) + settings = ValidationSettings(rocrate_uri=URI(Path(tmpdirname)), metadata_only=True) profiles = detect_profiles(settings) @@ -231,5 +223,6 @@ def test_valid_crate_metadata_dict_with_metadata_only(): assert profiles[0].identifier == "ro-crate-1.1", "Expected the 'ro-crate' profile" from rocrate_validator.services import validate_metadata_as_dict + result = validate_metadata_as_dict(metadata_dict, settings) assert result.passed(), "RO-Crate should be valid in metadata-only mode" diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index e160d932b..bfd26a414 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -58,13 +58,16 @@ def test_s3_uri_is_remote(): assert not uri.is_natively_checkable() -@pytest.mark.parametrize("uri_str,expected_scheme", [ - # Scheme-only (no authority) absolute URIs are valid per RFC 3986 and - # accepted by RO-Crate 1.1 § 4.2.2 as Data Entity `@id` values. - ("urn:doi:10.5281/zenodo.1234", "urn"), - ("doi:10.5281/zenodo.1234", "doi"), - ("arcp://name,foo/bar", "arcp"), -]) +@pytest.mark.parametrize( + "uri_str,expected_scheme", + [ + # Scheme-only (no authority) absolute URIs are valid per RFC 3986 and + # accepted by RO-Crate 1.1 § 4.2.2 as Data Entity `@id` values. 
+ ("urn:doi:10.5281/zenodo.1234", "urn"), + ("doi:10.5281/zenodo.1234", "doi"), + ("arcp://name,foo/bar", "arcp"), + ], +) def test_scheme_only_absolute_uri_is_remote(uri_str, expected_scheme): uri = URI(uri_str) assert uri.scheme == expected_scheme @@ -83,10 +86,13 @@ def test_file_uri_with_remote_host_is_remote(): assert not uri.is_natively_checkable() -@pytest.mark.parametrize("uri_str", [ - "file:///absolute/path/file.txt", - "file://localhost/absolute/path/file.txt", -]) +@pytest.mark.parametrize( + "uri_str", + [ + "file:///absolute/path/file.txt", + "file://localhost/absolute/path/file.txt", + ], +) def test_file_uri_to_local_host_is_local(uri_str): # An empty or `localhost` authority denotes the local machine. uri = URI(uri_str) @@ -173,7 +179,7 @@ def test_rocrate_uri_local_folder_invalid(): with pytest.raises(ROCrateInvalidURIError) as excinfo: validate_rocrate_uri(uri, silent=False) assert str(excinfo.value) == ( - f"\"{uri}\" is not a valid RO-Crate URI. " + f'"{uri}" is not a valid RO-Crate URI. ' "It MUST be either a local path to the RO-Crate root directory " "or a local/remote RO-Crate ZIP file." ) @@ -194,7 +200,7 @@ def test_rocrate_uri_local_zip_invalid(): with pytest.raises(ROCrateInvalidURIError) as excinfo: validate_rocrate_uri(uri, silent=False) assert str(excinfo.value) == ( - f"\"{uri}\" is not a valid RO-Crate URI. " + f'"{uri}" is not a valid RO-Crate URI. ' "It MUST be either a local path to the RO-Crate root directory " "or a local/remote RO-Crate ZIP file." ) @@ -208,8 +214,7 @@ def test_rocrate_uri_remote_valid(): def test_rocrate_uri_remote_invalid(): # An unknown scheme is a valid URI but cannot be used as an RO-Crate root. 
uri = URI("httpx:///example.com") - assert not validate_rocrate_uri(uri, silent=True), \ - f"The URI {uri} should not be accepted as an RO-Crate root" + assert not validate_rocrate_uri(uri, silent=True), f"The URI {uri} should not be accepted as an RO-Crate root" with pytest.raises(ROCrateInvalidURIError): validate_rocrate_uri(uri, silent=False) @@ -221,9 +226,8 @@ def test_rocrate_uri_remote_invalid(): # Use verbose mode to print the error message with pytest.raises(ROCrateInvalidURIError) as excinfo: validate_rocrate_uri(uri, silent=False) - assert str( - excinfo.value) == f"The RO-crate at the URI \"{uri}\" is not available" + assert str(excinfo.value) == f'The RO-crate at the URI "{uri}" is not available' -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_validation_settings.py b/tests/unit/test_validation_settings.py index add220a06..7f4284e11 100644 --- a/tests/unit/test_validation_settings.py +++ b/tests/unit/test_validation_settings.py @@ -24,7 +24,7 @@ def test_validation_settings_parse_dict(): "requirement_severity": "RECOMMENDED", "enable_profile_inheritance": False, "disable_inherited_profiles_issue_reporting": True, - "skip_checks": ["check1", "check2"] + "skip_checks": ["check1", "check2"], } settings = ValidationSettings.parse(settings_dict) assert str(settings.rocrate_uri) == "/path/to/data" @@ -42,7 +42,7 @@ def test_validation_settings_parse_object(): requirement_severity=Severity.RECOMMENDED, enable_profile_inheritance=False, disable_inherited_profiles_issue_reporting=True, - skip_checks=["check1", "check2"] + skip_checks=["check1", "check2"], ) settings = ValidationSettings.parse(existing_settings) assert str(settings.rocrate_uri) == "/path/to/data" @@ -63,7 +63,7 @@ def test_validation_settings_to_dict(): rocrate_uri=URI("/path/to/data"), profiles_path="/path/to/profiles", # type: ignore[arg-type] requirement_severity=Severity.RECOMMENDED, - enable_profile_inheritance=False + 
enable_profile_inheritance=False, ) settings_dict = settings.to_dict() assert settings_dict["rocrate_uri"] == "/path/to/data" From b96211f1d1ae6e603e8b539a3ba32f68685bca74 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:38:24 +0200 Subject: [PATCH 315/352] =?UTF-8?q?style(tests):=20=F0=9F=8E=A8=20fix=20"d?= =?UTF-8?q?escending"=20typo=20and=20add=20trailing=20newline=20to=20test?= =?UTF-8?q?=20crates?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../invalid/0_multi_profile_crate/packed.cwl | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../conformsto_no_wfrc/ro-crate-metadata.json | 4 +-- .../conformsto_no_wroc/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- 
.../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 2 +- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../tool_bad_input/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../tool_bad_output/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../tool_no_input/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../tool_no_mainentity/ro-crate-metadata.json | 4 +-- .../tool_no_output/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../workflow_bad_step/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../workflow_no_step/ro-crate-metadata.json | 4 +-- .../ro-crate-metadata.json | 4 +-- .../valid/provenance-run-crate/packed.cwl | 2 +- .../workflow-roc/sort-and-change-case.cwl | 1 - .../Galaxy-Workflow-Hello_World.ga | 2 +- .../valid/wrroc-paper/mapping/README.md | 5 ++- .../mapping/prov-mapping-w-metadata.tsv | 33 +++++++++---------- .../wrroc-paper/mapping/prov-mapping.tsv | 32 +++++++++--------- tests/ro-crate-metadata.json | 2 +- 74 files changed, 166 insertions(+), 169 deletions(-) diff --git a/tests/data/crates/invalid/0_multi_profile_crate/packed.cwl b/tests/data/crates/invalid/0_multi_profile_crate/packed.cwl index 251587dae..acc2ce0d0 100644 --- a/tests/data/crates/invalid/0_multi_profile_crate/packed.cwl +++ b/tests/data/crates/invalid/0_multi_profile_crate/packed.cwl @@ -134,4 +134,4 @@ "file:///home/stain/src/cwltool/tests/wf/empty.ttl", "file:///home/stain/src/cwltool/tests/wf/empty2.ttl" ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/invalid/0_multi_profile_crate/ro-crate-metadata.json b/tests/data/crates/invalid/0_multi_profile_crate/ro-crate-metadata.json index 990ee3892..d352ec07b 100644 --- a/tests/data/crates/invalid/0_multi_profile_crate/ro-crate-metadata.json +++ b/tests/data/crates/invalid/0_multi_profile_crate/ro-crate-metadata.json @@ -183,7 +183,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { diff --git a/tests/data/crates/invalid/5_provenance_run_crate/action_bad_resourceusage/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/action_bad_resourceusage/ro-crate-metadata.json index e966224bf..071677674 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/action_bad_resourceusage/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/action_bad_resourceusage/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -716,4 +716,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/action_no_resourceusage/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/action_no_resourceusage/ro-crate-metadata.json index 6aa09ee4b..9c9e8c32f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/action_no_resourceusage/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/action_no_resourceusage/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) 
sort", "name": "reverse_sort" }, { @@ -693,4 +693,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_procrc/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_procrc/ro-crate-metadata.json index b7017b89e..156eb7751 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_procrc/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_procrc/ro-crate-metadata.json @@ -156,7 +156,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -520,4 +520,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_provrc/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_provrc/ro-crate-metadata.json index fdf058f1f..079d3b623 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_provrc/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_provrc/ro-crate-metadata.json @@ -156,7 +156,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -520,4 +520,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wfrc/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wfrc/ro-crate-metadata.json index da4d822b0..05e8b7376 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wfrc/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wfrc/ro-crate-metadata.json @@ -156,7 +156,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -520,4 +520,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wroc/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wroc/ro-crate-metadata.json index 08271c29d..d0f5ce549 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wroc/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/conformsto_no_wroc/ro-crate-metadata.json @@ -156,7 +156,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -520,4 +520,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_actionstatus/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_actionstatus/ro-crate-metadata.json index 762a9bb7a..451a2293f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_actionstatus/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_actionstatus/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_instrument/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_instrument/ro-crate-metadata.json index 2808ff376..24fed472a 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_object/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_object/ro-crate-metadata.json index e586cedac..19a80a6ad 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_object/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_bad_object/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_error_not_failed_status/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_error_not_failed_status/ro-crate-metadata.json index 7c4a033e7..88cda91b7 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_error_not_failed_status/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_error_not_failed_status/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -591,4 +591,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_actionstatus/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_actionstatus/ro-crate-metadata.json index 8b93d1dfe..7b1d7a452 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_actionstatus/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_actionstatus/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -589,4 +589,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_error/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_error/ro-crate-metadata.json index e6a3f614a..210e62712 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_error/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_error/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -589,4 +589,4 @@ "name": "input object document" } ] -} \ No newline at end of 
file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_instrument/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_instrument/ro-crate-metadata.json index 9364349d6..3fa35b40c 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -580,4 +580,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_object/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_object/ro-crate-metadata.json index 53b756bb2..76fb9543f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_object/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/controlaction_no_object/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -580,4 +580,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_conformsto/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_conformsto/ro-crate-metadata.json index 4476cb8d4..aa0784c9f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_conformsto/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_conformsto/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -667,4 +667,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_encodingformat/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_encodingformat/ro-crate-metadata.json index 3b53d74dd..5cca4c0ea 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_encodingformat/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/environment_file_no_encodingformat/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -669,4 +669,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_buildinstructions/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_buildinstructions/ro-crate-metadata.json index 7ec587029..a7ce512e0 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_buildinstructions/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_buildinstructions/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -670,4 +670,4 @@ "name": "input 
object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_connection/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_connection/ro-crate-metadata.json index b8be0262c..1697ff309 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_connection/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_connection/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_inv_step/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_inv_step/ro-crate-metadata.json index b63d1fcf8..6a3b7aa9f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_inv_step/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_inv_step/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_position/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_position/ro-crate-metadata.json index 91ed291ca..fa754f394 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_position/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_position/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_workexample/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_workexample/ro-crate-metadata.json index d8102054f..26ed5fc02 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_workexample/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_bad_workexample/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_buildinstructions/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_buildinstructions/ro-crate-metadata.json index cddd8fcb4..87b68a50e 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_buildinstructions/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_buildinstructions/ro-crate-metadata.json @@ -177,7 +177,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -655,4 +655,4 @@ "name": "input object document" } ] -} \ No newline at end of 
file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_connection/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_connection/ro-crate-metadata.json index d0e46602b..f1ef0121f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_connection/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_connection/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -585,4 +585,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_inv_step/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_inv_step/ro-crate-metadata.json index ed24c1c8e..d886e49f9 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_inv_step/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_inv_step/ro-crate-metadata.json @@ -162,7 +162,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -580,4 +580,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_position/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_position/ro-crate-metadata.json index 97fb135bb..5b1412a92 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_position/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_position/ro-crate-metadata.json @@ -165,7 +165,7 
@@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -582,4 +582,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_workexample/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_workexample/ro-crate-metadata.json index 4bcfe37c7..45f00c600 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_workexample/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/howtostep_no_workexample/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -580,4 +580,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_actionstatus/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_actionstatus/ro-crate-metadata.json index 3e2b96c05..de087608a 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_actionstatus/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_actionstatus/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_instrument/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_instrument/ro-crate-metadata.json index 5ab35c131..814d971be 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_object/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_object/ro-crate-metadata.json index d2f2dd48e..c8378449e 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_object/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_object/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_result/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_result/ro-crate-metadata.json index fd86bf5bd..c74b190dd 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_result/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_bad_result/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_error_not_failed_status/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_error_not_failed_status/ro-crate-metadata.json index c9aaf9c7b..fc482c454 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_error_not_failed_status/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_error_not_failed_status/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -591,4 +591,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_actionstatus/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_actionstatus/ro-crate-metadata.json index faac8d71d..99366d13b 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_actionstatus/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_actionstatus/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -589,4 +589,4 @@ "name": 
"input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_error/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_error/ro-crate-metadata.json index c0bfbb3d2..e328908e9 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_error/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_error/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_instrument/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_instrument/ro-crate-metadata.json index 46ec7a7e5..f13a1fb1c 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -575,4 +575,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_object/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_object/ro-crate-metadata.json index 1350abb69..80257d03d 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_object/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_object/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -575,4 +575,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_result/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_result/ro-crate-metadata.json index a1c12767f..027e35d79 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_result/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/organizeaction_no_result/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -580,4 +580,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_sourceparameter/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_sourceparameter/ro-crate-metadata.json index 7753f58fa..9e5176e0c 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_sourceparameter/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_sourceparameter/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object 
document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_targetparameter/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_targetparameter/ro-crate-metadata.json index 1b7d016c4..1c7ab5429 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_targetparameter/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_bad_targetparameter/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_sourceparameter/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_sourceparameter/ro-crate-metadata.json index a66e5c3b7..9fd01d6db 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_sourceparameter/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_sourceparameter/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -587,4 +587,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_targetparameter/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_targetparameter/ro-crate-metadata.json index 624e6b394..12c6fcc38 100644 --- 
a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_targetparameter/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_no_targetparameter/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -587,4 +587,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_referenced/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_referenced/ro-crate-metadata.json index 0b7b2a792..536d5110e 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_referenced/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_referenced/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_step_referenced/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_step_referenced/ro-crate-metadata.json index 0802e77f1..c5da98282 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_step_referenced/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_step_referenced/ro-crate-metadata.json @@ -188,7 +188,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) 
sort", "name": "reverse_sort" }, { diff --git a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_workflow_referenced/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_workflow_referenced/ro-crate-metadata.json index 439909ca3..f853389f1 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_workflow_referenced/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/parameterconnection_not_workflow_referenced/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { diff --git a/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_propertyid/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_propertyid/ro-crate-metadata.json index c6ff06e70..93f8c2bdf 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_propertyid/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_propertyid/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -715,4 +715,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_unitcode/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_unitcode/ro-crate-metadata.json index 73ff46775..5c4690879 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_unitcode/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/propertyvalue_no_unitcode/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -717,4 +717,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_environment/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_environment/ro-crate-metadata.json index 86e0e9911..265966310 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_environment/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_environment/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_input/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_input/ro-crate-metadata.json index f53248a09..13a6e1b8b 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_input/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_input/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_inv_instrument/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_inv_instrument/ro-crate-metadata.json index 2a58aa802..bbd4dd1c6 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_inv_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_inv_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -586,4 +586,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_output/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_output/ro-crate-metadata.json index fab2c2da9..6e41180f4 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_output/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_output/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_softwarerequirements/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_softwarerequirements/ro-crate-metadata.json index 8d3b94e33..a31ad534f 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_softwarerequirements/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_bad_softwarerequirements/ro-crate-metadata.json @@ -165,7 +165,7 @@ 
"@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -622,4 +622,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_environment/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_environment/ro-crate-metadata.json index 9d7da273a..278f0e931 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_environment/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_environment/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -529,4 +529,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_input/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_input/ro-crate-metadata.json index 759a63636..f68a3f913 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_input/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_input/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -518,4 +518,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_inv_instrument/ro-crate-metadata.json 
b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_inv_instrument/ro-crate-metadata.json index 3641e9cf9..946d4cad4 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_inv_instrument/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_inv_instrument/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_mainentity/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_mainentity/ro-crate-metadata.json index d2a4a7186..73f103d5a 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_mainentity/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_mainentity/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -619,4 +619,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_output/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_output/ro-crate-metadata.json index a706f0f46..ca18c99a3 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_output/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_output/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If 
true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -518,4 +518,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_softwarerequirements/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_softwarerequirements/ro-crate-metadata.json index 91e690484..ff3373743 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/tool_no_softwarerequirements/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/tool_no_softwarerequirements/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -618,4 +618,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_buildinstructions/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_buildinstructions/ro-crate-metadata.json index d311251c2..e1bcaa12b 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_buildinstructions/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_buildinstructions/ro-crate-metadata.json @@ -180,7 +180,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -670,4 +670,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_connection/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_connection/ro-crate-metadata.json index 7424a4c0a..b99d4c9fa 100644 --- 
a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_connection/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_connection/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -590,4 +590,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_haspart/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_haspart/ro-crate-metadata.json index c99cf26d3..fc1b91fe2 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_haspart/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_haspart/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -529,4 +529,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_step/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_step/ro-crate-metadata.json index 7450446e8..ab57a9dd4 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_step/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_bad_step/ro-crate-metadata.json @@ -165,7 +165,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -583,4 +583,4 @@ "name": "input object document" 
} ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_buildinstructions/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_buildinstructions/ro-crate-metadata.json index 1e2b099c6..25790629e 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_buildinstructions/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_buildinstructions/ro-crate-metadata.json @@ -174,7 +174,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -655,4 +655,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_connection/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_connection/ro-crate-metadata.json index 773c3eb8a..8f0980486 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_connection/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_connection/ro-crate-metadata.json @@ -160,7 +160,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -585,4 +585,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_haspart/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_haspart/ro-crate-metadata.json index 446bb0e06..229c60e47 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_haspart/ro-crate-metadata.json +++ 
b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_haspart/ro-crate-metadata.json @@ -157,7 +157,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -521,4 +521,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_step/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_step/ro-crate-metadata.json index 68ff64660..c5fb72b34 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_step/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_no_step/ro-crate-metadata.json @@ -157,7 +157,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -575,4 +575,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git a/tests/data/crates/invalid/5_provenance_run_crate/workflow_type_no_howto/ro-crate-metadata.json b/tests/data/crates/invalid/5_provenance_run_crate/workflow_type_no_howto/ro-crate-metadata.json index 337c429b3..651da1af5 100644 --- a/tests/data/crates/invalid/5_provenance_run_crate/workflow_type_no_howto/ro-crate-metadata.json +++ b/tests/data/crates/invalid/5_provenance_run_crate/workflow_type_no_howto/ro-crate-metadata.json @@ -164,7 +164,7 @@ "@type": "FormalParameter", "additionalType": "Boolean", "defaultValue": "True", - "description": "If true, reverse (decending) sort", + "description": "If true, reverse (descending) sort", "name": "reverse_sort" }, { @@ -582,4 +582,4 @@ "name": "input object document" } ] -} \ No newline at end of file +} diff --git 
a/tests/data/crates/valid/provenance-run-crate/packed.cwl b/tests/data/crates/valid/provenance-run-crate/packed.cwl index 251587dae..acc2ce0d0 100644 --- a/tests/data/crates/valid/provenance-run-crate/packed.cwl +++ b/tests/data/crates/valid/provenance-run-crate/packed.cwl @@ -134,4 +134,4 @@ "file:///home/stain/src/cwltool/tests/wf/empty.ttl", "file:///home/stain/src/cwltool/tests/wf/empty2.ttl" ] -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/workflow-roc/sort-and-change-case.cwl b/tests/data/crates/valid/workflow-roc/sort-and-change-case.cwl index 679fb292d..662ae8628 100644 --- a/tests/data/crates/valid/workflow-roc/sort-and-change-case.cwl +++ b/tests/data/crates/valid/workflow-roc/sort-and-change-case.cwl @@ -39,4 +39,3 @@ steps: out_file1: doc: tabular type: File - diff --git a/tests/data/crates/valid/workflow-run-crate/Galaxy-Workflow-Hello_World.ga b/tests/data/crates/valid/workflow-run-crate/Galaxy-Workflow-Hello_World.ga index 8bd9b1ba8..c29d6786b 100644 --- a/tests/data/crates/valid/workflow-run-crate/Galaxy-Workflow-Hello_World.ga +++ b/tests/data/crates/valid/workflow-run-crate/Galaxy-Workflow-Hello_World.ga @@ -154,4 +154,4 @@ ], "uuid": "576ba0e9-b112-47f0-845e-32d8af3a1f35", "version": 3 -} \ No newline at end of file +} diff --git a/tests/data/crates/valid/wrroc-paper/mapping/README.md b/tests/data/crates/valid/wrroc-paper/mapping/README.md index 49bef88a9..47a9c53cc 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/README.md +++ b/tests/data/crates/valid/wrroc-paper/mapping/README.md @@ -2,7 +2,7 @@ ## About SSSOM -SSSOM is a way to specify semantic mappings, typically based on SKOS. +SSSOM is a way to specify semantic mappings, typically based on SKOS. 
* https://mapping-commons.github.io/sssom/spec/ * https://mapping-commons.github.io/sssom/tutorial/ @@ -14,7 +14,7 @@ SSSOM mapping ar typically edited collaboratively as tab-separated text files, w ## Editing -Please edit [prov-mapping.tsv](prov-mapping.tsv) taking care not to break the tabular characters. You may use the _Rainbow CSV_ extension in Visual Studio Code, or a spreadsheet software. +Please edit [prov-mapping.tsv](prov-mapping.tsv) taking care not to break the tabular characters. You may use the _Rainbow CSV_ extension in Visual Studio Code, or a spreadsheet software. The metadata headers are maintained in [prov-mapping.yml](prov-mapping.yml) as well as in [ro-crate-metadata.json](../ro-crate-metadata.json). @@ -34,4 +34,3 @@ Then to generate the converted file formats `prov-mapping.rdf prov-mapping.json ``` make ``` - diff --git a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping-w-metadata.tsv b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping-w-metadata.tsv index cc71ad325..b7bed03cb 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping-w-metadata.tsv +++ b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping-w-metadata.tsv @@ -20,27 +20,26 @@ subject_id subject_label predicate_id object_id object_label mapping_justification creator_id author_id mapping_date confidence comment prov:Activity Activity skos:narrowMatch schema:CreateAction Create action semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming activity is workflow/process execution prov:Activity Activity skos:narrowMatch schema:OrganizeAction Organize action semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming activity is workflow/process execution -prov:Agent Agent skos:narrowMatch schema:Organization Organization semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 1.0 -prov:Agent Agent skos:narrowMatch 
schema:Person Person semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 1.0 -prov:Agent Agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.75 +prov:Agent Agent skos:narrowMatch schema:Organization Organization semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 1.0 +prov:Agent Agent skos:narrowMatch schema:Person Person semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 1.0 +prov:Agent Agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.75 prov:Entity Entity skos:narrowMatch schema:Dataset Dataset semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming non-Plan entity prov:Entity Entity skos:narrowMatch schema:MediaObject File (Media object) semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming non-Plan entity prov:Entity Entity skos:narrowMatch schema:PropertyValue Property value semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming non-Plan entity -prov:Organization Organization skos:exactMatch schema:Organization Organization semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:Person Person skos:exactMatch schema:Person Person semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:Plan Plan skos:narrowMatch bioschema:ComputationalWorkflow Computational workflow semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:Plan Plan skos:narrowMatch schema:HowTo How-to semapv:ManualMappingCuration orcid:0000-0001-9842-9718 
orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:Plan Plan skos:narrowMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:SoftwareAgent Software agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:Organization Organization skos:exactMatch schema:Organization Organization semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:Person Person skos:exactMatch schema:Person Person semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:Plan Plan skos:narrowMatch bioschema:ComputationalWorkflow Computational workflow semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:Plan Plan skos:narrowMatch schema:HowTo How-to semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:Plan Plan skos:narrowMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:SoftwareAgent Software agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 prov:agent agent skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Assuming agent is a workflow management system prov:agent agent skos:relatedMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Complex mapping: an agent implies the existence of a qualified association (prov:Association) linked to a prov:Agent through prov:agent -prov:endedAtTime ended at time 
skos:closeMatch schema:endTime end time semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.95 +prov:endedAtTime ended at time skos:closeMatch schema:endTime end time semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.95 prov:hadPlan hadPlan skos:relatedMatch schema:instrument instrument semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Complex mapping: an instrument implies the existence of a qualified association (prov:Association) linked to a prov:Plan through prov:hadPlan -prov:startedAtTime started at time skos:closeMatch schema:startTime start time semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.95 -prov:used used skos:exactMatch schema:object object semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:wasAssociatedWith was associated with skos:narrowMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:wasAssociatedWith was associated with skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 -prov:wasEndedBy was ended by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 2023-10-22 0.95 +prov:startedAtTime started at time skos:closeMatch schema:startTime start time semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 2023-10-22 0.95 +prov:used used skos:exactMatch schema:object object semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:wasAssociatedWith was associated with skos:narrowMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 
+prov:wasAssociatedWith was associated with skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 +prov:wasEndedBy was ended by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 2023-10-22 0.95 prov:wasGeneratedBy was generated by skos:closeMatch schema:result object_label semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 2023-10-22 0.95 Note inverse properties: :ent prov:wasGeneratedBy :act vs :act schema:result :ent -prov:wasStartedBy was started by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 2023-10-22 0.95 - +prov:wasStartedBy was started by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 2023-10-22 0.95 diff --git a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.tsv b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.tsv index 3444964d6..a8debe937 100644 --- a/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.tsv +++ b/tests/data/crates/valid/wrroc-paper/mapping/prov-mapping.tsv @@ -2,25 +2,25 @@ subject_id subject_label predicate_id object_id object_label mapping_justificati prov:Activity Activity skos:narrowMatch schema:CreateAction Create action semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming activity is workflow/process execution prov:Activity Activity skos:narrowMatch schema:OrganizeAction Organize action semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming activity is workflow/process execution prov:agent agent skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration 2023-10-22 
orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming agent is a workflow management system -prov:Agent Agent skos:narrowMatch schema:Person Person semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 1 -prov:Agent Agent skos:narrowMatch schema:Organization Organization semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 1 -prov:Agent Agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.75 -prov:Person Person skos:exactMatch schema:Person Person semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:Organization Organization skos:exactMatch schema:Organization Organization semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:SoftwareAgent Software agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:Plan Plan skos:narrowMatch bioschema:ComputationalWorkflow Computational workflow semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:Plan Plan skos:narrowMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:Plan Plan skos:narrowMatch schema:HowTo How-to semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:Agent Agent skos:narrowMatch schema:Person Person semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 1 +prov:Agent Agent skos:narrowMatch schema:Organization Organization semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 1 +prov:Agent Agent skos:relatedMatch 
schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.75 +prov:Person Person skos:exactMatch schema:Person Person semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:Organization Organization skos:exactMatch schema:Organization Organization semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:SoftwareAgent Software agent skos:relatedMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:Plan Plan skos:narrowMatch bioschema:ComputationalWorkflow Computational workflow semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:Plan Plan skos:narrowMatch schema:SoftwareApplication Software application semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:Plan Plan skos:narrowMatch schema:HowTo How-to semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 prov:Entity Entity skos:narrowMatch schema:MediaObject File (Media object) semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming non-Plan entity prov:Entity Entity skos:narrowMatch schema:Dataset Dataset semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming non-Plan entity prov:Entity Entity skos:narrowMatch schema:PropertyValue Property value semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Assuming non-Plan entity -prov:wasStartedBy was started by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 0.95 -prov:startedAtTime started at time 
skos:closeMatch schema:startTime start time semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.95 -prov:wasEndedBy was ended by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 0.95 -prov:endedAtTime ended at time skos:closeMatch schema:endTime end time semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.95 -prov:wasAssociatedWith was associated with skos:narrowMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 -prov:wasAssociatedWith was associated with skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:wasStartedBy was started by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 0.95 +prov:startedAtTime started at time skos:closeMatch schema:startTime start time semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.95 +prov:wasEndedBy was ended by skos:relatedMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145|orcid:0000-0001-9842-9718 0.95 +prov:endedAtTime ended at time skos:closeMatch schema:endTime end time semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0001-9842-9718 0.95 +prov:wasAssociatedWith was associated with skos:narrowMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:wasAssociatedWith was associated with skos:narrowMatch schema:instrument instrument semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 prov:hadPlan hadPlan skos:relatedMatch 
schema:instrument instrument semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Complex mapping: an instrument implies the existence of a qualified association (prov:Association) linked to a prov:Plan through prov:hadPlan prov:agent agent skos:relatedMatch schema:agent agent semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Complex mapping: an agent implies the existence of a qualified association (prov:Association) linked to a prov:Agent through prov:agent -prov:used used skos:exactMatch schema:object object semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 +prov:used used skos:exactMatch schema:object object semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 prov:wasGeneratedBy was generated by skos:closeMatch schema:result object_label semapv:ManualMappingCuration 2023-10-22 orcid:0000-0001-9842-9718 orcid:0000-0003-0454-7145 0.95 Note inverse properties: :ent prov:wasGeneratedBy :act vs :act schema:result :ent diff --git a/tests/ro-crate-metadata.json b/tests/ro-crate-metadata.json index 8f45fbb5d..f6813d65f 100644 --- a/tests/ro-crate-metadata.json +++ b/tests/ro-crate-metadata.json @@ -309,4 +309,4 @@ "value": "True" } ] -} \ No newline at end of file +} From 3874c81dd4f15d0d4ac1304e69c80af0b2bdb695 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:51:35 +0200 Subject: [PATCH 316/352] =?UTF-8?q?style(format):=20=F0=9F=8E=A8=20fix=20m?= =?UTF-8?q?issing=20trailing=20new=20line?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env b/.env index e86de2aa6..5c41b8f91 100644 --- a/.env +++ b/.env @@ -1 +1 @@ -PYTHONPATH=rocrate_validator \ No newline at end of file +PYTHONPATH=rocrate_validator From 4c3501dd2dc55c09d15caefe34971e1d4d9525df 
Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:52:04 +0200 Subject: [PATCH 317/352] =?UTF-8?q?build(ruff):=20=F0=9F=94=A7=20set=20tar?= =?UTF-8?q?get-version=20=3D=20py310=20to=20align=20linter=20with=20projec?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ruff.toml | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/ruff.toml b/ruff.toml index 44c574d89..74c75bdbc 100644 --- a/ruff.toml +++ b/ruff.toml @@ -23,6 +23,12 @@ # which is a common convention in Python projects. line-length = 120 +# Lowest supported Python version. Ruff only infers this from +# `[project].requires-python`, which this (Poetry-based) project does not use, +# so the linter would otherwise fall back to its py39 default and skip 3.10+ +# modernizations. Keep in sync with `tool.poetry.dependencies.python` (>=3.10). +target-version = "py310" + # Extend the default set of rules with additional rules from various plugins. [lint] extend-select = [ @@ -61,26 +67,24 @@ explicit-preview-rules = true # Rules intentionally disabled. 
ignore = [ - "UP045", # non-pep604-annotation-optional: keep `Optional[X]` instead of `X | None` (project supports 3.9) - "UP007", # non-pep604-annotation: same rationale as UP045 - "PLR0904", # too-many-public-methods: pre-existing project style + # "PLR0904", # too-many-public-methods: pre-existing project style "PLR0913", # too-many-arguments: pre-existing project style - "PLR0917", # too-many-positional-arguments: pre-existing project style - "PLW3201", # bad-dunder-method-name: project uses `__method__` convention; renaming is not viable + # "PLR0917", # too-many-positional-arguments: pre-existing project style + # "PLW3201", # bad-dunder-method-name: project uses `__method__` convention; renaming is not viable # --- Stable rules pulled in only as a side effect of `preview = true` --- # (not part of the original selection; suppressed to keep the preview flag # scoped to PLR0914 / PLR1702. Enable individually later if desired.) "BLE001", # blind-except - "TRY401", # verbose-log-message - "TRY201", # verbose-raise - "TRY002", # raise-vanilla-class - "D419", # empty-docstring - "PIE804", # unnecessary-dict-kwargs - "PIE810", # multiple-starts-ends-with - "DTZ901", # datetime-min-max - "PYI034", # non-self-return-type - "PYI061", # redundant-none-literal - "S110", # try-except-pass + # "TRY401", # verbose-log-message + # "TRY201", # verbose-raise + # "TRY002", # raise-vanilla-class + # "D419", # empty-docstring + # "PIE804", # unnecessary-dict-kwargs + # "PIE810", # multiple-starts-ends-with + # "DTZ901", # datetime-min-max + # "PYI034", # non-self-return-type + # "PYI061", # redundant-none-literal + # "S110", # try-except-pass ] [lint.per-file-ignores] From 3790e4e94d99216a2906ae96a385504b2b215c86 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:53:33 +0200 Subject: [PATCH 318/352] =?UTF-8?q?ci(cspell):=20=F0=9F=93=9D=20add=20CSpe?= =?UTF-8?q?ll=20dictionary=20for=20project-specific=20terms?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Whitelist domain vocabulary (RO-Crate/JSON-LD terms, tool names, identifiers) so the spell checker stops flagging known words. --- .cspell.json | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 .cspell.json diff --git a/.cspell.json b/.cspell.json new file mode 100644 index 000000000..1ed66f9e3 --- /dev/null +++ b/.cspell.json @@ -0,0 +1,251 @@ +{ + "version": "0.2", + "language": "en,en-GB", + "allowCompoundWords": true, + "words": [ + "abfs", + "abfss", + "arcp", + "autoenum", + "bagit", + "bioschemas", + "bnode", + "bysource", + "camelcase", + "conformsTo", + "crate", + "crs", + "cwltool", + "dav", + "davs", + "dedup", + "dedups", + "eocd", + "exampleofwork", + "fmap", + "focusNode", + "ftps", + "furb", + "getattr", + "getch", + "getinfo", + "hasToken", + "hdfs", + "hier", + "howtostep", + "inquirerpy", + "ipynb", + "irods", + "isort", + "isProfileOf", + "isTransitiveProfileOf", + "iter", + "ivar", + "jsonld", + "kibibyte", + "kikkomep", + "linkml", + "luca", + "mainEntity", + "mccabe", + "metavar", + "mypy", + "myst", + "nbsphinx", + "nocheck", + "noqa", + "openapi", + "owlrl", + "partx", + "pfzy", + "piras", + "pireddu", + "pireddur", + "procrc", + "prof", + "provrc", + "pycache", + "pycheck", + "pydocstyle", + "pygments", + "pylint", + "pypi", + "pyproject", + "pyshacl", + "pytest", + "pytypes", + "pyupgrade", + "pyvenv", + "rdflib", + "rdfs", + "readthedocs", + "redef", + "regconfig", + "resultMessage", + "resultPath", + "resultSeverity", + "rgxs", + "roc", + "rocr", + "rocrate", + "rocv", + "rsync", + "rtype", + "scp", + "sftp", + "shacl", + "shoulds", + "simleo", + "skos", + "sourceConstraintComponent", + "sourceShape", + "sparql", + "sqlite", + "sssom", + "subclassof", + "subcls", + "tcgetattr", + "tcsadrain", + "tcsetattr", + "tldr", + "tmpdirname", + "toctree", + "trig", + "uncheckable", + "validator", + "venv", + "violatingEntity", + 
"violatingProperty", + "violatingPropertyValue", + "virtualenv", + "virtualenvs", + "wasb", + "wasbs", + "wfrc", + "wroc", + "wrroc", + "wtroc", + "xone", + "zenodo" + ], + "ignoreWords": [ + "AggregatedValidationStatistics", + "AvailabilityStatus", + "BadSyntaxError", + "BaseOutputFormatter", + "CheckIssue", + "CheckValidationError", + "CustomEncoder", + "DuplicateRequirement", + "DuplicateRequirementCheck", + "HttpRequester", + "InvalidProfilePath", + "InvalidSerializationFormat", + "JSONOutputFormatter", + "LevelCollection", + "LiveTextProgressLayout", + "LoggerProxy", + "MapIndex", + "MultiIndexMap", + "NodeShape", + "OfflineCacheMissError", + "OfflineFallbackSession", + "OutputFormatter", + "ProfileSpecificationError", + "ProfileSpecificationNotFound", + "ProfilesDirectoryNotFound", + "PropertyGroup", + "PropertyShape", + "PyFunctionCheck", + "PyRequirement", + "PyRequirementLoader", + "ROCrate", + "ROCrateBagitLocalFolder", + "ROCrateBagitLocalZip", + "ROCrateBagitRemoteZip", + "ROCrateEntity", + "ROCrateInvalidURIError", + "ROCrateLocalFolder", + "ROCrateLocalZip", + "ROCrateMetadata", + "ROCrateMetadataNotFoundError", + "ROCrateRemoteZip", + "ROCValidator", + "ROCValidatorError", + "RequirementCheck", + "RequirementCheckValidationEvent", + "RequirementLevel", + "RequirementLoader", + "RequirementValidationEvent", + "SHACLCheck", + "SHACLNode", + "SHACLNodeCollection", + "SHACLRequirement", + "SHACLRequirementLoader", + "SHACLValidationAlreadyProcessed", + "SHACLValidationContext", + "SHACLValidationContextManager", + "SHACLValidationError", + "SHACLValidationResult", + "SHACLValidationSkip", + "SHACLValidator", + "SHACLViolation", + "ShapesList", + "ShapesRegistry", + "SkipRequirementCheck", + "SourceSnippet", + "SystemPager", + "TextOutputFormatter", + "ValidationCommandView", + "ValidationStatisticsListener", + "WarmUpResult" + ], + "flagWords": [ + "accross", + "becuase", + "definately", + "enviroment", + "occured", + "occurence", + "paramter", + 
"recieve", + "refered", + "seperate", + "succesful", + "teh", + "untill" + ], + "ignorePaths": [ + ".git", + ".venv", + "__pycache__", + "*.lock", + "*.bck", + "*.mdbck", + "poetry.lock", + "extra_crates/", + "extra_profiles/", + "tests/data/", + "test.txt" + ], + "languageSettings": [ + { + "languageId": "python", + "dictionaries": [ + "python" + ] + }, + { + "languageId": "toml", + "dictionaries": [ + "python" + ] + }, + { + "languageId": "markdown", + "dictionaries": [ + "python" + ] + } + ] +} From 3d4e4944e4db01999f8bbe0dea597ffd3466ee83 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:55:02 +0200 Subject: [PATCH 319/352] =?UTF-8?q?build(deps):=20=F0=9F=94=92=20update=20?= =?UTF-8?q?poetry.lock?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- poetry.lock | 568 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 406 insertions(+), 162 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9a8f80028..174c67100 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. 
[[package]] name = "alabaster" @@ -63,6 +63,49 @@ files = [ {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, ] +[[package]] +name = "ast-serialize" +version = "0.5.0" +description = "Python bindings for mypy AST serialization" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "ast_serialize-0.5.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8f5c14f169eb0972c0c21bada5358b23d6047c76583b005234f865b11f1fa00a"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7d1a2de9de5be04652f0ed60738356ef94f66db37924a9499fffe98dc491aa0b"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be5173fb66f9b49026d9d5a2ff0fc7c7009077107c0eb285b2d60fdf1fe10bd1"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8015cd071ac1339924ee2b8098c93e00e155f30a16f40ec9816fcf84f4753f6"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5499e8797edff2a9186aa313ed382c6b422e798e9332d9953badcee6e69a88f2"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6848f2a093fb5548751a9a09bff8fcd229e2bbeb0e3331f391b6ae6d26cd9903"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:832d4c998e0b091fd60a6d6bceee535483c4d490de9ba85003af835225719261"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:16db7c62ec0b8efe1d7afd283a388d8f74f2605d56032e5a37747d2de8dba027"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:baf5eb061eb5bccade4128ad42da33787d72f6013809cd1b590376ece8b3c937"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:104e4a35bd7c124173c41760ef9aaea17ddb3f86c65cb643671d59afbe3ee94c"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:36be371028fc1675acb38a331bde160dbab7ff907fdf00b67eb6911aa106951b"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:061ee58bdb52341c8201a6df41182a977736bae3b7ded87ca7176ca25a8a47ab"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b15219e9cdc9f53f6f4cb51c009203507228226148c05c5e8fe451c28b435eb3"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-win32.whl", hash = "sha256:842d1c004bb466c7df036f95fabef789570541922b10976b12f5592a69cf0b38"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b0c06d760909b095cc466356dfccd05a1c7233a6ca191c020dca2c6a6f16c24c"}, + {file = "ast_serialize-0.5.0-cp314-cp314t-win_arm64.whl", hash = "sha256:787baedb0262cc49e8ce37cc15c00ae818e46a165a3b36f5e21ed174998104cb"}, + {file = "ast_serialize-0.5.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0668aa9459cfa8c9c49ddd2163ebcf43088ba045ef7492af6fe22e0098303101"}, + {file = "ast_serialize-0.5.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:bf683d6363edf2b39eed6b6d4fe22d34b6203867a67e27134d9e2a2680c4bc4a"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cc22cf0c9be65e71cf88fda130af60d61eb4a79370ad4cfe7900d48a4aa2211"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f66173891548c9f2726bf27957b41cabce12fa679dc6da505ddbde4d4b3b31cf"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e42d729ef2be96a14efbad355093284739e3670ece3e534f82cc8832790911d9"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b725026bafa801dbd7310eb13a75f0a2e370e7e51b2cb225f9d21fcfadf919ee"}, + {file = 
"ast_serialize-0.5.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b54f60c1d78767a53b67eaa663f0dfac3afe606aa07f1301572f588b73d64809"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:27d51654fc240a1e87e742d353d98eb45b75f62f129086b3596ab53df2ac2a43"}, + {file = "ast_serialize-0.5.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c36237c46dd1674542f2109740ea5ea485a169bf1431939ada0434e17934"}, + {file = "ast_serialize-0.5.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1943db345233cc7194a470f13afa9c59772c0b123dea0c9414c4d4ca54369759"}, + {file = "ast_serialize-0.5.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df1c00022cbbcb064bfaa505aa9c9295362443ce5dacb459d1331d3da353f887"}, + {file = "ast_serialize-0.5.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:cae65289fc456fde04af979a2be09302ef5d8ab92ef23e596d6746dc267ada27"}, + {file = "ast_serialize-0.5.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:239a4c354e8d676e9d94631d1d4a64edc6b266f86ff3a5a80aedd344f342c01d"}, + {file = "ast_serialize-0.5.0-cp39-abi3-win32.whl", hash = "sha256:143a4ef63285a075871908fda3672dc21864b83a8ec3ee12304aa3e4c5387b9a"}, + {file = "ast_serialize-0.5.0-cp39-abi3-win_amd64.whl", hash = "sha256:cf25572c526add400f26a4750dc6ce0c3bb93fc1f75e7ae0cad4ce4f2cd5c590"}, + {file = "ast_serialize-0.5.0-cp39-abi3-win_arm64.whl", hash = "sha256:92a31c9c20d25a076edaeec76b128a3535d74a24f340b9a8a7e96c9b86dc9642"}, + {file = "ast_serialize-0.5.0.tar.gz", hash = "sha256:5880091bfe6f4f986f22866375c2e884843e7a0b6343ae41aeea659613d879b6"}, +] + [[package]] name = "astroid" version = "3.3.11" @@ -138,14 +181,14 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)" [[package]] name = "beautifulsoup4" -version = "4.14.3" +version = "4.15.0" description = "Screen-scraping library" optional = false python-versions = ">=3.7.0" groups = ["docs"] files = [ - {file = 
"beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"}, - {file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"}, + {file = "beautifulsoup4-4.15.0-py3-none-any.whl", hash = "sha256:d6f88de62e1d4e38ecb1077eb9724cd0eff29d2a08ca16a401e9b9e93f117cf9"}, + {file = "beautifulsoup4-4.15.0.tar.gz", hash = "sha256:288e3ca7d54b06f2ac191970bc275c1939cb46d450b255bf6718b04aa37ab4f7"}, ] [package.dependencies] @@ -161,21 +204,23 @@ lxml = ["lxml"] [[package]] name = "bleach" -version = "4.1.0" +version = "6.4.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false -python-versions = ">=3.6" +python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "bleach-4.1.0-py2.py3-none-any.whl", hash = "sha256:4d2651ab93271d1129ac9cbc679f524565cc8a1b791909c4a51eac4446a15994"}, - {file = "bleach-4.1.0.tar.gz", hash = "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da"}, + {file = "bleach-6.4.0-py3-none-any.whl", hash = "sha256:4b6b6a54fff2e69a3dde9d21cc6301220bee3c3cb792187d11403fd795031081"}, + {file = "bleach-6.4.0.tar.gz", hash = "sha256:4202482733d85cedd04e59fcb2f89f4e4c7c385a78d3c3c23c30446843a37452"}, ] [package.dependencies] -packaging = "*" -six = ">=1.9.0" +tinycss2 = {version = ">=1.1.0", optional = true, markers = "extra == \"css\""} webencodings = "*" +[package.extras] +css = ["tinycss2 (>=1.1.0)"] + [[package]] name = "cachecontrol" version = "0.14.4" @@ -768,14 +813,14 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "distlib" -version = "0.4.1" +version = "0.4.3" description = "Distribution utilities" optional = false python-versions = "*" groups = ["dev"] files = [ - {file = "distlib-0.4.1-py2.py3-none-any.whl", hash = "sha256:9c2c552c68cbadc619f2d0ed3a69e27c351a3f4c9baa9ffb7df9e9cdc3d19a97"}, - {file = "distlib-0.4.1.tar.gz", hash = 
"sha256:c3804d0d2d4b5fcd44036eb860cb6660485fcdf5c2aba53dc324d805837ea65b"}, + {file = "distlib-0.4.3-py2.py3-none-any.whl", hash = "sha256:4b0ce306c966eb73bc3a7b6abad017c556dadd92c44701562cd528ac7fde4d5b"}, + {file = "distlib-0.4.3.tar.gz", hash = "sha256:f152097224a0ae24be5a0f6bae1b9359af82133bce63f98a95f86cae1aede9ed"}, ] [[package]] @@ -816,7 +861,7 @@ version = "0.12.0" description = "Tools to expand Python's enum module." optional = false python-versions = ">=3.6" -groups = ["main", "docs"] +groups = ["main"] files = [ {file = "enum_tools-0.12.0-py3-none-any.whl", hash = "sha256:d69b019f193c7b850b17d9ce18440db7ed62381571409af80ccc08c5218b340a"}, {file = "enum_tools-0.12.0.tar.gz", hash = "sha256:13ceb9376a4c5f574a1e7c5f9c8eb7f3d3fbfbb361cc18a738df1a58dfefd460"}, @@ -896,14 +941,14 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = "filelock" -version = "3.29.1" +version = "3.29.3" description = "A platform independent file lock." optional = false python-versions = ">=3.10" groups = ["dev", "docs"] files = [ - {file = "filelock-3.29.1-py3-none-any.whl", hash = "sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b"}, - {file = "filelock-3.29.1.tar.gz", hash = "sha256:d97e6b1b9757569626c58caa07dc4beb1613f4a2938b1e8cc81afca398906c9e"}, + {file = "filelock-3.29.3-py3-none-any.whl", hash = "sha256:e58333029cc9b925f39aad59b1d8f0a1ad836af4e60d7217f4a4dba87461261d"}, + {file = "filelock-3.29.3.tar.gz", hash = "sha256:7fc1b3f39cf172fd8203812043c57b8a65aef9969f38b6704f628b881f761a84"}, ] [[package]] @@ -977,7 +1022,7 @@ description = "Getting image size from png/jpeg/jpeg2000/gif file" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" groups = ["docs"] -markers = "python_version >= \"3.12\"" +markers = "python_version >= \"3.15\"" files = [ {file = "imagesize-1.5.0-py2.py3-none-any.whl", hash = "sha256:32677681b3f434c2cb496f00e89c5a291247b35b1f527589909e008057da5899"}, {file = 
"imagesize-1.5.0.tar.gz", hash = "sha256:8bfc5363a7f2133a89f0098451e0bcb1cd71aba4dc02bbcecb39d99d40e1b94f"}, @@ -990,7 +1035,7 @@ description = "Get image size from headers (BMP/PNG/JPEG/JPEG2000/GIF/TIFF/SVG/N optional = false python-versions = "<3.15,>=3.10" groups = ["docs"] -markers = "python_version < \"3.12\"" +markers = "python_version < \"3.15\"" files = [ {file = "imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96"}, {file = "imagesize-2.0.0.tar.gz", hash = "sha256:8e8358c4a05c304f1fccf7ff96f036e7243a189e9e42e90851993c558cfe9ee3"}, @@ -1050,7 +1095,7 @@ pfzy = ">=0.3.1,<0.4.0" prompt-toolkit = ">=3.0.1,<4.0.0" [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "ipykernel" @@ -1128,15 +1173,15 @@ test-extra = ["curio", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "n [[package]] name = "ipython" -version = "9.14.0" +version = "9.14.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.11" groups = ["dev"] markers = "python_version >= \"3.11\"" files = [ - {file = "ipython-9.14.0-py3-none-any.whl", hash = "sha256:8fd984a3372c14b12790b084ba6b5cff5678c0cb063244a0034f06a51f20d6c2"}, - {file = "ipython-9.14.0.tar.gz", hash = "sha256:6f27ff0f1d9ea050e0551f71568bc4b34d8aba579e8f111c5b4175f44ac6b4aa"}, + {file = "ipython-9.14.1-py3-none-any.whl", hash = "sha256:5d4a9ecaa3b10e6e5f269dd0948bdb58ca9cb851899cd23e07c320d3eb11613c"}, + {file = "ipython-9.14.1.tar.gz", hash = "sha256:f913bf74df06d458e46ced84ca506c23797590d594b236fe60b14df213291e7b"}, ] [package.dependencies] @@ -1257,7 +1302,7 
@@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.25.0" @@ -1282,14 +1327,14 @@ referencing = ">=0.31.0" [[package]] name = "jupyter-client" -version = "8.8.0" +version = "8.9.1" description = "Jupyter protocol implementation and client libraries" optional = false python-versions = ">=3.10" groups = ["dev", "docs"] files = [ - {file = "jupyter_client-8.8.0-py3-none-any.whl", hash = "sha256:f93a5b99c5e23a507b773d3a1136bd6e16c67883ccdbd9a829b0bbdb98cd7d7a"}, - {file = "jupyter_client-8.8.0.tar.gz", hash = "sha256:d556811419a4f2d96c869af34e854e3f059b7cc2d6d01a9cd9c85c267691be3e"}, + {file = "jupyter_client-8.9.1-py3-none-any.whl", hash = "sha256:0b7a295bc46e8751e9adae84781f726c851c1d911bd793edc4a3bde942e3da81"}, + {file = "jupyter_client-8.9.1.tar.gz", hash = "sha256:a58f730dd9e728ba16ba1d62ebccf7ffe1ebbdbce4e95cfae941b7321ae1f4fa"}, ] [package.dependencies] @@ -1298,6 +1343,7 @@ python-dateutil = ">=2.8.2" pyzmq = ">=25.0" tornado = ">=6.4.1" traitlets = ">=5.3" +typing-extensions = ">=4.13.0" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] @@ -1336,6 +1382,107 @@ files = [ {file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"}, ] +[[package]] +name = "librt" +version = "0.11.0" +description = "Mypyc runtime library" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +markers = "platform_python_implementation != \"PyPy\"" +files = [ + {file = "librt-0.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6e94ebfcfa2d5e9926d6c3b9aa4617ffc42a845b4321fb84021b872358c82a0f"}, + {file = "librt-0.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ae627397a2f351560440d872d6f7c8dbb4072e57868e7b2fc5b8b430fe489d45"}, + {file = 
"librt-0.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc329359321b67d24efdf4bc69012b0597001649544db662c001db5a0184794c"}, + {file = "librt-0.11.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:7e82e642ab0f7608ce2fe53d76ca2280a9ee33a1b06556142c7c6fe80a86fc33"}, + {file = "librt-0.11.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88145c15c67731d54283d135b03244028c750cc9edc334a96a4f5950ebdb2884"}, + {file = "librt-0.11.0-cp310-cp310-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d36a51b3d93320b686588e27123f4995804dbf1bce81df78c02fc3c6eea9280"}, + {file = "librt-0.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d00f3ac06a2a8b246327f11e186a53a100a4d5c7ed52346367e5ec751d51586c"}, + {file = "librt-0.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:461bbceede621f1ffb8839755f8663e886087ee7af16294cab7fb4d782c62eeb"}, + {file = "librt-0.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0cad8a4d6a8ff03c9b76f9414caccd78e7cfbc8a2e12fa334d8e1d9932753783"}, + {file = "librt-0.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f37aa505b3cf60701562eddb32df74b12a9e380c207fd8b06dd157a943ac7ea0"}, + {file = "librt-0.11.0-cp310-cp310-win32.whl", hash = "sha256:94663a21534637f0e787ec2a2a756022df6e5b7b2335a5cdd7d8e33d68a2af89"}, + {file = "librt-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:dec7db73758c2b54953fd8b7fe348c45188fe26b39ee18446196edd08453a5d4"}, + {file = "librt-0.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:93d95bd45b7d58343d8b90d904450a545144eec19a002511163426f8ab1fae29"}, + {file = "librt-0.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ee278c769a713638cdacd4c0436d72156e75df3ebc0166ab2b9dc43acc386c9"}, + {file = "librt-0.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:f230cb1cbc9faaa616f9a678f530ebcf186e414b6bcbd88b960e4ba1b92428d5"}, + {file = "librt-0.11.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:5d63c855d86938d9de93e265c9bd8c705b51ec494de5738340ee93767a686e4b"}, + {file = "librt-0.11.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:993f028be9e96a08d31df3479ac80d99be374d17f3b78e4796b3fd3c913d4e89"}, + {file = "librt-0.11.0-cp311-cp311-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:258d73a0aa66a055e65b2e4d1b8cdb23b9d132c5bb915d9547d804fcaed116cc"}, + {file = "librt-0.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0827efe7854718f04aaddf6496e96960a956e676fe1d0f04eb41511fd8ad06d5"}, + {file = "librt-0.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7753e57d6e12d019c0d8786f1c09c709f4c3fcc57c3887b24e36e6c06ec938b7"}, + {file = "librt-0.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:11bd19822431cc21af9f27374e7ae2e58103c7d98bda823536a6c47f6bb2bb3d"}, + {file = "librt-0.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:22bdf239b219d3993761a148ffa134b19e52e9989c84f845d5d7b71d70a17412"}, + {file = "librt-0.11.0-cp311-cp311-win32.whl", hash = "sha256:46c60b61e308eb535fbd6fa622b1ee1bb2815691c1ad9c98bf7b84952ec3bc8d"}, + {file = "librt-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:902e546ff044f579ff1c953ff5fce97b636fe9e3943996b2177710c6ef076f73"}, + {file = "librt-0.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:65ac3bc20f78aa0ee5ae84baa68917f89fef4af63e941084dd019a0d0e749f0c"}, + {file = "librt-0.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b87504f1690a23b9a2cca841191a04f83895d4fc2dd04df91d82b1a04ca2ad46"}, + {file = "librt-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40071fc5fe0ce8daa6de616702314a01e1250711682b0523d6ab8d4525910cb3"}, + {file = 
"librt-0.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:137e79445c896a0ea7b265f52d23954e05b64222ee1af69e2cb34219067cbb67"}, + {file = "librt-0.11.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:cca6644054e78746d8d4ef238681f9c34ff8b584fe6b988ecebb8db3b15e622a"}, + {file = "librt-0.11.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5b0eea49f5562861ee8d757a32ef7d559c1d35be2aaaa1ec28941d74c9ffc8a"}, + {file = "librt-0.11.0-cp312-cp312-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0d1029d7e1ae1a7e647ed6fb5df8c4ce2dffefb7a9f5fd1376a4554d96dac09f"}, + {file = "librt-0.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bc3ce6b33c5828d9e80592011a5c584cb2ce86edbc4088405f70da47dc1d1b3b"}, + {file = "librt-0.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:936c5995f3514a42111f20099397d8177c79b4d7e70961e396c6f5a0a3566766"}, + {file = "librt-0.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9bc0ca6ad9381cbe8e4aa6e5726e4c80c78115a6e9723c599ed1d73e092bc49d"}, + {file = "librt-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:070aa8c26c0a74774317a72df8851facc7f0f012a5b406557ac56992d92e1ec8"}, + {file = "librt-0.11.0-cp312-cp312-win32.whl", hash = "sha256:6bf14feb84b05ae945277395451998c89c54d0def4070eb5c08de544930b245a"}, + {file = "librt-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:75672f0bc524ede266287d532d7923dbce94c7514ad07627bac3d0c6d92cc4d9"}, + {file = "librt-0.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:2f10cf143e4a9bb0f4f5af568a00df94a2d69ef41c2579584454bb0fe5cc642c"}, + {file = "librt-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:78dc31f7fdfe9c9d0eb0e8f42d139db230e826415bbcabd9f0e9faaaee909894"}, + {file = "librt-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:fa475675db22290c3158e1d42326d0f5a65f04f44a0e68c3630a25b53560fb9c"}, + {file = "librt-0.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:621db29691044bdeda22e789e482e1b0f3a985d90e3426c9c6d17606416205ea"}, + {file = "librt-0.11.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:a9010e2ed5b3a9e158c5fd966b3ab7e834bb3d3aacc8f66c91dd4b57a3799230"}, + {file = "librt-0.11.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c39513d8b7477a2e1ed8c43fc21c524e8d5a0f8d4e8b7b074dbdbe7820a08e2"}, + {file = "librt-0.11.0-cp313-cp313-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7aef3cf1d5af86e770ab04bfd993dfc4ae8b8c17f66fb77dd4a7d50de7bbb1a3"}, + {file = "librt-0.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:557183ddc36babe46b27dd60facbd5adb4492181a5be887587d57cda6e092f21"}, + {file = "librt-0.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:83d3e1f72bd42f6c5c0b7daec530c3f829bd02db42c70b8ddf0c2d90a2459930"}, + {file = "librt-0.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:4ce1f21fbe589bc1afd7872dece84fb0e1144f794a288e58a10d2c54a55c43be"}, + {file = "librt-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b09f7044ea2b64c9da42fd3d335666518cfd1c6e8a182c95da73d0214b41e"}, + {file = "librt-0.11.0-cp313-cp313-win32.whl", hash = "sha256:78fddc31cd4d3caa897ad5d31f856b1faadc9474021ad6cb182b9018793e254e"}, + {file = "librt-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ca8aa88751a775870b764e93bad5135385f563cb8dcee399abf034ea4d3cb47"}, + {file = "librt-0.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:96f044bb325fd9cf1a723015638c219e9143f0dfbc0ca54c565df2b7fc748b44"}, + {file = "librt-0.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4a017a95e5837dc15a8c5661d60e05daa96b90908b1aa6b7acdf443cd25c8ebd"}, + {file = 
"librt-0.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b1ecbd9819deccc39b7542bf4d2a740d8a620694d39989e58661d3763458f8d4"}, + {file = "librt-0.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da327dacd7be8f8ec36547373550744a3cc0e536d54665cd83f8bcd961200e8"}, + {file = "librt-0.11.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:0dc56b1f8d06e60db362cc3fdae206681817f86ce4725d34511473487f12a34b"}, + {file = "librt-0.11.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05fb8fb2ab90e21c8d12ea240d744ad514da9baf381ebfa70d91d20d21713175"}, + {file = "librt-0.11.0-cp314-cp314-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cae74872be221df4374d10fec61f93ed1513b9546ea84f2c0bf73ab3e9bd0b03"}, + {file = "librt-0.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:32bcc918c0148eb7e3d57385125bac7e5f9e4359d05f07448b09f6f778c2f31c"}, + {file = "librt-0.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f9743fc99135d5f78d2454435615f6dec0473ca507c26ce9d92b10b562a280d3"}, + {file = "librt-0.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:5ba067f4aadae8fda802d91d2124c90c42195ff32d9161d3549e6d05cfe26f96"}, + {file = "librt-0.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:de3bf945454d032f9e390b85c4072e0a0570bf825421c8be0e71209fa65e1abe"}, + {file = "librt-0.11.0-cp314-cp314-win32.whl", hash = "sha256:d2277a05f6dcb9fd13db9566aac4fabd68c3ea1ea46ee5567d4eef8efa495a2f"}, + {file = "librt-0.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:ab73e8db5e3f564d812c1f5c3a175930a5f9bc96ccb5e3b22a34d7858b401cf7"}, + {file = "librt-0.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:aea3caa317752e3a466fa8af45d91ee0ea8c7fdd96e42b0a8dd9b76a7931eba1"}, + {file = "librt-0.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:d1b36540d7aaf9b9101b3a6f376c8d8e9f7a9aec93ed05918f2c69d493ffef72"}, + {file = "librt-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:efbb343ab2ce3540f4ecbe6315d677ed70f37cd9a72b1e58066c918ca83acbaa"}, + {file = "librt-0.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0dd688aab3f7914d3e6e5e3554978e0383312fb8e771d84be008a35b9ee548"}, + {file = "librt-0.11.0-cp314-cp314t-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:f5fb36b8c6c63fdcbb1d526d94c0d1331610d43f4118cc1beb4efef4f3faacb2"}, + {file = "librt-0.11.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4a9a237d13addb93715b6fee74023d5ee3469b53fce527626c0e088aa585805f"}, + {file = "librt-0.11.0-cp314-cp314t-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5ddd17bd87b2c56ddd60e546a7984a2e64c4e8eab92fb4cf3830a48ad5469d51"}, + {file = "librt-0.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bd43992b4473d42f12ff9e68326079f0696d9d4e6000e8f39a0238d482ba6ee2"}, + {file = "librt-0.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:f8e3e8056dd674e279741485e2e512d6e9a751c7455809d0114e6ebf8d781085"}, + {file = "librt-0.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:c1f708d8ae9c56cf38a903c44297243d2ec83fd82b396b977e0144a3e76217e3"}, + {file = "librt-0.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0add982e0e7b9fc14cf4b33789d5f13f66581889b88c2f58099f6ce8f92617bd"}, + {file = "librt-0.11.0-cp314-cp314t-win32.whl", hash = "sha256:2b481d846ac894c4e8403c5fd0e87c5d11d6499e404b474602508a224ff531c8"}, + {file = "librt-0.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:28edb433edde181112a908c78907af28f964eabc15f4dd16c9d66c834302677c"}, + {file = "librt-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dee008f20b542e3cd162ba338a7f9ec0f6d23d395f66fe8aeeec3c9d067ea253"}, + {file = 
"librt-0.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6bd72d903911d995ab666dbd1871f8b1e80925a699af8063fbf50053329fb05f"}, + {file = "librt-0.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ef69ac715f3cd8e5cd252cb2aebfa72c015492aacc339d5d7bf8fef3c62c677"}, + {file = "librt-0.11.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:624a40c4a4ad7773315c287276cd024509b2c66ff5904f504bfc08d2c70293ab"}, + {file = "librt-0.11.0-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:41dc19fe150b69716c8ece4f76773a9e8813fe3e35e032a58b4d46423fb8d7c0"}, + {file = "librt-0.11.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4e8bd98ea9c47ae90b319a087ab28dac493f1ffbc1ecd1f28fcdbf3b7e1108d1"}, + {file = "librt-0.11.0-cp39-cp39-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84308fc49423ce6475d1c5d1985cd69a8ca9f0325fc7d5f81bb690a3f3625d4e"}, + {file = "librt-0.11.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ff0fbaf5f44a21beeb0110f2ab64f45135a9536a834b79c0d1ef018f2786bbfa"}, + {file = "librt-0.11.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:9c028a9442a18e266955d364ce42259136e79a7ba14d773e0d778d5f70cd56f1"}, + {file = "librt-0.11.0-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:9f1692105a02bcf853f355032a5fdc5494358ef83d8fd22d16de375c85cec3f5"}, + {file = "librt-0.11.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7a80a71e1fda83cc752a9141e87aae7fef279538597564d670e9ce513f286192"}, + {file = "librt-0.11.0-cp39-cp39-win32.whl", hash = "sha256:140695816ddf3c86eb972981a26f35efd871c44b0c3aed44c8cd01749386617f"}, + {file = "librt-0.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:92f7ff819c197fc30473190a12c2856f325ac90aabfccbeb2072d28cc2e234e3"}, + {file = "librt-0.11.0.tar.gz", hash = "sha256:075dc3ef4458a278e0195cbf6ac9d38808d9b906c5a6c7f7f79c3888276a3fb1"}, +] + [[package]] name = "markdown-it-py" 
version = "3.0.0" @@ -1539,74 +1686,162 @@ typing-extensions = {version = "*", markers = "python_version < \"3.11\""} [[package]] name = "msgpack" -version = "1.1.2" +version = "1.2.0" description = "MessagePack serializer" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "msgpack-1.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0051fffef5a37ca2cd16978ae4f0aef92f164df86823871b5162812bebecd8e2"}, - {file = "msgpack-1.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a605409040f2da88676e9c9e5853b3449ba8011973616189ea5ee55ddbc5bc87"}, - {file = "msgpack-1.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b696e83c9f1532b4af884045ba7f3aa741a63b2bc22617293a2c6a7c645f251"}, - {file = "msgpack-1.1.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:365c0bbe981a27d8932da71af63ef86acc59ed5c01ad929e09a0b88c6294e28a"}, - {file = "msgpack-1.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:41d1a5d875680166d3ac5c38573896453bbbea7092936d2e107214daf43b1d4f"}, - {file = "msgpack-1.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:354e81bcdebaab427c3df4281187edc765d5d76bfb3a7c125af9da7a27e8458f"}, - {file = "msgpack-1.1.2-cp310-cp310-win32.whl", hash = "sha256:e64c8d2f5e5d5fda7b842f55dec6133260ea8f53c4257d64494c534f306bf7a9"}, - {file = "msgpack-1.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:db6192777d943bdaaafb6ba66d44bf65aa0e9c5616fa1d2da9bb08828c6b39aa"}, - {file = "msgpack-1.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2e86a607e558d22985d856948c12a3fa7b42efad264dca8a3ebbcfa2735d786c"}, - {file = "msgpack-1.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:283ae72fc89da59aa004ba147e8fc2f766647b1251500182fac0350d8af299c0"}, - {file = "msgpack-1.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:61c8aa3bd513d87c72ed0b37b53dd5c5a0f58f2ff9f26e1555d3bd7948fb7296"}, - {file = "msgpack-1.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:454e29e186285d2ebe65be34629fa0e8605202c60fbc7c4c650ccd41870896ef"}, - {file = "msgpack-1.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7bc8813f88417599564fafa59fd6f95be417179f76b40325b500b3c98409757c"}, - {file = "msgpack-1.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bafca952dc13907bdfdedfc6a5f579bf4f292bdd506fadb38389afa3ac5b208e"}, - {file = "msgpack-1.1.2-cp311-cp311-win32.whl", hash = "sha256:602b6740e95ffc55bfb078172d279de3773d7b7db1f703b2f1323566b878b90e"}, - {file = "msgpack-1.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:d198d275222dc54244bf3327eb8cbe00307d220241d9cec4d306d49a44e85f68"}, - {file = "msgpack-1.1.2-cp311-cp311-win_arm64.whl", hash = "sha256:86f8136dfa5c116365a8a651a7d7484b65b13339731dd6faebb9a0242151c406"}, - {file = "msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa"}, - {file = "msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb"}, - {file = "msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f"}, - {file = "msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42"}, - {file = "msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9"}, - {file = "msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620"}, - {file = "msgpack-1.1.2-cp312-cp312-win32.whl", hash = 
"sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029"}, - {file = "msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b"}, - {file = "msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69"}, - {file = "msgpack-1.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4efd7b5979ccb539c221a4c4e16aac1a533efc97f3b759bb5a5ac9f6d10383bf"}, - {file = "msgpack-1.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:42eefe2c3e2af97ed470eec850facbe1b5ad1d6eacdbadc42ec98e7dcf68b4b7"}, - {file = "msgpack-1.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fdf7d83102bf09e7ce3357de96c59b627395352a4024f6e2458501f158bf999"}, - {file = "msgpack-1.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fac4be746328f90caa3cd4bc67e6fe36ca2bf61d5c6eb6d895b6527e3f05071e"}, - {file = "msgpack-1.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fffee09044073e69f2bad787071aeec727183e7580443dfeb8556cbf1978d162"}, - {file = "msgpack-1.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5928604de9b032bc17f5099496417f113c45bc6bc21b5c6920caf34b3c428794"}, - {file = "msgpack-1.1.2-cp313-cp313-win32.whl", hash = "sha256:a7787d353595c7c7e145e2331abf8b7ff1e6673a6b974ded96e6d4ec09f00c8c"}, - {file = "msgpack-1.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:a465f0dceb8e13a487e54c07d04ae3ba131c7c5b95e2612596eafde1dccf64a9"}, - {file = "msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84"}, - {file = "msgpack-1.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e23ce8d5f7aa6ea6d2a2b326b4ba46c985dbb204523759984430db7114f8aa00"}, - {file = "msgpack-1.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:6c15b7d74c939ebe620dd8e559384be806204d73b4f9356320632d783d1f7939"}, - {file = "msgpack-1.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99e2cb7b9031568a2a5c73aa077180f93dd2e95b4f8d3b8e14a73ae94a9e667e"}, - {file = "msgpack-1.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:180759d89a057eab503cf62eeec0aa61c4ea1200dee709f3a8e9397dbb3b6931"}, - {file = "msgpack-1.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:04fb995247a6e83830b62f0b07bf36540c213f6eac8e851166d8d86d83cbd014"}, - {file = "msgpack-1.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8e22ab046fa7ede9e36eeb4cfad44d46450f37bb05d5ec482b02868f451c95e2"}, - {file = "msgpack-1.1.2-cp314-cp314-win32.whl", hash = "sha256:80a0ff7d4abf5fecb995fcf235d4064b9a9a8a40a3ab80999e6ac1e30b702717"}, - {file = "msgpack-1.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:9ade919fac6a3e7260b7f64cea89df6bec59104987cbea34d34a2fa15d74310b"}, - {file = "msgpack-1.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:59415c6076b1e30e563eb732e23b994a61c159cec44deaf584e5cc1dd662f2af"}, - {file = "msgpack-1.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:897c478140877e5307760b0ea66e0932738879e7aa68144d9b78ea4c8302a84a"}, - {file = "msgpack-1.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a668204fa43e6d02f89dbe79a30b0d67238d9ec4c5bd8a940fc3a004a47b721b"}, - {file = "msgpack-1.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5559d03930d3aa0f3aacb4c42c776af1a2ace2611871c84a75afe436695e6245"}, - {file = "msgpack-1.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70c5a7a9fea7f036b716191c29047374c10721c389c21e9ffafad04df8c52c90"}, - {file = "msgpack-1.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f2cb069d8b981abc72b41aea1c580ce92d57c673ec61af4c500153a626cb9e20"}, - {file = 
"msgpack-1.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d62ce1f483f355f61adb5433ebfd8868c5f078d1a52d042b0a998682b4fa8c27"}, - {file = "msgpack-1.1.2-cp314-cp314t-win32.whl", hash = "sha256:1d1418482b1ee984625d88aa9585db570180c286d942da463533b238b98b812b"}, - {file = "msgpack-1.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:5a46bf7e831d09470ad92dff02b8b1ac92175ca36b087f904a0519857c6be3ff"}, - {file = "msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46"}, - {file = "msgpack-1.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ea5405c46e690122a76531ab97a079e184c0daf491e588592d6a23d3e32af99e"}, - {file = "msgpack-1.1.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fba231af7a933400238cb357ecccf8ab5d51535ea95d94fc35b7806218ff844"}, - {file = "msgpack-1.1.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a8f6e7d30253714751aa0b0c84ae28948e852ee7fb0524082e6716769124bc23"}, - {file = "msgpack-1.1.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94fd7dc7d8cb0a54432f296f2246bc39474e017204ca6f4ff345941d4ed285a7"}, - {file = "msgpack-1.1.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:350ad5353a467d9e3b126d8d1b90fe05ad081e2e1cef5753f8c345217c37e7b8"}, - {file = "msgpack-1.1.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6bde749afe671dc44893f8d08e83bf475a1a14570d67c4bb5cec5573463c8833"}, - {file = "msgpack-1.1.2-cp39-cp39-win32.whl", hash = "sha256:ad09b984828d6b7bb52d1d1d0c9be68ad781fa004ca39216c8a1e63c0f34ba3c"}, - {file = "msgpack-1.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:67016ae8c8965124fdede9d3769528ad8284f14d635337ffa6a713a580f6c030"}, - {file = "msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e"}, + {file = "msgpack-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:ed8c9495a0f12d17a2b4b69e23f895b88f26aabe40911c86594d3fbddecfff08"}, + {file = "msgpack-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7384859c90b45a28a4b31aa50b49cca84504c9f27df459cea6e072627650dcb"}, + {file = "msgpack-1.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63b35e8e65f04ff7ad5c9c70885da587c74f51e4b4eb3db624eac6d250e8cf59"}, + {file = "msgpack-1.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9004c5a02acd3eca4e15e1ae7b461c32e3711105a28b1ad78be2f6facff4c523"}, + {file = "msgpack-1.2.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e2032dacb0a973fcbf7bd088415a369dae31c5af40e199d234806be22e86765"}, + {file = "msgpack-1.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c1feb100651fbe4b39826207cb20af065dfbfbfa43b1bafd7eaa2252abf7acfd"}, + {file = "msgpack-1.2.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:82487709d4c597d252311a65370220675fb1cc859e7da9269a3060c03ac02cf6"}, + {file = "msgpack-1.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0268c67a74f5f913f545a0fdbbfaa3f6ebcf23b4c3209bb99704a2ea87e13f90"}, + {file = "msgpack-1.2.0-cp310-cp310-win32.whl", hash = "sha256:7df87173b0e13ddd134919731f13525dbbf75204145597decf1cb86887ebb492"}, + {file = "msgpack-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:6371edb47788fbfd8a22016f9a97b5616dd9849bc50abcbb8e82d38f71efa096"}, + {file = "msgpack-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ec35cd3f127f50806aa10c3f74bf27b749f13ddf1d2217964ada8f38042d1653"}, + {file = "msgpack-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:317eb298297121bfad9173d748124a04a36af27b6ac39c2bbc1db1ce57608dcf"}, + {file = "msgpack-1.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50fe6434de89073273026dd032a62e8b63f8857a261d7a2df5b07c9e72f3a8f7"}, + {file = 
"msgpack-1.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106c6d333ff3d4eda075b7d4b9695d1752c5bcc635e40d0dbaf4e276c9ed80e1"}, + {file = "msgpack-1.2.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:67055a611e871cb1bd0acb732f2e9f64ca8155ca0bba1d0a5bb362e7209e5541"}, + {file = "msgpack-1.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ceec7f8e633d5a4b4a32b0416bef90ee3cd1017ea36247f705e523072e576119"}, + {file = "msgpack-1.2.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7ec5851160a3c2c0f77d68ddec620318cd8e7d88d94f9c058190e8ce0dfa1d31"}, + {file = "msgpack-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dd7140f7b09dbe1984a0dff3189375d840247e3e4cf4ac45c5a499b3b599c8d2"}, + {file = "msgpack-1.2.0-cp311-cp311-win32.whl", hash = "sha256:cbfd54018d386da0951c7a2be13de0f58559d251313e613b2155e52ed1cbd8f1"}, + {file = "msgpack-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:653373c4614c31463ba486a67776e4bb396af289921bd5353e209534b71467fa"}, + {file = "msgpack-1.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:7a260aea1e5e7d6c7f1d9284c7360d29021627b61dc4dd7df144b81210810537"}, + {file = "msgpack-1.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2d6047ccd11a12c96a69f2bfe026471abef67334c3d0494a93e5310e45140a2"}, + {file = "msgpack-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0347e3ac0dfee99086d3b68fe959da3f5f657c0019ddbaeaaa259a85f8603422"}, + {file = "msgpack-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25552ff1f2ff3dc8333e27eabb94f702da5929ed0e07969688194a3e9f12e151"}, + {file = "msgpack-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0d94420d9d52c56568159a69200af7e45eadb29615fa9d09fada140de1c38c7"}, + {file = "msgpack-1.2.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:d16e1f2db4a9eebc07b7cc91898d71e710f2eed8358711a605fee802caff8923"}, + {file = "msgpack-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9cb2e700e85f1e27bbb5c9de6cc1c9a4bc5ac64d5404bdcbcb37a0dc7a947a3"}, + {file = "msgpack-1.2.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:717d0b166dd176a5f786aeafff081f6439680acf5af193eb63e6266c12b04d3d"}, + {file = "msgpack-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e87c7a21654d18111eb1a89bd5c42baba42e61887365d9e89585e112b4203f9e"}, + {file = "msgpack-1.2.0-cp312-cp312-win32.whl", hash = "sha256:967e0c891f5f23ab65762f2e5dc95922759c79f1ef99ef4c7e1fdd863e0d0af9"}, + {file = "msgpack-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:6c23e33cee28dcffa112ae205661da4636fd7b06bd9ad1559a890623b92d060b"}, + {file = "msgpack-1.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:6eeb771571f63f68045433b1a35c0256b946f31ed62f006997e40b8ad8b735af"}, + {file = "msgpack-1.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3a1d30df1f302f2b7a7404afbac2ab76d510036c34cf34dffb01f704a7288e45"}, + {file = "msgpack-1.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:581e317112260d8ca488d490cad9290a5682276f309c41c7de237a85ed8799c8"}, + {file = "msgpack-1.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6827d12eacc16873eba62408a1b7bbe8ecfb4a8f7ed78a631ae9bae6ad43cf2"}, + {file = "msgpack-1.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a186027e4279efa4c8bf06ce30605498d7d0d3af0fba0b9799dce85a3fd4a93c"}, + {file = "msgpack-1.2.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a96142c14a11cf1a509e8b9aaf72858a3b742b7613e095ce646913e88ce7bd99"}, + {file = "msgpack-1.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:50c220579b68a6085b95408b2eaa486b259520f55d8e363ddc9b5d7ba5a6ac6d"}, + {file = "msgpack-1.2.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:4dcb9d12ab100ecacdfaaf37a3d72fe8392eacc7054afc1916b12d1b747c8446"}, + {file = "msgpack-1.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a804727188ab0ebb237fadb303b743f04925a69d8c3247292d1e33e679767c15"}, + {file = "msgpack-1.2.0-cp313-cp313-win32.whl", hash = "sha256:1a1ac6ae1fe23298f79380e7b144c8a454e5d05616b0096584f353ba2d750114"}, + {file = "msgpack-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:1c3c80949d79578f9dc85fd9fb91edfe6694e8a729cd5744634d59d8455fdde3"}, + {file = "msgpack-1.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:fcf8f76fa587c2395fd0057c7232dbf071241f9ad280b235adb7ab585289989e"}, + {file = "msgpack-1.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f854fa1a8b55d75d82ef9a905d9cdbeffdf7897c088f6020bd221867da5e56a5"}, + {file = "msgpack-1.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e90df581f80f53b372d5d9d9349078d729851a3a0d0bd74f53ccb598d01e45b8"}, + {file = "msgpack-1.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b276ed50d8ac75d1f134a433ae79af8557d0fa25ee5b4737da533dfc2ce382e8"}, + {file = "msgpack-1.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:544d972459c92aa32e63b800d07c2d9cf2734a3be29cee3a0b478a622850e9f5"}, + {file = "msgpack-1.2.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a070147cc2cf6b8a891734e0f5c8fe8f70ed8739ab30ba140b058005a6e86af4"}, + {file = "msgpack-1.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7685e23b0f51745a751629c31713fbefdef8896b31b2bb38299dfa4ae6c0740c"}, + {file = "msgpack-1.2.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b9204daeee8d91a7ae5acf2d2a8e3983be9a3025f38aa21bfaefbd7eea84a7dc"}, + {file = "msgpack-1.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bfc057248609742ebbabf6bcd27fea4fd99c4980584e613c168c9b002318298f"}, + {file = "msgpack-1.2.0-cp314-cp314-win32.whl", hash = 
"sha256:a3faa7edf2388337ae849239878e92f0298b4dab4488e4f1834062f9d0c410c9"}, + {file = "msgpack-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:1a3effc392a57744e4681e55d05f97d5ee7b598747d718340a9b4b8a970c40e1"}, + {file = "msgpack-1.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:56a318f7df6bec7b40928d6b0519961f20a510d8baabf6baa393a70444588f0a"}, + {file = "msgpack-1.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:afa4a65ab2097795e771a74a3a81ea49534aaeba874eaf426a3332268e045ae6"}, + {file = "msgpack-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:409550770632bb28daa70a11d0ed5763f7db38f40b06f7db9f11dd2794d01102"}, + {file = "msgpack-1.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf47e3cd11ce044965a9736a322afdd390b31ed602d1c1b10211d1a841f1d587"}, + {file = "msgpack-1.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:204bc9f5d6e59c1718c0a4a84fc8ff71b5b4562faac257c1a68bca611ecf9b72"}, + {file = "msgpack-1.2.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:610154307b27267266368bc1d1c7bb8aeb71da7be9356d403cb2442d9e6399f5"}, + {file = "msgpack-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6799f157bb63e79f11e2e590cfdb28423fc18dd60c270c3914b5b4586ae36f7e"}, + {file = "msgpack-1.2.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:72bd844902cf0a5ac3af2ef742f253cd0b1e5bcd184f49b4fb9a6a1f7bf305e8"}, + {file = "msgpack-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3c0bd450f78d0d81722c80da6cdbf674a856967870a9db2f6c4debc4d8b3c67c"}, + {file = "msgpack-1.2.0-cp314-cp314t-win32.whl", hash = "sha256:378caf74c4c718dfc17590ce68a6d710ed398ff6fcf08237de23b77755730b55"}, + {file = "msgpack-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:553b42598165c4dd3235994fd6e4b0dfb1ce5f3fd33d94ba9609442643015f38"}, + {file = "msgpack-1.2.0-cp314-cp314t-win_arm64.whl", hash = 
"sha256:2825bb1da548d214ab8a810906b7dd69a10f3838b615a2cc46e5172d3cb44f6e"}, + {file = "msgpack-1.2.0.tar.gz", hash = "sha256:8e17af38197bf58e7e819041678f6178f4491493f5b8c8580414f40f7c2c3c41"}, +] + +[[package]] +name = "mypy" +version = "2.1.0" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "mypy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:11a6beb180257a805961aea9ec591bbd0bd17f1e18d35b8456d57aee5bedfedc"}, + {file = "mypy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ef78c1d306bbf9a8a12f526c44902c9c28dffd6c52c52bf6a72641ce18d3849"}, + {file = "mypy-2.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c209a90853081ff01d01ee895cafe10f7db1474e0d95beaeef0f6c1db9119bbd"}, + {file = "mypy-2.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47cebf61abde7c088a4e27718a8b13a81655686b2e9c251f5c0915a802248166"}, + {file = "mypy-2.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d57a90ae5e872138a425ec328edbc9b235d1934c4377881a33ec05b341acc9a8"}, + {file = "mypy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:aea7f7a8a55b459c34275fc468ada6ca7c173a5e43a68f5dbe588a563d8a06b8"}, + {file = "mypy-2.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:c989640253f0d76843e9c6c1bbf4bd48c5e85ada61bde4beb37cb3eca035685e"}, + {file = "mypy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a683016b16fe2f572dc04c72be7ee0504ac1605a265d0200f5cea695fb788f41"}, + {file = "mypy-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1a293c534adb55271fef24a26da04b855540a8c13cc07bc5917b9fd2c394f2ca"}, + {file = "mypy-2.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7406f4d048e71e576f5356d317e5b0a9e666dfd966bd99f9d14ca06e1a341538"}, + {file = 
"mypy-2.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e0210d626fc8b31ccc90233754c7bc90e1f43205e85d96387f7db1285b55c398"}, + {file = "mypy-2.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3712c20deed54e814eaaa825603bada8ea1c390670a397c95b98405347acc563"}, + {file = "mypy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fcaa0e479066e31f7cceb6a3bea39cb22b2ff51a6b2f24f193d19179ba17c389"}, + {file = "mypy-2.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:0b1a5260c95aa443083f9ed3592662941951bca3d4ca224a5dc517c38b7cf666"}, + {file = "mypy-2.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:244358bf1c0da7722230bce60683d52e8e9fd030554926f15b747a84efb5b3af"}, + {file = "mypy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ec7c57657493c7a75534df2751c8ae2cda383c16ecc55d2106c54476b1b16f6"}, + {file = "mypy-2.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8161b6ff4392410023224f0969d17db93e1e154bc3e4ba62598e720723ae211"}, + {file = "mypy-2.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf03e12003084a67395184d3eb8cbd6a489dc3655b5664b28c210a9e2403ab0b"}, + {file = "mypy-2.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:20509760fd791c51579d573153407d226385ec1f8bcce55d730b354f3336bc22"}, + {file = "mypy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:6753d0c1fdd6b1a23b9e4f283ce80b2153b724adcb2653b20b85a8a28ac6436b"}, + {file = "mypy-2.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:98ebb6589bb3b6d0c6f0c459d53ca55b8091fbc13d277c4041c885392e8195e8"}, + {file = "mypy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35aac3bb114e03888f535d5eb51b8bafbb3266586b599da1940f9b1be3ec5bd5"}, + {file = "mypy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8de55a8c861f2a49331f807be98d90caeceeef520bde13d43a160207f8af613e"}, + {file = 
"mypy-2.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fdf2941a07434af755837d9880f7d7d25f1dacb1af9dcd4b9b66f2220a3024e"}, + {file = "mypy-2.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e195b817c13f02352a9c124301f9f30f078405444679b6753c1b96b6eed37285"}, + {file = "mypy-2.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5431d42af987ebd92ba2f71d45c85ed41d8e6ca9f5fd209a69f68f707d2469e5"}, + {file = "mypy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:767fe8c66dc3e01e19e1737d4c38ebefead16125e1b8e58ad421903b376f5c65"}, + {file = "mypy-2.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:ecfe70d43775ab99562ab128ce49854a362044c9f894961f68f898c23cb7429d"}, + {file = "mypy-2.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:7354c5a7f69d9345c3d6e69921d57088eea3ddeeb6b20d34c1b3855b02c36ec2"}, + {file = "mypy-2.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:49890d4f76ac9e06ec117f9e09f3174da70a620a0c300953d8595c926e80947f"}, + {file = "mypy-2.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:761be68e023ef5d94678772396a8af1220030f80837a3afd8d0aef3b419666f4"}, + {file = "mypy-2.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c90345fc182dc363b891350457ec69c35140858538f38b4540845afcc32b1aef"}, + {file = "mypy-2.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b84802e7b5a6daf1f5e15bc9fcd7ddae77be13981ffab037f1c67bb84d67d135"}, + {file = "mypy-2.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:022c771234936ceac541ebaf836fe9e2abeb3f5e09aff21588fe543ff006fe21"}, + {file = "mypy-2.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:498207db725cec88829a6a5c2fc771205fd043719ef98bc49aba8fb9fc4e6d57"}, + {file = "mypy-2.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:7d5e5cad0efeba72b93cd17490cc0d69c5ac9ca132994fe3fb0314808aeeb83e"}, + 
{file = "mypy-2.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ff715050c127d724fd260a2e666e7747fdd83511c0c47d449d98238970aef780"}, + {file = "mypy-2.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:82208da9e09414d520e912d3e462d454854bed0810b71540bb016dcbca7308fd"}, + {file = "mypy-2.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e79ebc1b904b84f0310dff7469655a9c36c7a68bddb37bdd42b67a332df61d08"}, + {file = "mypy-2.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e583edc957cfb0deb142079162ae826f58449b116c1d442f2d91c69d9fced081"}, + {file = "mypy-2.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b33b6cd332695bba180d55e717a79d3038e479a2c49cc5eb3d53603409b9a5d7"}, + {file = "mypy-2.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:4f910fe825376a7b66ef7ca8c98e5a149e8cd64c19ae71d84047a74ee060d4e6"}, + {file = "mypy-2.1.0-py3-none-any.whl", hash = "sha256:a663814603a5c563fb87a4f96fb473eeb30d1f5a4885afcf44f9db000a366289"}, + {file = "mypy-2.1.0.tar.gz", hash = "sha256:81e76ad12c2d804512e9b13240d1588316531bfba07558286078bfbce9613633"}, +] + +[package.dependencies] +ast-serialize = ">=0.3.0,<1.0.0" +librt = {version = ">=0.11.0", markers = "platform_python_implementation != \"PyPy\""} +mypy_extensions = ">=1.0.0" +pathspec = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing_extensions = [ + {version = ">=4.6.0", markers = "python_version < \"3.15\""}, + {version = ">=4.14.0", markers = "python_version >= \"3.15\""}, +] + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] [[package]] @@ -1654,21 +1889,21 @@ icu = ["PyICU (>=1.0.0)"] [[package]] name = "nbclient" -version = "0.10.4" +version = "0.11.0" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." optional = false python-versions = ">=3.10.0" groups = ["docs"] files = [ - {file = "nbclient-0.10.4-py3-none-any.whl", hash = "sha256:9162df5a7373d70d606527300a95a975a47c137776cd942e52d9c7e29ff83440"}, - {file = "nbclient-0.10.4.tar.gz", hash = "sha256:1e54091b16e6da39e297b0ece3e10f6f29f4ac4e8ee515d29f8a7099bd6553c9"}, + {file = "nbclient-0.11.0-py3-none-any.whl", hash = "sha256:ef7fa0d59d6e1d41103933d8a445a18d5de860ca6b613b87b8574accdb3c2895"}, + {file = "nbclient-0.11.0.tar.gz", hash = "sha256:04a134a5b087f2c5887f228aca155db50169b8cd9334dee6942c8e927e56081a"}, ] [package.dependencies] -jupyter-client = ">=6.1.12" -jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" -nbformat = ">=5.1.3" -traitlets = ">=5.4" +jupyter-client = ">=7.0.0" +jupyter-core = ">=5.4.0" +nbformat = ">=5.2.0" +traitlets = ">=5.13" [package.extras] dev = ["pre-commit"] @@ -1833,6 +2068,23 @@ files = [ qa = ["flake8 (==5.0.4)", "types-setuptools (==67.2.0.1)", "zuban (==0.5.1)"] testing = ["docopt", "pytest"] +[[package]] +name = "pathspec" +version = "1.1.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189"}, + {file = "pathspec-1.1.1.tar.gz", hash = "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a"}, +] + +[package.extras] +hyperscan = ["hyperscan (>=0.7)"] +optional = ["typing-extensions (>=4)"] +re2 = ["google-re2 (>=1.1)"] + [[package]] name = "pexpect" version = "4.9.0" @@ -1862,7 +2114,7 @@ files = [ ] [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "platformdirs" @@ -2052,8 +2304,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.6", markers = "python_version == \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -2190,14 +2442,14 @@ six = ">=1.5" [[package]] name = "python-discovery" -version = "1.4.0" +version = "1.4.2" description = "Python interpreter discovery" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "python_discovery-1.4.0-py3-none-any.whl", hash = "sha256:26ed78d703e234879a66244c7d4114563fb13ec5cd30a2d1357e5fb4850782da"}, - {file = "python_discovery-1.4.0.tar.gz", hash = "sha256:eb8bc7daad3c226c147e45bb4e970a1feb1bf4048ee178e6db59e197b8010ce3"}, + {file = "python_discovery-1.4.2-py3-none-any.whl", hash = 
"sha256:475803f53b7b2ed6e490e27373f9d8340f7d2eebf9acdaf645d7d714c97bb500"}, + {file = "python_discovery-1.4.2.tar.gz", hash = "sha256:8f3746c4b4968d22afbb97d36e1a0e5b66e6c0f297290f2e95f05b9b8bf18690"}, ] [package.dependencies] @@ -2847,30 +3099,30 @@ oldlibyaml = ["ruamel.yaml.clib ; platform_python_implementation == \"CPython\"" [[package]] name = "ruff" -version = "0.15.15" +version = "0.15.17" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" groups = ["dev"] files = [ - {file = "ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b"}, - {file = "ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e"}, - {file = "ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627"}, - {file = "ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4"}, - {file = 
"ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c"}, - {file = "ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd"}, - {file = "ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f"}, - {file = "ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb"}, - {file = "ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a"}, - {file = "ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9"}, - {file = "ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4"}, - {file = "ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7"}, - {file = "ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6"}, + {file = "ruff-0.15.17-py3-none-linux_armv6l.whl", hash = "sha256:d9feddb927fc68bd295f5eebc587a7e42cfaf9b65f60ca4a2386febff575da8f"}, + {file = "ruff-0.15.17-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:25805a226d741c47d274a35ad5c10a7dde175fcddfa511d7cf3da0a21eb3eab7"}, + {file = "ruff-0.15.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f6ad73b14c2d18a3bf8ad7cb6974294d7f613a7898604826058e6ac64918ef4d"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ba0c1e4f95bcb3869d0d30cbd5917071ef2e28665abfec970cdab0492c713ed"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81647960f10bff57d2e51cadd0c3950fe598400c852863a038720ef5b8cca91e"}, + {file = 
"ruff-0.15.17-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e01a84ddbc8c16c23055ba3924476850f1bbc1917cebbb9376665a63e74260d"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fe9f653152f8f294f9f7e03bf3a453d8b4a27f7a59c78c8666167f2b17b96c"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c0fe88a7676e7a05b73174d4d4a59cb2ac21ff8263583f87a81a6018475a978"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecfc3c7878fff94633ab0348524e093f9ce3243080416dd7d14f8ba400174719"}, + {file = "ruff-0.15.17-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:b8461180b22420b1bdc289909410930761629fddf2a5aaf60fae1ab26cedc4c4"}, + {file = "ruff-0.15.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6eccbe50a038b503e7140b441aa9c7fc8c1f36edf23ebef9f4165c2f28f568b7"}, + {file = "ruff-0.15.17-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:382fc0521025f5a8ad447d8bdd523545d0d7646adb718eb1c2dac5065ec27c0f"}, + {file = "ruff-0.15.17-py3-none-musllinux_1_2_i686.whl", hash = "sha256:456d41fcd1b2777ad63f09a6e7121d43f7b688bbc76a800c10f7f8fb1f912c3f"}, + {file = "ruff-0.15.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b1a04bcc94ae6194e9db05d16ad31f298a7194bfbcb08258bbe589cee1d587b8"}, + {file = "ruff-0.15.17-py3-none-win32.whl", hash = "sha256:596065960ab1ff593f744220c9fe6580eda00a95003cffa9f4048bb5b1bf0392"}, + {file = "ruff-0.15.17-py3-none-win_amd64.whl", hash = "sha256:6769e5fa1710b179b92e0bfa5a51735b35baea9013dadb06d5f44cbcf9547084"}, + {file = "ruff-0.15.17-py3-none-win_arm64.whl", hash = "sha256:f3be1fbb34bcdfd146240d8fb92a709d4c2c8191348580a3c044ec60fa0b4456"}, + {file = "ruff-0.15.17.tar.gz", hash = "sha256:2ec446937fd16c8c4de2674a209cc5af64d9c6f17d21fbf1151054fa0bcf5219"}, ] [[package]] @@ -3445,22 +3697,22 @@ files = [ [[package]] name = "tornado" -version = "6.5.6" +version = "6.5.7" description = 
"Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." optional = false python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "tornado-6.5.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:65fcfaafb079435c2c19dc9e07c0f1cf0fa9051759ed0a7d0a3ba7ea7f64919c"}, - {file = "tornado-6.5.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:38bc01b4acacded2de63ae78023548e41ebe6fbed3ec05a796d7ae3ad893887e"}, - {file = "tornado-6.5.6-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b942e6a137fda31ff54bf8e6e2c8d1c37f1f50583f3ed53fb840b53b9601d104"}, - {file = "tornado-6.5.6-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8666946e70171b8c3f1fc9b7876fac492e84822c4c7f3746f4e8f8bc9ac92a79"}, - {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1c34cfab7ad6d104f052f55de06d39bbafc5885cfeb4da688803308dbcfa90b7"}, - {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:385f35e4e22fb52551dfcda4cdc8c30c61c2c001aef5ddad99cdfe116952efd3"}, - {file = "tornado-6.5.6-cp39-abi3-win32.whl", hash = "sha256:db475f1b67b2809b10bb16264829087724ca8d24fe4ed47f7b8675cae453ef86"}, - {file = "tornado-6.5.6-cp39-abi3-win_amd64.whl", hash = "sha256:6739bf1e8eb09230f1280ddbd3236f0309db70f2c551a8dbc40f62babdf82f79"}, - {file = "tornado-6.5.6-cp39-abi3-win_arm64.whl", hash = "sha256:2543597b24a695d72338a9a77818362d72387c03ae173f1f169eadc5c91466ac"}, - {file = "tornado-6.5.6.tar.gz", hash = "sha256:9a365179fe8ff6b8766f602c0f67c185d778193e9bdd828b19f0b6ed7764177d"}, + {file = "tornado-6.5.7-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:148b2eb15c2c765a50796172c1e499649b35f30d2e3c3d3e15913cfa56bfb163"}, + {file = "tornado-6.5.7-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9da38de27f1da3b78a966f0dae12b5a1ea9afe72ca805d84ff06508272ddf100"}, + {file = 
"tornado-6.5.7-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8d759e71906ee783f8867b93bf26a265743da4c1e2f4a018464c1ba019862972"}, + {file = "tornado-6.5.7-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a46347a18f23fb92b396beebe0fb78f61dda0cc302445202c16203d8a18848b"}, + {file = "tornado-6.5.7-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7778b30bef919231265e91c69963ce0f49a1e9c07ac900bbe75b19ce2575ba92"}, + {file = "tornado-6.5.7-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e726f0c75da7726eec023aa62751ff8878bd2737e34fbdd33b1ae5897d2200f5"}, + {file = "tornado-6.5.7-cp39-abi3-win32.whl", hash = "sha256:f8de3bf12d3efdd0cbe7c8887868198f8a91415e3f29fcf258d9b8eb7b1d9ae4"}, + {file = "tornado-6.5.7-cp39-abi3-win_amd64.whl", hash = "sha256:de942f843533a039ef9fa3d9c88c7cd8a7c94553fb5ad0154270989b3d99a2c4"}, + {file = "tornado-6.5.7-cp39-abi3-win_arm64.whl", hash = "sha256:ff934fce95643af5f11efdae618eaa73d469dc588641e5c8d19295a0c65c4796"}, + {file = "tornado-6.5.7.tar.gz", hash = "sha256:66c513a76cda70d53907bc27cf1447557699c2e95aa48ba27a442ff61c3ddfc2"}, ] [[package]] @@ -3479,6 +3731,18 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.17.0,<1.19)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "types-toml" +version = "0.10.8.20260518" +description = "Typing stubs for toml" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "types_toml-0.10.8.20260518-py3-none-any.whl", hash = "sha256:0e564ab05f6fde62a315b3b5a9b6624fda569399795d30a37e64705a70459303"}, + {file = "types_toml-0.10.8.20260518.tar.gz", hash = "sha256:80e10facd24fdeda9d5c672187d72be3ac284843788d67f5aae59e3e016db6fe"}, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -3490,27 +3754,7 @@ files = [ {file = 
"typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] -markers = {dev = "python_version < \"3.12\"", test = "python_version == \"3.10\""} - -[[package]] -name = "typos" -version = "1.47.2" -description = "Source Code Spelling Correction" -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "typos-1.47.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:87df3040f9d34afd9b19a9437045fbb8838a0435eb00f047e4bac48d92f2fc44"}, - {file = "typos-1.47.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:287e2718a058c561baf5f55ec6b466d9270546bcb1951a2c120e594c574b9597"}, - {file = "typos-1.47.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e4ef6632b280ce237caaec38d80dd3c2d956e28aa6925f80d4e915335b94a36"}, - {file = "typos-1.47.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd7b310019943e26552809bd17f9f202b45eb0c9694f437f1708ab0868248ced"}, - {file = "typos-1.47.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f525edf9b67d3ede552bb70bd4171f23e5e8edec3187189dfe8d1676df630b44"}, - {file = "typos-1.47.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6c36a97ab3dd8c8924cd9b907a32e9aac504fc779d0c3b05e19204ca93385c37"}, - {file = "typos-1.47.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4eb36a44daed1d719ce417d2a6dd7a323d814ccdc647d9bb20d17ac2bed9e38c"}, - {file = "typos-1.47.2-py3-none-win32.whl", hash = "sha256:d0c01034bc029d8883406f3e2bed46dfa9b090ce6ad4a99e580070ae51307cfa"}, - {file = "typos-1.47.2-py3-none-win_amd64.whl", hash = "sha256:749bbba363067bfc0e54ccc6e7580750e17f5ef093c91fedf6c2eb27d32efee6"}, - {file = "typos-1.47.2.tar.gz", hash = "sha256:d303e8c495ea870f750d8b37f2d3c3fe2441b00cf18ca5d7e0b52eca1938c7b7"}, -] +markers = {test = "python_version == 
\"3.10\""} [[package]] name = "url-normalize" @@ -3550,33 +3794,33 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "virtualenv" -version = "21.4.2" +version = "21.4.3" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-21.4.2-py3-none-any.whl", hash = "sha256:854210ca524a1a4d0d744734f4acbc721c3ffe163b85bbf5d56d14d5ae2f0fae"}, - {file = "virtualenv-21.4.2.tar.gz", hash = "sha256:38e6ee0a555615c0ea9da2ac7e9998fe8dc3b911dd33ad8eaad2020957653b0c"}, + {file = "virtualenv-21.4.3-py3-none-any.whl", hash = "sha256:75f4127d4067397c64f38579ce918fec6bf9ca2cd4f48685e82952cc3c035840"}, + {file = "virtualenv-21.4.3.tar.gz", hash = "sha256:938ff0fd3f4e0f0d3a025f67a3d2f25e3c3aabbcd5857ea6170619138d72d141"}, ] [package.dependencies] distlib = ">=0.3.7,<1" filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""} platformdirs = ">=3.9.1,<5" -python-discovery = ">=1.4" +python-discovery = ">=1.4.2" typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""} [[package]] name = "wcwidth" -version = "0.7.0" +version = "0.8.1" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "wcwidth-0.7.0-py3-none-any.whl", hash = "sha256:5d69154c429a82910e241c738cd0e2976fac8a2dd47a1a805f4afed1c0f136f2"}, - {file = "wcwidth-0.7.0.tar.gz", hash = "sha256:90e3a7ea092341c44b99562e75d09e4d5160fe7a3974c6fb842a101a95e7eed0"}, + {file = "wcwidth-0.8.1-py3-none-any.whl", hash = "sha256:f453740b1e4a4f3291faa37944c555d71056c4da08d59809b307ef4feba695c8"}, + {file = "wcwidth-0.8.1.tar.gz", hash = "sha256:faf5b4a5366a72dc49cad48cdf21f52bdf63bdda995178e483ba247ff79089b9"}, ] [[package]] @@ -3615,4 +3859,4 @@ type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [metadata] lock-version = "2.1" python-versions = 
">=3.10,<4.0" -content-hash = "cabed2165dd182a3b780942b4599772c212c0fa84e8c3040925a1cdcdbd411a0" +content-hash = "907371566c23fe8e3a8301b5b635461851b491d2d67683d2b6b72cb9dfee6987" From 686db62098327c08d59aff919e5052ee7f56eb02 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 15:56:36 +0200 Subject: [PATCH 320/352] =?UTF-8?q?docs(readme):=20=F0=9F=93=9D=20add=20De?= =?UTF-8?q?velopment=20section=20with=20pre-commit=20usage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index ebc37d740..5c8061760 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,40 @@ To run the `rocrate-validator` tests, use the following command: poetry run pytest ``` +## Development + +When working from source, install the dependencies (including the dev and test +groups) with: + +```bash +poetry install +``` + +### Pre-commit hooks + +The repository ships a [pre-commit](https://pre-commit.com/) configuration +(`.pre-commit-config.yaml`) that runs spell checking (`typos`), linting and +formatting (`ruff`), and static type checking (`mypy`). The hooks are **not** +active until you install them once in your local clone: + +```bash +poetry run pre-commit install +``` + +After this, the checks run automatically on every `git commit`. You can also run +them manually at any time: + +```bash +# Run all hooks against the whole codebase +poetry run pre-commit run --all-files + +# Run a single hook (e.g. 
typos or ruff) +poetry run pre-commit run typos --all-files + +# mypy is configured as a manual-stage hook, so run it explicitly +poetry run pre-commit run --hook-stage manual +``` + From b8b1060c5435241fb6e62d062edb5a1f4acb0123 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 16:08:13 +0200 Subject: [PATCH 321/352] =?UTF-8?q?style(lint):=20=F0=9F=8E=A8=20add=20per?= =?UTF-8?q?-file=20lint=20suppressions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/conf.py | 6 +++++- .../profiles/ro-crate/must/0_file_descriptor_format.py | 2 ++ .../profiles/ro-crate/must/4_data_entity_metadata.py | 2 ++ .../ro-crate/should/2_root_data_entity_relative_uri.py | 2 ++ .../profiles/ro-crate/should/4_data_entity_existence.py | 2 ++ .../profiles/ro-crate/should/5_web_data_entity_metadata.py | 2 ++ .../profiles/workflow-ro-crate/may/1_main_workflow.py | 2 ++ .../profiles/workflow-ro-crate/must/0_main_workflow.py | 2 ++ rocrate_validator/requirements/python/__init__.py | 4 ++-- 9 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c1ee16e9d..9f2896fe5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,10 @@ # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +# Sphinx conf.py keeps several configuration options commented out as inline +# documentation of what can be enabled; do not flag them as dead code. +# ruff: noqa: ERA001 + # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information @@ -117,7 +121,7 @@ autosummary_generate = True -autodoc_default_options = { +autodoc_default_options: dict[str, bool | str] = { # 'members': True, # Does now show base classes otherwise... why such bad defaults? # But with this it does show useless bases like `object`. What is one to do? 
diff --git a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py index 31b7d4718..fa51484ae 100644 --- a/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py +++ b/rocrate_validator/profiles/ro-crate/must/0_file_descriptor_format.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + import re from typing import Any from urllib.parse import urljoin diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py index 859609e72..7aea2a2c6 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py index 6f4665a42..a67458c34 100644 --- a/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py +++ b/rocrate_validator/profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py index d21c37c7b..c770521c3 100644 --- a/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py +++ b/rocrate_validator/profiles/ro-crate/should/4_data_entity_existence.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index a43415822..e10b9ea70 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py index 681293337..e88e19837 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/may/1_main_workflow.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py index 3941190e4..4e6beaf33 100644 --- a/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py +++ b/rocrate_validator/profiles/workflow-ro-crate/must/0_main_workflow.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# pylint: disable=invalid-name # profile filename uses digit prefix (load-order convention) + from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement from rocrate_validator.utils import log as logging diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index 6bb9e8282..c6bfa6f3e 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -261,8 +261,8 @@ def load( profile: Profile, requirement_level: RequirementLevel, # pylint: disable=unused-argument file_path: Path, - publicID: str | None = None, - ) -> list[Requirement]: # pylint: disable=unused-argument + publicID: str | None = None, # pylint: disable=unused-argument + ) -> list[Requirement]: # instantiate a list to store the requirements requirements: list[Requirement] = [] From 5339b33e93cc87d7a4537a1a4a5f46c2946b676d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 16:12:58 +0200 Subject: [PATCH 322/352] =?UTF-8?q?build(pylint):=20=F0=9F=94=A7=20whiteli?= =?UTF-8?q?st=20SHACL/JSON-LD=20names,=20disable=20R0801?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58ba45660..69bf4c20f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,6 +157,9 @@ good-names-rgxs = [ "^(basicConfig|getLogger)$", # drop-in mirror of the stdlib `logging` API "^(_installed|_config|__profiles_loaded)$", # module-level mutable state (not constants) "^[A-Z][A-Z0-9_]*_TYPES$", # UPPER_CASE type aliases + "^(focusNode|resultPath|sourceConstraintComponent|sourceShape|propertyGroup|requirementCheck)$", # SHACL/rdflib API + "^SHACL$", # rdflib Namespace alias (uppercase by convention) + "^jsonLD(_[a-zA-Z]+)?$", # JSON-LD domain term ] 
[tool.pylint."messages control"] @@ -175,7 +178,7 @@ disable = [ # data/config classes and cross-file similarity are acceptable here. "too-few-public-methods", # R0903 "too-many-instance-attributes", # R0902 - # "duplicate-code", # R0801: noisy across CLI/output layers + "duplicate-code", # R0801: noisy across CLI/output layers # Deferred imports are intentional: circular-import avoidance, # platform-specific modules, optional dependencies, and lazy CLI loading. "import-outside-toplevel", # C0415 From 035494bd2276921355f3be814c48dd5847a92de3 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 16:15:04 +0200 Subject: [PATCH 323/352] =?UTF-8?q?refactor(console):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20match=20rich=20Console.print=20signature?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/io_helpers/output/console.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/utils/io_helpers/output/console.py b/rocrate_validator/utils/io_helpers/output/console.py index 4b012ead7..09883d55c 100644 --- a/rocrate_validator/utils/io_helpers/output/console.py +++ b/rocrate_validator/utils/io_helpers/output/console.py @@ -62,13 +62,13 @@ def register_formatter(self, formatter: OutputFormatter, type_: type | None = No assert type_ is not None # guaranteed by the check above self._formatters[type_] = formatter - def __format_data__(self, obj, *_, **__): # pylint: disable=unused-argument + def __format_data__(self, obj): formatter = self._formatters.get(type(obj)) if formatter: return formatter(obj) return obj - def print(self, obj, *args, **kwargs): # type: ignore[override] # intentional formatting wrapper + def print(self, *objects, **kwargs): if not self.disabled: - out = self.__format_data__(obj, *args, **kwargs) - super().print(out, *args, **kwargs) + formatted = tuple(self.__format_data__(o) for o in objects) + super().print(*formatted, **kwargs) 
From 11fd7fb27b8bf8a935db9b071d1b4feb88d9a9f5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 16:20:25 +0200 Subject: [PATCH 324/352] =?UTF-8?q?refactor(shacl):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20extract=20node-shape=20registration=20helper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../requirements/shacl/models.py | 74 +++++++++++-------- .../requirements/shacl/validator.py | 1 + 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index d3f71826c..c7b439c89 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -369,38 +369,7 @@ def load_shapes(self, shapes_path: str | Path, publicID: str | None = None) -> l # register Node Shapes for node_shape in shapes_list.node_shapes: - # flag to check if the nested properties are in a group - grouped = False - # list of properties ungrouped - ungrouped_properties = [] - # get the shape graph - node_graph = shapes_list.get_shape_graph(node_shape) - # create a node shape object - shape = NodeShape(node_shape, node_graph) - # load the nested properties - shacl_ns = Namespace(SHACL_NS) - nested_properties = node_graph.objects(subject=node_shape, predicate=shacl_ns.property) - for property_shape in nested_properties: - property_graph = shapes_list.get_shape_property_graph(node_shape, property_shape) - p_shape = PropertyShape(property_shape, property_graph, shape) - shape.add_property(p_shape) - group = __process_property_group__(property_groups, p_shape) - if group and group not in shapes: - grouped = True - shapes.append(cast("Shape", group)) - if not group: - ungrouped_properties.append(p_shape) - - # store the property shape in the registry - self.add_shape(p_shape) - # store the node shape in the registry - self.add_shape(shape) - - # store the node in the list of shapes - if not 
grouped: - shapes.append(shape) - else: - shapes.extend(ungrouped_properties) + self._register_node_shape(node_shape, shapes_list, property_groups, shapes) # register Property Shapes for property_shape in shapes_list.property_shapes: @@ -410,6 +379,47 @@ def load_shapes(self, shapes_path: str | Path, publicID: str | None = None) -> l return shapes + def _register_node_shape( + self, + node_shape: Node, + shapes_list: ShapesList, + property_groups: dict[str, PropertyGroup], + shapes: list[Shape], + ) -> None: + """Instantiate ``node_shape`` and its nested PropertyShapes, registering them and appending to ``shapes``.""" + # flag to check if the nested properties are in a group + grouped = False + # list of properties ungrouped + ungrouped_properties: list[PropertyShape] = [] + # get the shape graph + node_graph = shapes_list.get_shape_graph(node_shape) + # create a node shape object + shape = NodeShape(node_shape, node_graph) + # load the nested properties + shacl_ns = Namespace(SHACL_NS) + nested_properties = node_graph.objects(subject=node_shape, predicate=shacl_ns.property) + for property_shape in nested_properties: + property_graph = shapes_list.get_shape_property_graph(node_shape, property_shape) + p_shape = PropertyShape(property_shape, property_graph, shape) + shape.add_property(p_shape) + group = __process_property_group__(property_groups, p_shape) + if group and group not in shapes: + grouped = True + shapes.append(cast("Shape", group)) + if not group: + ungrouped_properties.append(p_shape) + + # store the property shape in the registry + self.add_shape(p_shape) + # store the node shape in the registry + self.add_shape(shape) + + # store the node in the list of shapes + if not grouped: + shapes.append(shape) + else: + shapes.extend(ungrouped_properties) + def __str__(self): return f"ShapesRegistry: {self._shapes}" diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index 0787c25c2..af9eb7ffb 
100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -399,6 +399,7 @@ def shapes_graph(self) -> GraphLike | str | bytes | None: def ont_graph(self) -> GraphLike | str | bytes | None: return self._ont_graph + # pylint: disable-next=too-many-locals def validate( self, # data to validate From 17c6720e7559d4405f71af641c2af78119782c01 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 18:55:04 +0200 Subject: [PATCH 325/352] =?UTF-8?q?refactor(rocrate):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20split=20rocrate.py=20into=20a=20package=20with=20cohesive=20?= =?UTF-8?q?submodules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/rocrate.py | 1108 ------------------------- rocrate_validator/rocrate/__init__.py | 46 + rocrate_validator/rocrate/bagit.py | 162 ++++ rocrate_validator/rocrate/base.py | 380 +++++++++ rocrate_validator/rocrate/entity.py | 289 +++++++ rocrate_validator/rocrate/metadata.py | 168 ++++ rocrate_validator/rocrate/plain.py | 249 ++++++ 7 files changed, 1294 insertions(+), 1108 deletions(-) delete mode 100644 rocrate_validator/rocrate.py create mode 100644 rocrate_validator/rocrate/__init__.py create mode 100644 rocrate_validator/rocrate/bagit.py create mode 100644 rocrate_validator/rocrate/base.py create mode 100644 rocrate_validator/rocrate/entity.py create mode 100644 rocrate_validator/rocrate/metadata.py create mode 100644 rocrate_validator/rocrate/plain.py diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py deleted file mode 100644 index da1bc33b2..000000000 --- a/rocrate_validator/rocrate.py +++ /dev/null @@ -1,1108 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -import io -import json -import re -import struct -import zipfile -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, cast -from urllib.parse import unquote - -from rdflib import Graph - -from rocrate_validator.constants import HTTP_STATUS_OK -from rocrate_validator.errors import ROCrateInvalidURIError -from rocrate_validator.utils import log as logging -from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference, validate_rocrate_uri - -# set up logging -logger = logging.getLogger(__name__) - - -class ROCrateEntity: - def __init__(self, metadata: ROCrateMetadata, raw_data: dict) -> None: - self._raw_data: dict = raw_data - self._metadata = metadata - - @property - def id(self) -> str: - return cast("str", self._raw_data.get("@id")) - - @property - def type(self) -> str | list[str]: - return cast("str | list[str]", self._raw_data.get("@type")) - - def is_dataset(self) -> bool: - return self.has_type("Dataset") - - def is_file(self) -> bool: - return self.has_type("File") - - @property - def name(self) -> str: - return cast("str", self._raw_data.get("name")) - - @property - def metadata(self) -> ROCrateMetadata: - return self._metadata - - @property - def ro_crate(self) -> ROCrate: - return self.metadata.ro_crate - - def is_remote(self) -> bool: - return self.id_as_uri.is_remote_resource() - - @classmethod - def get_id_as_path(cls, entity_id: str, ro_crate: ROCrate | None = None) -> Path: - return 
cls.get_path_from_identifier( - entity_id, - ro_crate.uri.as_path() if ro_crate and ro_crate.uri.is_local_resource() else None, - ) - - @staticmethod - def get_path_from_identifier( - identifier: str, - rocrate_path: str | Path | None = None, - decode: bool = False, - ) -> Path: - """ - Get the path from an identifier. - - :param identifier: the identifier of the entity - :type identifier: str - - :param rocrate_path: the path to the RO-Crate - :type rocrate_path: Optional[Union[str, Path] - - :return: the path to the entity - :rtype: Path - - """ - - def __define_path__(path: str, decode: bool = False) -> Path: - # ensure the path is a string and remove the file:// prefix - path = str(path).replace("file://", "") - # Decode the path if required - if decode: - path = unquote(path) - # Convert the path to a Path object - path_obj = Path(path) - # if the path is absolute, return it - if path_obj.is_absolute(): - return path_obj - # set the base path - base_path: Path - if rocrate_path is None: - base_path = Path("./") - elif not isinstance(rocrate_path, Path): - base_path = Path(rocrate_path) - else: - base_path = rocrate_path - try: - # Check if the path if the root of the RO-Crate - if path_obj == Path("./"): - return base_path - # if the path is relative, try to resolve it - return base_path / path_obj.relative_to(base_path) - except ValueError: - # if the path cannot be resolved, return the absolute path - return base_path / path_obj - - # Define the path based on the identifier - path = __define_path__(identifier, decode=decode) - logger.debug("Defined path '%s' from identifier '%s'", path, identifier) - return path - - @property - def id_as_path(self) -> Path: - return self.get_id_as_path(self.id, self.ro_crate) - - @classmethod - def get_id_as_uri(cls, entity_id: str, ro_crate: ROCrate) -> URI: - assert entity_id, "Entity ID cannot be None" - # Per RO-Crate 1.1 § 4.2.2, an `@id` is either a relative URI path or - # an external URI/IRI (RFC 3986/3987). 
External references are used - # as-is (without resolving them against the crate URI) so the entity - # is classified as remote/web-based; this covers both authority-based - # forms (``http://``, ``scp://``) and scheme-only ones (``urn:``, - # ``doi:``, ``arcp:``). - if is_external_reference(entity_id): - return URI(entity_id) - # Otherwise the `@id` is a relative path: if the RO-Crate itself is - # remote, resolve it against the crate URI so the entity is still - # classified as remote/web-based. - if ro_crate.uri.is_remote_resource() and entity_id.startswith("./"): - return URI(f"{ro_crate.uri}/{entity_id[2:]}") - return URI(cls.get_id_as_path(entity_id, ro_crate)) - - @property - def id_as_uri(self) -> URI: - return self.get_id_as_uri(self.id, self.ro_crate) - - def has_absolute_path(self) -> bool: - return self.get_id_as_path(self.id).is_absolute() - - def has_relative_path(self) -> bool: - return not self.has_absolute_path() - - def has_local_identifier(self) -> bool: - has_local_id = ( - self.id.startswith("#") or f"{self.ro_crate.uri}/#" in self.id or f"file://{self.ro_crate.uri}/#" in self.id - ) - logger.debug( - "Identifier '%s' is %s a local identifier", - self.id, - "" if has_local_id else " not", - ) - return has_local_id - - def has_type(self, entity_type: str) -> bool: - assert isinstance(entity_type, str), "Entity type must be a string" - e_types = self.type if isinstance(self.type, list) else [self.type] - return entity_type in e_types - - def has_types(self, entity_types: list[str], all_types: bool = False) -> bool: - """ - Check if the entity has any or all of the specified types. 
- """ - assert isinstance(entity_types, list), "Entity types must be a list" - e_types = self.type if isinstance(self.type, list) else [self.type] - if all_types: - return all(t in e_types for t in entity_types) - return any(t in e_types for t in entity_types) - - def __process_property__(self, _name: str, data: object) -> object: - if isinstance(data, dict) and "@id" in data: - entity = self.metadata.get_entity(data["@id"]) - if entity is None: - return ROCrateEntity(self.metadata, data) - return entity - return data - - def get_property(self, name: str, default=None) -> Any: - data = self._raw_data.get(name, default) - if data is None: - return None - if isinstance(data, list): - return [self.__process_property__(name, _) for _ in data] - return self.__process_property__(name, data) - - @property - def raw_data(self) -> object: - return self._raw_data - - def is_local(self) -> bool: - return not self.is_remote() - - def _check_local_availability(self) -> AvailabilityStatus: - if self.ro_crate.uri.is_local_resource(): - if isinstance(self.ro_crate, ROCrateLocalFolder): - found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - if isinstance(self.ro_crate, ROCrateLocalZip): - if self.id == "./": - return AvailabilityStatus.AVAILABLE - found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( - Path(unquote(str(self.id))) - ) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - - if self.ro_crate.uri.is_remote_resource(): - if self.id == "./": - found = self.ro_crate.get_file_size(self.id_as_path) > 0 - else: - found = self.ro_crate.has_directory(Path(unquote(str(self.id)))) or self.ro_crate.has_file( - Path(unquote(str(self.id))) - ) - return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - - raise ROCrateInvalidURIError(uri=self.id, message="Could not 
determine the availability of the entity") - - def check_availability(self) -> AvailabilityStatus: - """ - Return a fine-grained availability status for this entity. - - This is the primary check; :meth:`is_available` is the boolean - shortcut built on top of it. The status distinguishes definitely - unavailable resources, auth-protected ones, and remote URIs whose - scheme the validator cannot natively check (scp://, s3://, ...). - """ - try: - entity_uri = self.id_as_uri - if entity_uri.is_natively_checkable(): - logger.debug("Checking the availability of a remote entity") - return entity_uri.check_availability() - - if entity_uri.is_remote_resource(): - logger.debug( - "Cannot natively verify availability for entity '%s' (scheme '%s')", - self.id, - entity_uri.scheme, - ) - return AvailabilityStatus.UNCHECKABLE - - return self._check_local_availability() - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error checking entity availability") - return AvailabilityStatus.UNAVAILABLE - - def is_available(self) -> bool: - return self.check_availability() == AvailabilityStatus.AVAILABLE - - def get_size(self) -> int: - try: - return self.metadata.ro_crate.get_file_size(Path(self.id)) - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error getting entity file size") - return 0 - - def __str__(self) -> str: - return f"Entity({self.id})" - - def __repr__(self) -> str: - return str(self) - - def __eq__(self, other: object) -> bool: - if not isinstance(other, ROCrateEntity): - return False - return self.id == other.id - - def __hash__(self) -> int: - return hash(self.id) - - -class ROCrateMetadata: - METADATA_FILE_DESCRIPTOR = "ro-crate-metadata.json" - - def __init__(self, ro_crate: ROCrate, metadata_dict: dict | None = None) -> None: - self._ro_crate = ro_crate - self._dict = metadata_dict - self._json: str | None = json.dumps(metadata_dict) if metadata_dict else None - self._graph: Graph | None = None - - 
@property - def ro_crate(self) -> ROCrate: - return self._ro_crate - - @property - def size(self) -> int: - try: - return len(self.as_json()) - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error computing entity JSON size") - return 0 - - def get_file_descriptor_entity(self) -> ROCrateEntity: - metadata_file_descriptor = self.get_entity(self.METADATA_FILE_DESCRIPTOR) - if not metadata_file_descriptor: - raise ValueError("no metadata file descriptor in crate") - return metadata_file_descriptor - - def get_root_data_entity(self) -> ROCrateEntity: - metadata_file_descriptor = self.get_file_descriptor_entity() - main_entity = metadata_file_descriptor.get_property("about") - if not main_entity: - raise ValueError("no main entity in metadata file descriptor") - return main_entity - - def get_root_data_entity_conforms_to(self) -> list[str] | None: - try: - root_data_entity = self.get_root_data_entity() - result = root_data_entity.get_property("conformsTo", []) - if result is None: - return None - if not isinstance(result, list): - result = [result] - return [_.id for _ in result] - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error getting entity image") - return None - - def get_main_workflow(self) -> ROCrateEntity: - root_data_entity = self.get_root_data_entity() - main_workflow = root_data_entity.get_property("mainEntity") - if not main_workflow: - raise ValueError("no main workflow in metadata file descriptor") - return main_workflow - - def get_entity(self, entity_id: str) -> ROCrateEntity | None: - for entity in self.as_dict().get("@graph", []): - if entity.get("@id") == entity_id: - return ROCrateEntity(self, entity) - return None - - def get_entities(self) -> list[ROCrateEntity]: - return [ROCrateEntity(self, entity) for entity in self.as_dict().get("@graph", [])] - - def get_entities_by_type(self, entity_type: str | list[str]) -> list[ROCrateEntity]: - entity_types = [entity_type] if 
isinstance(entity_type, str) else entity_type - return [e for e in self.get_entities() if e.has_types(entity_types)] - - def get_dataset_entities(self) -> list[ROCrateEntity]: - return self.get_entities_by_type("Dataset") - - def get_file_entities(self) -> list[ROCrateEntity]: - return self.get_entities_by_type("File") - - def get_data_entities(self, exclude_web_data_entities: bool = False) -> list[ROCrateEntity]: - if not exclude_web_data_entities: - return self.get_entities_by_type(["Dataset", "File"]) - return [e for e in self.get_entities_by_type(["Dataset", "File"]) if not e.is_remote()] - - def get_web_data_entities(self) -> list[ROCrateEntity]: - return [ - entity - for entity in self.get_entities() - if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote() - ] - - def get_conforms_to(self) -> list[str] | None: - try: - file_descriptor = self.get_file_descriptor_entity() - result = file_descriptor.get_property("conformsTo", []) - if result is None: - return None - if not isinstance(result, list): - result = [result] - return [_.id for _ in result] - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error getting entity identifiers by type") - return None - - def as_json(self) -> str: - if not self._json: - self._json = cast( - "str", self.ro_crate.get_file_content(Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False) - ) - return self._json - - def as_dict(self) -> dict[Any, Any]: - if not self._dict: - # if the dictionary is not cached, load it - self._dict = json.loads(self.as_json()) - assert self._dict is not None, "Metadata dictionary should not be None after loading" - return self._dict - - def as_graph(self, publicID: str | None = None) -> Graph: - if not self._graph: - # if the graph is not cached, load it - self._graph = Graph(base=publicID or str(self.ro_crate.uri)) - self._graph.parse(data=self.as_json(), format="json-ld") - return self._graph - - def __str__(self) -> str: - return 
f"Metadata({self.ro_crate})" - - def __repr__(self) -> str: - return str(self) - - def __eq__(self, other: object) -> bool: - if not isinstance(other, ROCrateMetadata): - return False - return self.ro_crate == other.ro_crate - - def __hash__(self) -> int: - return hash(self.ro_crate) - - -class ROCrate(ABC): - """ - Base class for representing and interacting with a Research Object Crate (RO-Crate). - """ - - def __new__(cls, uri: str | Path | URI, relative_root_path: Path | None = None): - """ - Factory method to create the appropriate ROCrate subclass instance. - - :param uri: the URI of the RO-Crate - :type uri: Union[str, Path, URI] - - :param relative_root_path: the relative root path inside the RO-Crate - :type relative_root_path: Path - - :return: an instance of the appropriate ROCrate subclass - :rtype: ROCrate - - :raises ROCrateInvalidURIError: if the URI is invalid - """ - if cls is not ROCrate: - # If called on a subclass, use normal instantiation - return super().__new__(cls) - - # If called on ROCrate directly, use factory logic - instance = cls.new_instance(uri) - if relative_root_path: - instance.relative_root_path = relative_root_path - return instance - - def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None) -> None: - """ - Initialize the RO-Crate. - - :param uri: the URI of the RO-Crate - :type uri: Union[str, Path, URI] - - :raises ROCrateInvalidURIError: if the URI is invalid - """ - - # store the path to the crate - self._uri = uri if isinstance(uri, URI) else URI(uri) - - # the relative root path inside the RO-Crate - self.relative_root_path = relative_root_path - - # cache the list of files - self._files: list[Path] | None = None - - # initialize variables to cache the data - self._dict: dict | None = None - self._graph: Graph | None = None - - self._metadata: ROCrateMetadata | None = None - - @property - def uri(self) -> URI: - """ - The URI of the RO-Crate. 
- """ - return self._uri - - @property - def metadata(self) -> ROCrateMetadata: - """ - An ROCrateMetadata object representing the RO-Crate metadata. - - :return: the metadata object - :rtype: ROCrateMetadata - """ - if not self._metadata: - self._metadata = ROCrateMetadata(self) - return self._metadata - - @property - @abstractmethod - def size(self) -> int: - """ - The size of the RO-Crate. - - :return: the size of the RO-Crate - :rtype: int - """ - - @abstractmethod - def list_files(self) -> list[Path]: - """ - List all the files in the RO-Crate. - - :return: a list of file paths - :rtype: list[Path] - """ - - def __get_search_path__(self, path: Path) -> tuple[Path, Path]: - """ - Get the search path relative to the RO-Crate root path. - - :param path: the path to resolve - :type path: Path - :return: the search path - :rtype: Path - """ - assert path, "Path cannot be None" - # Identify the root path of the RO-Crate - root_path = self.uri.as_path() if self.uri.is_local_resource() and isinstance(path, Path) else Path("./") - # Extract the search path relative to the root of the RO-Crate root path - try: - search_path = path.relative_to(root_path) - except Exception: - search_path = path - return search_path, root_path - - def __check_search_path__(self, path) -> tuple[Path | None, Path | None]: - """ " - Extract the search path if it does not contain the relative root path. - - :param path: the path to resolve - :type path: Path - :return: the search path if valid, None otherwise - :rtype: Path or None - """ - if not self.relative_root_path: - return None, None - - search_path, root_path = self.__get_search_path__(path) - # Check if the path has the substring 'relative_root_path/' in it - has_sub_data_path = re.search(str(self.relative_root_path), str(search_path)) - if not has_sub_data_path: - return search_path, root_path - return None, None - - def __parse_path__(self, path: Path) -> Path: - """ " - Parse the given path to resolve it within the RO-Crate. 
- :param path: the path to resolve - :type path: Path - :return: the resolved path - :rtype: Path - """ - assert path, "Path cannot be None" - - # Resolve the path based on the RO-Crate location - rocrate_path = self.uri.as_path() if self.uri.is_local_resource() else None - rocrate_path_arg = rocrate_path if not str(rocrate_path).endswith(".zip") else None - paths_to_try = [path] - unquoted_path = Path(unquote(str(path))) - if str(path) != str(unquoted_path): - paths_to_try.append(unquoted_path) - path_identifier = path - for p in paths_to_try: - path_identifier = ROCrateEntity.get_path_from_identifier( - str(p), rocrate_path=rocrate_path_arg, decode=False - ) - search_path, base_path = self.__check_search_path__(path_identifier) - if search_path and base_path: - if self.relative_root_path: - path_identifier = base_path / self.relative_root_path / search_path - else: - path_identifier = base_path / search_path - if path_identifier.exists(): - return path_identifier - return path_identifier - - def has_descriptor(self) -> bool: - """ - Check if the RO-Crate has a metadata descriptor file. - - :return: `True` if the RO-Crate has a metadata descriptor file, `False` otherwise - :rtype: bool - """ - path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR)) - logger.debug("Checking for metadata descriptor at path: %s", path) - return self.has_file(path) - - def has_file(self, path: Path) -> bool: - """ - Check if the RO-Crate has a file. - - :param path: the path to the file - :type path: Path - - :return: `True` if the RO-Crate has the file, `False` otherwise - :rtype: bool - """ - try: - return self.__parse_path__(path).is_file() - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error checking if path is a file") - return False - - def has_directory(self, path: Path) -> bool: - """ - Check if the RO-Crate has a directory. 
- - :param path: the path to the directory - :type path: Path - - :return: `True` if the RO-Crate has the directory, `False` otherwise - :rtype: bool - """ - try: - return self.__parse_path__(path).is_dir() - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error checking if path is a directory") - return False - - @abstractmethod - def get_file_size(self, path: Path) -> int: - """ - Get the size of a file in the RO-Crate. - - :param path: the path to the file - :type path: Path - - :return: the size of the file - :rtype: int - """ - - @abstractmethod - def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: - """ - Get the content of a file in the RO-Crate. - - :param path: the path to the file - :type path: Path - - :param binary_mode: if `True`, return the file as a `bytes` object; otherwise, return it as a `str` - :type binary_mode: bool - - :return: the content of the file - :rtype: Union[str, bytes] - """ - - @staticmethod - def get_external_file_content(uri: str, binary_mode: bool = True) -> str | bytes: - """ - Get the content of an external file. - - :param uri: the URI of the file - :type uri: str - - :param binary_mode: if `True`, return the file as a `bytes` object; otherwise, return it as a `str` - :type binary_mode: bool - - :return: the content of the file - :rtype: Union[str, bytes] - """ - response = HttpRequester().get(str(uri)) - response.raise_for_status() - return response.content if binary_mode else response.text - - @staticmethod - def get_external_file_size(uri: str) -> int: - """ - Get the size of an external file. 
- - :param uri: the URI of the file - :type uri: str - - :return: the size of the file - :rtype: int - - :raises requests.HTTPError: if the request fails - """ - response = HttpRequester().head(str(uri)) - response.raise_for_status() - return int(response.headers.get("Content-Length")) - - @staticmethod - def from_metadata_dict(metadata_dict: dict) -> ROCrate: - """ - Create a new instance of the RO-Crate based on the metadata dictionary. - - :param metadata_dict: the metadata dictionary - :type metadata_dict: dict - - :raises ROCrateInvalidURIError: if the URI is invalid - """ - # create a new instance based on the URI (the ROCrate factory __new__ - # dispatches to a concrete subclass, so this is not truly abstract) - ro_crate = ROCrate(URI("./"), relative_root_path=None) # type: ignore[abstract] - - # override the metadata with the provided dictionary - ro_crate._metadata = ROCrateMetadata(ro_crate, metadata_dict=metadata_dict) - return ro_crate - - @staticmethod - def new_instance(uri: str | Path | URI, relative_root_path: Path | None = None) -> ROCrate: - """ - Create a new instance of the RO-Crate based on the URI. 
- - :param uri: the URI of the RO-Crate - :type uri: Union[str, Path, URI] - - :return: a new instance of the RO-Crate - :rtype: ROCrate - - :raises ROCrateInvalidURIError: if the URI is invalid - """ - # check if the URI is valid - validate_rocrate_uri(uri, silent=False) - # create a new instance based on the URI - if not isinstance(uri, URI): - uri = URI(uri) - # check if the URI is a BagIt-wrapped crate - is_bagit_crate = BagitROCrate.is_bagit_wrapping_crate(uri) - - # check if the URI is a local directory - if uri.is_local_directory(): - return ( - ROCrateBagitLocalFolder(uri, relative_root_path=relative_root_path) - if is_bagit_crate - else ROCrateLocalFolder(uri, relative_root_path=relative_root_path) - ) - # check if the URI is a local zip file - if uri.is_local_file(): - return ( - ROCrateBagitLocalZip(uri, relative_root_path=relative_root_path) - if is_bagit_crate - else ROCrateLocalZip(uri, relative_root_path=relative_root_path) - ) - # check if the URI is a remote zip file - if uri.is_remote_resource(): - return ( - ROCrateBagitRemoteZip(uri, relative_root_path=relative_root_path) - if is_bagit_crate - else ROCrateRemoteZip(uri, relative_root_path=relative_root_path) - ) - # if the URI is not supported, raise an error - raise ROCrateInvalidURIError(uri=uri, message="Unsupported RO-Crate URI") - - -class ROCrateLocalFolder(ROCrate): - """ - Class representing an RO-Crate stored in a local folder. 
- """ - - def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): - super().__init__(path, relative_root_path=relative_root_path) - - # cache the list of files - self._files = None - - # check if the path is a directory - if not self.has_directory(self.uri.as_path()): - raise ROCrateInvalidURIError(uri=path) - - @property - def size(self) -> int: - return sum(f.stat().st_size for f in self.list_files() if f.is_file()) - - def list_files(self) -> list[Path]: - if not self._files: - self._files = [] - base_path = self.uri.as_path() - for file in base_path.rglob("*"): - if file.is_file(): - self._files.append(base_path / file) - return self._files - - def get_file_size(self, path: Path) -> int: - path = self.__parse_path__(path) - if not self.has_file(path): - raise FileNotFoundError(f"File not found: {path}") - return path.stat().st_size - - def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: - path = self.__parse_path__(path) - if not self.has_file(path): - raise FileNotFoundError(f"File not found: {path}") - return path.read_bytes() if binary_mode else path.read_text(encoding="utf-8") - - -class ROCrateLocalZip(ROCrate): - def __init__( - self, - path: str | Path | URI, - relative_root_path: Path | None = None, - init_zip: bool = True, - ): - super().__init__(path, relative_root_path=relative_root_path) - - # initialize the zip reference - self._zipref: zipfile.ZipFile | None = None - if init_zip: - self.__init_zip_reference__() - - # cache the list of files - self._files = None - - def __del__(self): - try: - if self._zipref and self._zipref.fp is not None: - self._zipref.close() - del self._zipref - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error closing zip reference") - - def __parse_path__(self, path): - assert path, "Path cannot be None" - # If the RO-Crate is a zip file, the path should be changed - return path - - @property - def size(self) -> int: - return 
self.uri.as_path().stat().st_size - - def __init_zip_reference__(self): - path = self.uri.as_path() - # check if the path is a file - if not self.uri.as_path().is_file(): - raise ROCrateInvalidURIError(uri=path) - # check if the file is a zip file - if self.uri.as_path().suffix != ".zip": - raise ROCrateInvalidURIError(uri=path) - self._zipref = zipfile.ZipFile(path) # pylint: disable=consider-using-with - logger.debug("Initialized zip reference: %s", self._zipref) - - def __get_file_info__(self, path: str | Path) -> zipfile.ZipInfo: - assert self._zipref is not None, "Zip reference not initialized" - try: - return self._zipref.getinfo(str(path)) - except KeyError: - logger.error("File not found in zip: %s", path) - raise FileNotFoundError(f"File not found in zip: {path}") from None - - def has_descriptor(self) -> bool: - """ - Check if the RO-Crate has a metadata descriptor file. - :rtype: bool - """ - path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR)) - return str(path) in [str(_) for _ in self.list_files()] - - def has_file(self, path: Path) -> bool: - path = self.__parse_path__(path) - for p in self.list_files(): - if str(path) == str(p): - info = self.__get_file_info__(path) - return not info.is_dir() - return False - - def has_directory(self, path: Path) -> bool: - assert path, "Path cannot be None" - assert self._zipref is not None, "Zip reference not initialized" - for px in (path, self.__parse_path__(path)): - for p in self._zipref.namelist(): - if f"{px!s}/" == str(p) or str(px) == str(p): - info = self.__get_file_info__(p) - return info.is_dir() - return False - - def list_files(self) -> list[Path]: - if not self._files: - assert self._zipref is not None, "Zip reference not initialized" - self._files = [] - for file in self._zipref.namelist(): - self._files.append(Path(file)) - return self._files - - def list_entries(self) -> list[zipfile.ZipInfo]: - assert self._zipref is not None, "Zip reference not initialized" - return 
self._zipref.infolist() - - def get_entry(self, path: Path) -> zipfile.ZipInfo: - """ - Return the ZipInfo object for the specified path. - """ - return self.__get_file_info__(self.__parse_path__(path)) - - def get_file_size(self, path: Path) -> int: - assert self._zipref is not None, "Zip reference not initialized" - return self._zipref.getinfo(str(self.__parse_path__(path))).file_size - - def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes: - path = self.__parse_path__(path) - if not self.has_file(path): - raise FileNotFoundError(f"File not found: {path}") - assert self._zipref is not None, "Zip reference not initialized" - data = self._zipref.read(str(path)) - return data if binary_mode else data.decode("utf-8") - - -class ROCrateRemoteZip(ROCrateLocalZip): - def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None): - super().__init__(path, relative_root_path=relative_root_path, init_zip=False) - - # # initialize the zip reference - self.__init_zip_reference__() - - def __init_zip_reference__(self): - url = str(self.uri) - - # check if the URI is available - if not self.uri.is_available(): - raise ROCrateInvalidURIError(uri=url, message="URI is not available") - - # Step 1: Fetch the last 22 bytes to find the EOCD record - eocd_data = self.__fetch_range__(url, -22, "") - - # Step 2: Find the EOCD record - eocd_offset = self.__find_eocd__(eocd_data) - - # Step 3: Fetch the EOCD and parse it - eocd_full_data = self.__fetch_range__(url, -22 - eocd_offset, -1) - central_directory_offset, central_directory_size = self.__parse_eocd__(eocd_full_data) - - # Step 4: Fetch the central directory - central_directory_data = self.__fetch_range__( - url, central_directory_offset, central_directory_offset + central_directory_size - 1 - ) - # Step 5: Parse the central directory and return the zip file - self._zipref = zipfile.ZipFile(io.BytesIO(central_directory_data)) # pylint: disable=consider-using-with - - @property - def 
size(self) -> int: - response = HttpRequester().head(str(self.uri)) - response.raise_for_status() # Check if the request was successful - file_size = response.headers.get("Content-Length") - if file_size is not None: - return int(file_size) - raise ValueError("Could not determine the file size from the headers") - - @staticmethod - def __fetch_range__(uri: str, start, end): - headers = {"Range": f"bytes={start}-{end}"} - response = HttpRequester().get(uri, headers=headers) - response.raise_for_status() - return response.content - - @staticmethod - def __find_eocd__(data): - eocd_signature = b"PK\x05\x06" - eocd_offset = data.rfind(eocd_signature) - if eocd_offset == -1: - raise ValueError("EOCD not found") - return eocd_offset - - @staticmethod - def __parse_eocd__(data): - eocd_size = struct.calcsize("<4s4H2LH") - eocd = struct.unpack("<4s4H2LH", data[-eocd_size:]) - central_directory_size = eocd[5] - central_directory_offset = eocd[6] - return central_directory_offset, central_directory_size - - -class BagitROCrate(ROCrate, ABC): - def __init__(self, uri, relative_root_path=None): - super().__init__(uri, relative_root_path) - - # check if the path is a BagIt-wrapped crate - assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" - - @staticmethod - def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: - """ - Check if the RO-Crate is a BagIt-wrapped crate. 
- - :param uri: the URI of the RO-Crate - :type uri: Union[str, Path, URI] - - :return: `True` if the RO-Crate is a BagIt-wrapped crate, `False` otherwise - :rtype: bool - """ - if not isinstance(uri, URI): - uri = URI(uri) - - result = False - try: - # Check for local directory - if uri.is_local_directory(): - base_path = uri.as_path() - result = (base_path / "bagit.txt").is_file() and ( - base_path / "data" / "ro-crate-metadata.json" - ).is_file() - - # Check for local zip file - elif uri.is_local_file(): - path = uri.as_path() - if path.suffix == ".zip": - with zipfile.ZipFile(path, "r") as zf: - namelist = zf.namelist() - result = "bagit.txt" in namelist and "data/ro-crate-metadata.json" in namelist - - # Check for remote zip file - elif uri.is_remote_resource(): - # For remote resources, we need to check if both files exist - # We'll use HTTP HEAD requests to check without downloading - base_url = str(uri).rstrip("/") - - if not base_url.endswith(".zip"): - # Check for bagit.txt - bagit_response = HttpRequester().head(f"{base_url}/bagit.txt") - if bagit_response.status_code == HTTP_STATUS_OK: - # Check for data/ro-crate-metadata.json - metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") - result = metadata_response.status_code == HTTP_STATUS_OK - else: - # If it's a remote zip file, we need to download it partially - # Temporarily create instance to check - temp_crate = ROCrateRemoteZip(uri) - logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) - has_bagit_txt = temp_crate.has_file(Path("bagit.txt")) - logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) - has_ro_crate_metadata = temp_crate.has_file(Path("data/ro-crate-metadata.json")) - logger.debug("Presence of 'data/ro-crate-metadata.json': %s", has_ro_crate_metadata) - result = has_bagit_txt and has_ro_crate_metadata - del temp_crate - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Error loading remote BagIt RO-Crate 
metadata") - return result - - def __check_search_path__(self, path): - """ - Check if the search path is valid for a BagIt-wrapped RO-Crate, - i.e., if it points to the 'data/' directory. - - :param path: the path to resolve - :type path: Path - :return: the search path if valid, None otherwise - :rtype: Path or None - """ - search_path, root_path = super().__get_search_path__(path) - # Check if the path has the substring 'data/' in it - has_sub_data_path = re.search(r"data/", str(search_path)) - logger.debug( - "The search path '%s' %s the 'data/' sub-path", - search_path, - "contains" if has_sub_data_path else "does not contain", - ) - if search_path == "." or not has_sub_data_path: - return search_path, root_path - return None, None - - -class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): - def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): - # initialize the parent classes - super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) - # check if the path is a BagIt-wrapped crate - assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" - - def __parse_path__(self, path: Path) -> Path: - search_path, root_path = self.__check_search_path__(path) - # if search_path and root_path are set, adjust the path - if search_path and root_path: - path = root_path / Path("data") / search_path - if not path.exists(): - path = Path(unquote(str(path))) - return path - - -class ROCrateBagitLocalZip(BagitROCrate, ROCrateLocalZip): - """ - Class representing an RO-Crate stored in a local BagIt-wrapped zip file. 
- """ - - def __parse_path__(self, path: Path) -> Path: - # Extract the search path relative to the root of the RO-Crate root path - search_path, _ = super().__check_search_path__(path) - - # if search_path is set, adjust the path - if search_path: - path = Path("data") / search_path - assert self._zipref is not None, "Zip reference not initialized" - zip_namelist = self._zipref.namelist() - if str(path) not in zip_namelist and f"{path}/" not in zip_namelist: - path = Path(unquote(str(path))) - return path - - -class ROCrateBagitRemoteZip(ROCrateBagitLocalZip, ROCrateRemoteZip): - pass diff --git a/rocrate_validator/rocrate/__init__.py b/rocrate_validator/rocrate/__init__.py new file mode 100644 index 000000000..2615e7b0a --- /dev/null +++ b/rocrate_validator/rocrate/__init__.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Public re-exports for ``rocrate_validator.rocrate``. + +The implementation is split across submodules grouped by responsibility: + +* :mod:`.entity` — :class:`ROCrateEntity` (RO-Crate JSON-LD entities) +* :mod:`.metadata` — :class:`ROCrateMetadata` (file descriptor wrapper) +* :mod:`.base` — :class:`ROCrate` abstract base + factory +* :mod:`.plain` — local folder / local zip / remote zip variants +* :mod:`.bagit` — BagIt-wrapped variants + +Callers should keep importing from ``rocrate_validator.rocrate`` as before; +this module re-exports every public name from the submodules. 
+""" + +from .bagit import BagitROCrate, ROCrateBagitLocalFolder, ROCrateBagitLocalZip, ROCrateBagitRemoteZip +from .base import ROCrate +from .entity import ROCrateEntity +from .metadata import ROCrateMetadata +from .plain import ROCrateLocalFolder, ROCrateLocalZip, ROCrateRemoteZip + +__all__ = [ + "BagitROCrate", + "ROCrate", + "ROCrateBagitLocalFolder", + "ROCrateBagitLocalZip", + "ROCrateBagitRemoteZip", + "ROCrateEntity", + "ROCrateLocalFolder", + "ROCrateLocalZip", + "ROCrateMetadata", + "ROCrateRemoteZip", +] diff --git a/rocrate_validator/rocrate/bagit.py b/rocrate_validator/rocrate/bagit.py new file mode 100644 index 000000000..5f09c3b39 --- /dev/null +++ b/rocrate_validator/rocrate/bagit.py @@ -0,0 +1,162 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import re +import zipfile +from abc import ABC +from pathlib import Path +from urllib.parse import unquote + +from rocrate_validator.constants import HTTP_STATUS_OK +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.uri import URI + +from .base import ROCrate +from .plain import ROCrateLocalFolder, ROCrateLocalZip, ROCrateRemoteZip + +# set up logging +logger = logging.getLogger(__name__) + + +class BagitROCrate(ROCrate, ABC): + def __init__(self, uri, relative_root_path=None): + super().__init__(uri, relative_root_path) + + # check if the path is a BagIt-wrapped crate + assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" + + @staticmethod + def is_bagit_wrapping_crate(uri: str | Path | URI) -> bool: + """ + Check if the RO-Crate is a BagIt-wrapped crate. + + :param uri: the URI of the RO-Crate + :type uri: Union[str, Path, URI] + + :return: `True` if the RO-Crate is a BagIt-wrapped crate, `False` otherwise + :rtype: bool + """ + if not isinstance(uri, URI): + uri = URI(uri) + + result = False + try: + # Check for local directory + if uri.is_local_directory(): + base_path = uri.as_path() + result = (base_path / "bagit.txt").is_file() and ( + base_path / "data" / "ro-crate-metadata.json" + ).is_file() + + # Check for local zip file + elif uri.is_local_file(): + path = uri.as_path() + if path.suffix == ".zip": + with zipfile.ZipFile(path, "r") as zf: + namelist = zf.namelist() + result = "bagit.txt" in namelist and "data/ro-crate-metadata.json" in namelist + + # Check for remote zip file + elif uri.is_remote_resource(): + # For remote resources, we need to check if both files exist + # We'll use HTTP HEAD requests to check without downloading + base_url = str(uri).rstrip("/") + + if not base_url.endswith(".zip"): + # Check for bagit.txt + bagit_response = HttpRequester().head(f"{base_url}/bagit.txt") + if 
bagit_response.status_code == HTTP_STATUS_OK: + # Check for data/ro-crate-metadata.json + metadata_response = HttpRequester().head(f"{base_url}/data/ro-crate-metadata.json") + result = metadata_response.status_code == HTTP_STATUS_OK + else: + # If it's a remote zip file, we need to download it partially + # Temporarily create instance to check + temp_crate = ROCrateRemoteZip(uri) + logger.debug("Initializing ROCrateRemoteZip for URI: %s", uri) + has_bagit_txt = temp_crate.has_file(Path("bagit.txt")) + logger.debug("Presence of 'bagit.txt': %s", has_bagit_txt) + has_ro_crate_metadata = temp_crate.has_file(Path("data/ro-crate-metadata.json")) + logger.debug("Presence of 'data/ro-crate-metadata.json': %s", has_ro_crate_metadata) + result = has_bagit_txt and has_ro_crate_metadata + del temp_crate + except Exception: + if logger.isEnabledFor(logging.DEBUG): + logger.exception("Error loading remote BagIt RO-Crate metadata") + return result + + def __check_search_path__(self, path): + """ + Check if the search path is valid for a BagIt-wrapped RO-Crate, + i.e., if it points to the 'data/' directory. + + :param path: the path to resolve + :type path: Path + :return: the search path if valid, None otherwise + :rtype: Path or None + """ + search_path, root_path = super().__get_search_path__(path) + # Check if the path has the substring 'data/' in it + has_sub_data_path = re.search(r"data/", str(search_path)) + logger.debug( + "The search path '%s' %s the 'data/' sub-path", + search_path, + "contains" if has_sub_data_path else "does not contain", + ) + if search_path == "." 
or not has_sub_data_path: + return search_path, root_path + return None, None + + +class ROCrateBagitLocalFolder(BagitROCrate, ROCrateLocalFolder): + def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None): + # initialize the parent classes + super(ROCrateLocalFolder, self).__init__(uri, relative_root_path=relative_root_path) + # check if the path is a BagIt-wrapped crate + assert self.is_bagit_wrapping_crate(uri), "Not a BagIt-wrapped RO-Crate" + + def __parse_path__(self, path: Path) -> Path: + search_path, root_path = self.__check_search_path__(path) + # if search_path and root_path are set, adjust the path + if search_path and root_path: + path = root_path / Path("data") / search_path + if not path.exists(): + path = Path(unquote(str(path))) + return path + + +class ROCrateBagitLocalZip(BagitROCrate, ROCrateLocalZip): + """ + Class representing an RO-Crate stored in a local BagIt-wrapped zip file. + """ + + def __parse_path__(self, path: Path) -> Path: + # Extract the search path relative to the root of the RO-Crate root path + search_path, _ = super().__check_search_path__(path) + + # if search_path is set, adjust the path + if search_path: + path = Path("data") / search_path + assert self._zipref is not None, "Zip reference not initialized" + zip_namelist = self._zipref.namelist() + if str(path) not in zip_namelist and f"{path}/" not in zip_namelist: + path = Path(unquote(str(path))) + return path + + +class ROCrateBagitRemoteZip(ROCrateBagitLocalZip, ROCrateRemoteZip): + pass diff --git a/rocrate_validator/rocrate/base.py b/rocrate_validator/rocrate/base.py new file mode 100644 index 000000000..3103010a1 --- /dev/null +++ b/rocrate_validator/rocrate/base.py @@ -0,0 +1,380 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class ROCrate(ABC):
    """
    Base class for representing and interacting with a Research Object Crate (RO-Crate).

    Instantiating ``ROCrate`` directly dispatches to a concrete subclass
    through :meth:`new_instance`; subclasses are instantiated normally.
    """

    def __new__(cls, uri: str | Path | URI, relative_root_path: Path | None = None):
        """
        Factory method to create the appropriate ROCrate subclass instance.

        :param uri: the URI of the RO-Crate
        :type uri: Union[str, Path, URI]

        :param relative_root_path: the relative root path inside the RO-Crate
        :type relative_root_path: Path

        :return: an instance of the appropriate ROCrate subclass
        :rtype: ROCrate

        :raises ROCrateInvalidURIError: if the URI is invalid
        """
        if cls is not ROCrate:
            # If called on a subclass, use normal instantiation
            return super().__new__(cls)

        # If called on ROCrate directly, use factory logic
        instance = cls.new_instance(uri)
        if relative_root_path:
            instance.relative_root_path = relative_root_path
        return instance

    def __init__(self, uri: str | Path | URI, relative_root_path: Path | None = None) -> None:
        """
        Initialize the RO-Crate.

        :param uri: the URI of the RO-Crate
        :type uri: Union[str, Path, URI]

        :param relative_root_path: the relative root path inside the RO-Crate
        :type relative_root_path: Path

        :raises ROCrateInvalidURIError: if the URI is invalid
        """

        # store the path to the crate
        self._uri = uri if isinstance(uri, URI) else URI(uri)

        # the relative root path inside the RO-Crate
        self.relative_root_path = relative_root_path

        # cache the list of files
        self._files: list[Path] | None = None

        # initialize variables to cache the data
        self._dict: dict | None = None
        self._graph: Graph | None = None

        self._metadata: ROCrateMetadata | None = None

    @property
    def uri(self) -> URI:
        """
        The URI of the RO-Crate.
        """
        return self._uri

    @property
    def metadata(self) -> ROCrateMetadata:
        """
        An ROCrateMetadata object representing the RO-Crate metadata.

        The object is created on first access and cached.

        :return: the metadata object
        :rtype: ROCrateMetadata
        """
        if not self._metadata:
            self._metadata = ROCrateMetadata(self)
        return self._metadata

    @property
    @abstractmethod
    def size(self) -> int:
        """
        The size of the RO-Crate.

        :return: the size of the RO-Crate
        :rtype: int
        """

    @abstractmethod
    def list_files(self) -> list[Path]:
        """
        List all the files in the RO-Crate.

        :return: a list of file paths
        :rtype: list[Path]
        """

    def __get_search_path__(self, path: Path) -> tuple[Path, Path]:
        """
        Get the search path relative to the RO-Crate root path.

        :param path: the path to resolve
        :type path: Path
        :return: the pair ``(search_path, root_path)``; the search path is
            ``path`` itself when it cannot be made relative to the root path
        :rtype: tuple[Path, Path]
        """
        assert path, "Path cannot be None"
        # Identify the root path of the RO-Crate
        root_path = self.uri.as_path() if self.uri.is_local_resource() and isinstance(path, Path) else Path("./")
        # Extract the search path relative to the root of the RO-Crate root path
        try:
            search_path = path.relative_to(root_path)
        except Exception:
            search_path = path
        return search_path, root_path

    def __check_search_path__(self, path) -> tuple[Path | None, Path | None]:
        """
        Extract the search path if it does not contain the relative root path.

        :param path: the path to resolve
        :type path: Path
        :return: ``(search_path, root_path)`` when a relative root path is set
            and the search path does not contain it, ``(None, None)`` otherwise
        :rtype: tuple[Path | None, Path | None]
        """
        if not self.relative_root_path:
            return None, None

        search_path, root_path = self.__get_search_path__(path)
        # The relative root path is a literal path, not a pattern: escape it
        # so that characters such as '+', '.' or '\' are matched verbatim
        # instead of being interpreted as regular expression syntax.
        has_sub_data_path = re.search(re.escape(str(self.relative_root_path)), str(search_path))
        if not has_sub_data_path:
            return search_path, root_path
        return None, None

    def __parse_path__(self, path: Path) -> Path:
        """
        Parse the given path to resolve it within the RO-Crate.

        The path is tried as given and, when different, in its
        percent-decoded form; the first candidate that exists is returned,
        otherwise the last candidate computed.

        :param path: the path to resolve
        :type path: Path
        :return: the resolved path
        :rtype: Path
        """
        assert path, "Path cannot be None"

        # Resolve the path based on the RO-Crate location
        rocrate_path = self.uri.as_path() if self.uri.is_local_resource() else None
        # a zip archive cannot be used as base directory for the entity paths
        rocrate_path_arg = rocrate_path if not str(rocrate_path).endswith(".zip") else None
        paths_to_try = [path]
        unquoted_path = Path(unquote(str(path)))
        if str(path) != str(unquoted_path):
            paths_to_try.append(unquoted_path)
        path_identifier = path
        for p in paths_to_try:
            path_identifier = ROCrateEntity.get_path_from_identifier(
                str(p), rocrate_path=rocrate_path_arg, decode=False
            )
            search_path, base_path = self.__check_search_path__(path_identifier)
            if search_path and base_path:
                if self.relative_root_path:
                    path_identifier = base_path / self.relative_root_path / search_path
                else:
                    path_identifier = base_path / search_path
            if path_identifier.exists():
                return path_identifier
        return path_identifier

    def has_descriptor(self) -> bool:
        """
        Check if the RO-Crate has a metadata descriptor file.

        :return: `True` if the RO-Crate has a metadata descriptor file, `False` otherwise
        :rtype: bool
        """
        path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR))
        logger.debug("Checking for metadata descriptor at path: %s", path)
        return self.has_file(path)

    def has_file(self, path: Path) -> bool:
        """
        Check if the RO-Crate has a file.

        :param path: the path to the file
        :type path: Path

        :return: `True` if the RO-Crate has the file, `False` otherwise
            (also when the check itself fails)
        :rtype: bool
        """
        try:
            return self.__parse_path__(path).is_file()
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error checking if path is a file")
            return False

    def has_directory(self, path: Path) -> bool:
        """
        Check if the RO-Crate has a directory.

        :param path: the path to the directory
        :type path: Path

        :return: `True` if the RO-Crate has the directory, `False` otherwise
            (also when the check itself fails)
        :rtype: bool
        """
        try:
            return self.__parse_path__(path).is_dir()
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error checking if path is a directory")
            return False

    @abstractmethod
    def get_file_size(self, path: Path) -> int:
        """
        Get the size of a file in the RO-Crate.

        :param path: the path to the file
        :type path: Path

        :return: the size of the file
        :rtype: int
        """

    @abstractmethod
    def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes:
        """
        Get the content of a file in the RO-Crate.

        :param path: the path to the file
        :type path: Path

        :param binary_mode: if `True`, return the file as a `bytes` object; otherwise, return it as a `str`
        :type binary_mode: bool

        :return: the content of the file
        :rtype: Union[str, bytes]
        """

    @staticmethod
    def get_external_file_content(uri: str, binary_mode: bool = True) -> str | bytes:
        """
        Get the content of an external file.

        :param uri: the URI of the file
        :type uri: str

        :param binary_mode: if `True`, return the file as a `bytes` object; otherwise, return it as a `str`
        :type binary_mode: bool

        :return: the content of the file
        :rtype: Union[str, bytes]
        """
        response = HttpRequester().get(str(uri))
        response.raise_for_status()
        return response.content if binary_mode else response.text

    @staticmethod
    def get_external_file_size(uri: str) -> int:
        """
        Get the size of an external file.

        The size is read from the ``Content-Length`` header of a HEAD request.

        :param uri: the URI of the file
        :type uri: str

        :return: the size of the file
        :rtype: int

        :raises requests.HTTPError: if the request fails
        """
        response = HttpRequester().head(str(uri))
        response.raise_for_status()
        # NOTE(review): a response without the header makes int() fail here
        return int(response.headers.get("Content-Length"))

    @staticmethod
    def from_metadata_dict(metadata_dict: dict) -> ROCrate:
        """
        Create a new instance of the RO-Crate based on the metadata dictionary.

        :param metadata_dict: the metadata dictionary
        :type metadata_dict: dict

        :return: a new RO-Crate instance whose metadata is the given dictionary
        :rtype: ROCrate

        :raises ROCrateInvalidURIError: if the URI is invalid
        """
        # create a new instance based on the URI (the ROCrate factory __new__
        # dispatches to a concrete subclass, so this is not truly abstract)
        ro_crate = ROCrate(URI("./"), relative_root_path=None)  # type: ignore[abstract]

        # override the metadata with the provided dictionary
        ro_crate._metadata = ROCrateMetadata(ro_crate, metadata_dict=metadata_dict)
        return ro_crate

    @staticmethod
    def new_instance(uri: str | Path | URI, relative_root_path: Path | None = None) -> ROCrate:
        """
        Create a new instance of the RO-Crate based on the URI.

        :param uri: the URI of the RO-Crate
        :type uri: Union[str, Path, URI]

        :param relative_root_path: the relative root path inside the RO-Crate
        :type relative_root_path: Path

        :return: a new instance of the RO-Crate
        :rtype: ROCrate

        :raises ROCrateInvalidURIError: if the URI is invalid
        """
        # Lazy imports break a cycle: bagit/plain inherit from this class,
        # but the factory needs runtime references to dispatch to them.
        from .bagit import (  # noqa: PLC0415
            BagitROCrate,
            ROCrateBagitLocalFolder,
            ROCrateBagitLocalZip,
            ROCrateBagitRemoteZip,
        )
        from .plain import ROCrateLocalFolder, ROCrateLocalZip, ROCrateRemoteZip  # noqa: PLC0415

        # check if the URI is valid
        validate_rocrate_uri(uri, silent=False)
        # create a new instance based on the URI
        if not isinstance(uri, URI):
            uri = URI(uri)
        # check if the URI is a BagIt-wrapped crate
        is_bagit_crate = BagitROCrate.is_bagit_wrapping_crate(uri)

        # check if the URI is a local directory
        if uri.is_local_directory():
            return (
                ROCrateBagitLocalFolder(uri, relative_root_path=relative_root_path)
                if is_bagit_crate
                else ROCrateLocalFolder(uri, relative_root_path=relative_root_path)
            )
        # check if the URI is a local zip file
        if uri.is_local_file():
            return (
                ROCrateBagitLocalZip(uri, relative_root_path=relative_root_path)
                if is_bagit_crate
                else ROCrateLocalZip(uri, relative_root_path=relative_root_path)
            )
        # check if the URI is a remote zip file
        if uri.is_remote_resource():
            return (
                ROCrateBagitRemoteZip(uri, relative_root_path=relative_root_path)
                if is_bagit_crate
                else ROCrateRemoteZip(uri, relative_root_path=relative_root_path)
            )
        # if the URI is not supported, raise an error
        raise ROCrateInvalidURIError(uri=uri, message="Unsupported RO-Crate URI")
class ROCrateEntity:
    """
    An entity of the RO-Crate metadata graph, wrapping its raw JSON-LD data.
    """

    def __init__(self, metadata: ROCrateMetadata, raw_data: dict) -> None:
        """
        :param metadata: the metadata object the entity belongs to
        :type metadata: ROCrateMetadata

        :param raw_data: the raw JSON-LD dictionary of the entity
        :type raw_data: dict
        """
        self._raw_data: dict = raw_data
        self._metadata = metadata

    @property
    def id(self) -> str:
        """The value of the `@id` key of the entity."""
        return cast("str", self._raw_data.get("@id"))

    @property
    def type(self) -> str | list[str]:
        """The value of the `@type` key of the entity."""
        return cast("str | list[str]", self._raw_data.get("@type"))

    def is_dataset(self) -> bool:
        """Check if the entity has the `Dataset` type."""
        return self.has_type("Dataset")

    def is_file(self) -> bool:
        """Check if the entity has the `File` type."""
        return self.has_type("File")

    @property
    def name(self) -> str:
        """The value of the `name` key of the entity."""
        return cast("str", self._raw_data.get("name"))

    @property
    def metadata(self) -> ROCrateMetadata:
        """The metadata object the entity belongs to."""
        return self._metadata

    @property
    def ro_crate(self) -> ROCrate:
        """The RO-Crate the entity belongs to."""
        return self.metadata.ro_crate

    def is_remote(self) -> bool:
        """Check if the entity identifier resolves to a remote resource."""
        return self.id_as_uri.is_remote_resource()

    @classmethod
    def get_id_as_path(cls, entity_id: str, ro_crate: ROCrate | None = None) -> Path:
        """
        Get the path of an entity identifier; the identifier is resolved
        against the crate path only when the crate is a local resource.
        """
        return cls.get_path_from_identifier(
            entity_id,
            ro_crate.uri.as_path() if ro_crate and ro_crate.uri.is_local_resource() else None,
        )

    @staticmethod
    def get_path_from_identifier(
        identifier: str,
        rocrate_path: str | Path | None = None,
        decode: bool = False,
    ) -> Path:
        """
        Get the path from an identifier.

        :param identifier: the identifier of the entity
        :type identifier: str

        :param rocrate_path: the path to the RO-Crate
        :type rocrate_path: Optional[Union[str, Path]]

        :param decode: if `True`, percent-decode the identifier first
        :type decode: bool

        :return: the path to the entity
        :rtype: Path

        """

        def __define_path__(path: str, decode: bool = False) -> Path:
            # ensure the path is a string and remove the file:// prefix
            path = str(path).replace("file://", "")
            # Decode the path if required
            if decode:
                path = unquote(path)
            # Convert the path to a Path object
            path_obj = Path(path)
            # if the path is absolute, return it
            if path_obj.is_absolute():
                return path_obj
            # set the base path
            base_path: Path
            if rocrate_path is None:
                base_path = Path("./")
            elif not isinstance(rocrate_path, Path):
                base_path = Path(rocrate_path)
            else:
                base_path = rocrate_path
            try:
                # Check if the path is the root of the RO-Crate
                if path_obj == Path("./"):
                    return base_path
                # if the path already starts with the base path, keep it
                return base_path / path_obj.relative_to(base_path)
            except ValueError:
                # otherwise resolve the path against the base path
                return base_path / path_obj

        # Define the path based on the identifier
        path = __define_path__(identifier, decode=decode)
        logger.debug("Defined path '%s' from identifier '%s'", path, identifier)
        return path

    @property
    def id_as_path(self) -> Path:
        """The entity identifier as a path resolved against the RO-Crate."""
        return self.get_id_as_path(self.id, self.ro_crate)

    @classmethod
    def get_id_as_uri(cls, entity_id: str, ro_crate: ROCrate) -> URI:
        """
        Get the URI of an entity identifier within the given RO-Crate.
        """
        assert entity_id, "Entity ID cannot be None"
        # Per RO-Crate 1.1 § 4.2.2, an `@id` is either a relative URI path or
        # an external URI/IRI (RFC 3986/3987). External references are used
        # as-is (without resolving them against the crate URI) so the entity
        # is classified as remote/web-based; this covers both authority-based
        # forms (``http://``, ``scp://``) and scheme-only ones (``urn:``,
        # ``doi:``, ``arcp:``).
        if is_external_reference(entity_id):
            return URI(entity_id)
        # Otherwise the `@id` is a relative path: if the RO-Crate itself is
        # remote, resolve it against the crate URI so the entity is still
        # classified as remote/web-based.
        if ro_crate.uri.is_remote_resource() and entity_id.startswith("./"):
            return URI(f"{ro_crate.uri}/{entity_id[2:]}")
        return URI(cls.get_id_as_path(entity_id, ro_crate))

    @property
    def id_as_uri(self) -> URI:
        """The entity identifier as a URI."""
        return self.get_id_as_uri(self.id, self.ro_crate)

    def has_absolute_path(self) -> bool:
        """Check if the entity identifier is an absolute path."""
        return self.get_id_as_path(self.id).is_absolute()

    def has_relative_path(self) -> bool:
        """Check if the entity identifier is not an absolute path."""
        return not self.has_absolute_path()

    def has_local_identifier(self) -> bool:
        """
        Check if the identifier is a fragment (`#...`), either bare or
        prefixed by the RO-Crate URI.
        """
        has_local_id = (
            self.id.startswith("#") or f"{self.ro_crate.uri}/#" in self.id or f"file://{self.ro_crate.uri}/#" in self.id
        )
        # the placeholder carries its own leading blank, so the message reads
        # "is a local identifier" / "is not a local identifier"
        logger.debug(
            "Identifier '%s' is%s a local identifier",
            self.id,
            "" if has_local_id else " not",
        )
        return has_local_id

    def has_type(self, entity_type: str) -> bool:
        """
        Check if the entity has the specified type.
        """
        assert isinstance(entity_type, str), "Entity type must be a string"
        e_types = self.type if isinstance(self.type, list) else [self.type]
        return entity_type in e_types

    def has_types(self, entity_types: list[str], all_types: bool = False) -> bool:
        """
        Check if the entity has any or all of the specified types.
        """
        assert isinstance(entity_types, list), "Entity types must be a list"
        e_types = self.type if isinstance(self.type, list) else [self.type]
        if all_types:
            return all(t in e_types for t in entity_types)
        return any(t in e_types for t in entity_types)

    def __process_property__(self, _name: str, data: object) -> object:
        # A dictionary with an `@id` is a reference: return the referenced
        # entity when it is defined in the metadata, otherwise wrap the
        # reference itself as an entity.
        if isinstance(data, dict) and "@id" in data:
            entity = self.metadata.get_entity(data["@id"])
            if entity is None:
                return ROCrateEntity(self.metadata, data)
            return entity
        return data

    def get_property(self, name: str, default=None) -> Any:
        """
        Get the value of a property, with `@id` references turned into entities.

        :param name: the name of the property
        :type name: str

        :param default: the value to use when the property is missing

        :return: the processed value, a list of processed values when the
            property is a list, `None` when the value is `None`
        """
        data = self._raw_data.get(name, default)
        if data is None:
            return None
        if isinstance(data, list):
            return [self.__process_property__(name, _) for _ in data]
        return self.__process_property__(name, data)

    @property
    def raw_data(self) -> object:
        """The raw JSON-LD dictionary of the entity."""
        return self._raw_data

    def is_local(self) -> bool:
        """Check if the entity is not a remote resource."""
        return not self.is_remote()

    def _check_local_availability(self) -> AvailabilityStatus:
        # Lazy import to avoid a runtime cycle: plain.py inherits from base.py,
        # which already depends on this module via ROCrateEntity.
        from .plain import ROCrateLocalFolder, ROCrateLocalZip  # noqa: PLC0415

        if self.ro_crate.uri.is_local_resource():
            if isinstance(self.ro_crate, ROCrateLocalFolder):
                found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path)
                return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE
            if isinstance(self.ro_crate, ROCrateLocalZip):
                if self.id == "./":
                    return AvailabilityStatus.AVAILABLE
                # entries of the archive are looked up by the decoded identifier
                decoded_id = Path(unquote(str(self.id)))
                found = self.ro_crate.has_directory(decoded_id) or self.ro_crate.has_file(decoded_id)
                return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE

        if self.ro_crate.uri.is_remote_resource():
            if self.id == "./":
                found = self.ro_crate.get_file_size(self.id_as_path) > 0
            else:
                decoded_id = Path(unquote(str(self.id)))
                found = self.ro_crate.has_directory(decoded_id) or self.ro_crate.has_file(decoded_id)
            return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE

        raise ROCrateInvalidURIError(uri=self.id, message="Could not determine the availability of the entity")

    def check_availability(self) -> AvailabilityStatus:
        """
        Return a fine-grained availability status for this entity.

        This is the primary check; :meth:`is_available` is the boolean
        shortcut built on top of it. The status distinguishes definitely
        unavailable resources, auth-protected ones, and remote URIs whose
        scheme the validator cannot natively check (scp://, s3://, ...).
        Any error raised while checking is reported as unavailable.
        """
        try:
            entity_uri = self.id_as_uri
            if entity_uri.is_natively_checkable():
                logger.debug("Checking the availability of a remote entity")
                return entity_uri.check_availability()

            if entity_uri.is_remote_resource():
                logger.debug(
                    "Cannot natively verify availability for entity '%s' (scheme '%s')",
                    self.id,
                    entity_uri.scheme,
                )
                return AvailabilityStatus.UNCHECKABLE

            return self._check_local_availability()
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error checking entity availability")
            return AvailabilityStatus.UNAVAILABLE

    def is_available(self) -> bool:
        """Check if the availability status of the entity is `AVAILABLE`."""
        return self.check_availability() == AvailabilityStatus.AVAILABLE

    def get_size(self) -> int:
        """
        Get the size of the entity file, or 0 when it cannot be determined.
        """
        try:
            return self.metadata.ro_crate.get_file_size(Path(self.id))
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error getting entity file size")
            return 0

    def __str__(self) -> str:
        return f"Entity({self.id})"

    def __repr__(self) -> str:
        return str(self)

    def __eq__(self, other: object) -> bool:
        # entities are identified by their `@id` only
        if not isinstance(other, ROCrateEntity):
            return False
        return self.id == other.id

    def __hash__(self) -> int:
        return hash(self.id)
class ROCrateMetadata:
    """
    The metadata of an RO-Crate, i.e. the content of its metadata file
    descriptor, available as JSON string, dictionary or RDF graph.
    """

    # name of the metadata file descriptor inside the RO-Crate
    METADATA_FILE_DESCRIPTOR = "ro-crate-metadata.json"

    def __init__(self, ro_crate: ROCrate, metadata_dict: dict | None = None) -> None:
        """
        :param ro_crate: the RO-Crate the metadata belongs to
        :type ro_crate: ROCrate

        :param metadata_dict: the metadata dictionary; when not provided the
            metadata is read from the RO-Crate on first access
        :type metadata_dict: Optional[dict]
        """
        self._ro_crate = ro_crate
        self._dict = metadata_dict
        self._json: str | None = json.dumps(metadata_dict) if metadata_dict else None
        self._graph: Graph | None = None

    @property
    def ro_crate(self) -> ROCrate:
        """The RO-Crate the metadata belongs to."""
        return self._ro_crate

    @property
    def size(self) -> int:
        """The length of the metadata JSON string, 0 if it cannot be loaded."""
        try:
            return len(self.as_json())
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error computing metadata JSON size")
            return 0

    def get_file_descriptor_entity(self) -> ROCrateEntity:
        """
        Get the metadata file descriptor entity.

        :raises ValueError: if the entity is not defined in the metadata
        """
        metadata_file_descriptor = self.get_entity(self.METADATA_FILE_DESCRIPTOR)
        if not metadata_file_descriptor:
            raise ValueError("no metadata file descriptor in crate")
        return metadata_file_descriptor

    def get_root_data_entity(self) -> ROCrateEntity:
        """
        Get the entity referenced by the `about` property of the metadata
        file descriptor.

        :raises ValueError: if the descriptor or its `about` property is missing
        """
        metadata_file_descriptor = self.get_file_descriptor_entity()
        main_entity = metadata_file_descriptor.get_property("about")
        if not main_entity:
            raise ValueError("no main entity in metadata file descriptor")
        return main_entity

    def get_root_data_entity_conforms_to(self) -> list[str] | None:
        """
        Get the identifiers listed in the `conformsTo` property of the root
        data entity, or `None` when they cannot be determined.
        """
        try:
            root_data_entity = self.get_root_data_entity()
            result = root_data_entity.get_property("conformsTo", [])
            if result is None:
                return None
            if not isinstance(result, list):
                result = [result]
            return [_.id for _ in result]
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error getting root data entity conformsTo")
            return None

    def get_main_workflow(self) -> ROCrateEntity:
        """
        Get the entity referenced by the `mainEntity` property of the root
        data entity.

        :raises ValueError: if the property is missing
        """
        root_data_entity = self.get_root_data_entity()
        main_workflow = root_data_entity.get_property("mainEntity")
        if not main_workflow:
            raise ValueError("no main workflow in metadata file descriptor")
        return main_workflow

    def get_entity(self, entity_id: str) -> ROCrateEntity | None:
        """Get the first entity of the `@graph` with the given `@id`, if any."""
        for entity in self.as_dict().get("@graph", []):
            if entity.get("@id") == entity_id:
                return ROCrateEntity(self, entity)
        return None

    def get_entities(self) -> list[ROCrateEntity]:
        """Get all the entities of the `@graph`."""
        return [ROCrateEntity(self, entity) for entity in self.as_dict().get("@graph", [])]

    def get_entities_by_type(self, entity_type: str | list[str]) -> list[ROCrateEntity]:
        """Get the entities having at least one of the given types."""
        entity_types = [entity_type] if isinstance(entity_type, str) else entity_type
        return [e for e in self.get_entities() if e.has_types(entity_types)]

    def get_dataset_entities(self) -> list[ROCrateEntity]:
        """Get the entities of type `Dataset`."""
        return self.get_entities_by_type("Dataset")

    def get_file_entities(self) -> list[ROCrateEntity]:
        """Get the entities of type `File`."""
        return self.get_entities_by_type("File")

    def get_data_entities(self, exclude_web_data_entities: bool = False) -> list[ROCrateEntity]:
        """
        Get the entities of type `Dataset` or `File`.

        :param exclude_web_data_entities: if `True`, skip the remote entities
        :type exclude_web_data_entities: bool
        """
        if not exclude_web_data_entities:
            return self.get_entities_by_type(["Dataset", "File"])
        return [e for e in self.get_entities_by_type(["Dataset", "File"]) if not e.is_remote()]

    def get_web_data_entities(self) -> list[ROCrateEntity]:
        """Get the remote entities of type `File` or `Dataset`."""
        return [
            entity
            for entity in self.get_entities()
            if (entity.has_type("File") or entity.has_type("Dataset")) and entity.is_remote()
        ]

    def get_conforms_to(self) -> list[str] | None:
        """
        Get the identifiers listed in the `conformsTo` property of the
        metadata file descriptor, or `None` when they cannot be determined.
        """
        try:
            file_descriptor = self.get_file_descriptor_entity()
            result = file_descriptor.get_property("conformsTo", [])
            if result is None:
                return None
            if not isinstance(result, list):
                result = [result]
            return [_.id for _ in result]
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error getting metadata file descriptor conformsTo")
            return None

    def as_json(self) -> str:
        """Get the metadata as JSON string, reading it from the crate if not cached."""
        if not self._json:
            self._json = cast(
                "str", self.ro_crate.get_file_content(Path(self.METADATA_FILE_DESCRIPTOR), binary_mode=False)
            )
        return self._json

    def as_dict(self) -> dict[Any, Any]:
        """Get the metadata as dictionary, parsing the JSON string if not cached."""
        if not self._dict:
            # if the dictionary is not cached, load it
            self._dict = json.loads(self.as_json())
        assert self._dict is not None, "Metadata dictionary should not be None after loading"
        return self._dict

    def as_graph(self, publicID: str | None = None) -> Graph:
        """
        Get the metadata as RDF graph.

        The graph is built on the first call and cached, so ``publicID`` is
        only taken into account by the call that builds it.

        :param publicID: the base of the graph; the RO-Crate URI by default
        :type publicID: Optional[str]
        """
        if not self._graph:
            # if the graph is not cached, load it
            self._graph = Graph(base=publicID or str(self.ro_crate.uri))
            self._graph.parse(data=self.as_json(), format="json-ld")
        return self._graph

    def __str__(self) -> str:
        return f"Metadata({self.ro_crate})"

    def __repr__(self) -> str:
        return str(self)

    def __eq__(self, other: object) -> bool:
        # metadata objects are identified by the RO-Crate they belong to
        if not isinstance(other, ROCrateMetadata):
            return False
        return self.ro_crate == other.ro_crate

    def __hash__(self) -> int:
        return hash(self.ro_crate)
class ROCrateLocalFolder(ROCrate):
    """
    Class representing an RO-Crate stored in a local folder.
    """

    def __init__(self, path: str | Path | URI, relative_root_path: Path | None = None):
        """
        :param path: the path of the RO-Crate folder
        :type path: Union[str, Path, URI]

        :param relative_root_path: the relative root path inside the RO-Crate
        :type relative_root_path: Path

        :raises ROCrateInvalidURIError: if the path is not a directory
        """
        super().__init__(path, relative_root_path=relative_root_path)

        # cache the list of files
        self._files = None

        # check if the path is a directory
        if not self.has_directory(self.uri.as_path()):
            raise ROCrateInvalidURIError(uri=path)

    @property
    def size(self) -> int:
        """The sum of the sizes of the files in the RO-Crate folder."""
        return sum(f.stat().st_size for f in self.list_files() if f.is_file())

    def list_files(self) -> list[Path]:
        """
        List the files found recursively in the RO-Crate folder.

        :return: a list of file paths
        :rtype: list[Path]
        """
        if not self._files:
            base_path = self.uri.as_path()
            # rglob already yields paths prefixed with base_path: joining
            # them to base_path again would duplicate a relative prefix
            self._files = [entry for entry in base_path.rglob("*") if entry.is_file()]
        return self._files

    def get_file_size(self, path: Path) -> int:
        """
        Get the size of a file in the RO-Crate.

        :raises FileNotFoundError: if the file is not in the RO-Crate
        """
        path = self.__parse_path__(path)
        if not self.has_file(path):
            raise FileNotFoundError(f"File not found: {path}")
        return path.stat().st_size

    def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes:
        """
        Get the content of a file in the RO-Crate, as `bytes` when
        `binary_mode` is `True`, as UTF-8 decoded `str` otherwise.

        :raises FileNotFoundError: if the file is not in the RO-Crate
        """
        path = self.__parse_path__(path)
        if not self.has_file(path):
            raise FileNotFoundError(f"File not found: {path}")
        return path.read_bytes() if binary_mode else path.read_text(encoding="utf-8")
class ROCrateLocalZip(ROCrate):
    """
    Class representing an RO-Crate stored in a local zip file.
    """

    def __init__(
        self,
        path: str | Path | URI,
        relative_root_path: Path | None = None,
        init_zip: bool = True,
    ):
        """
        :param path: the path of the zip file
        :type path: Union[str, Path, URI]

        :param relative_root_path: the relative root path inside the RO-Crate
        :type relative_root_path: Path

        :param init_zip: if `True`, open the zip file right away
        :type init_zip: bool
        """
        super().__init__(path, relative_root_path=relative_root_path)

        # initialize the zip reference
        self._zipref: zipfile.ZipFile | None = None
        if init_zip:
            self.__init_zip_reference__()

        # cache the list of files
        self._files = None

    def __del__(self):
        # best-effort release of the zip file handle
        try:
            if self._zipref and self._zipref.fp is not None:
                self._zipref.close()
                del self._zipref
        except Exception:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error closing zip reference")

    def __parse_path__(self, path):
        assert path, "Path cannot be None"
        # entries of a zip file are addressed by the path as given
        return path

    @property
    def size(self) -> int:
        """The size of the zip file."""
        return self.uri.as_path().stat().st_size

    def __init_zip_reference__(self):
        path = self.uri.as_path()
        # check if the path is a file
        if not self.uri.as_path().is_file():
            raise ROCrateInvalidURIError(uri=path)
        # check if the file is a zip file
        if self.uri.as_path().suffix != ".zip":
            raise ROCrateInvalidURIError(uri=path)
        self._zipref = zipfile.ZipFile(path)  # pylint: disable=consider-using-with
        logger.debug("Initialized zip reference: %s", self._zipref)

    def __get_file_info__(self, path: str | Path) -> zipfile.ZipInfo:
        assert self._zipref is not None, "Zip reference not initialized"
        try:
            return self._zipref.getinfo(str(path))
        except KeyError:
            logger.error("File not found in zip: %s", path)
            raise FileNotFoundError(f"File not found in zip: {path}") from None

    def has_descriptor(self) -> bool:
        """
        Check if the RO-Crate has a metadata descriptor file.

        :return: `True` if the descriptor is among the entries of the zip file
        :rtype: bool
        """
        path = self.__parse_path__(Path(self.metadata.METADATA_FILE_DESCRIPTOR))
        return str(path) in [str(_) for _ in self.list_files()]

    def has_file(self, path: Path) -> bool:
        """
        Check if the zip file has a file entry for the given path.

        :param path: the path to the file
        :type path: Path

        :return: `True` if the entry exists and is not a directory
        :rtype: bool
        """
        path = self.__parse_path__(path)
        assert self._zipref is not None, "Zip reference not initialized"
        # Look the entry up by name: a directory is stored as 'name/', so the
        # path of a directory has no entry of its own and is reported as
        # missing instead of raising.
        try:
            info = self._zipref.getinfo(str(path))
        except KeyError:
            return False
        return not info.is_dir()

    def has_directory(self, path: Path) -> bool:
        """
        Check if the zip file has a directory entry for the given path.

        :param path: the path to the directory
        :type path: Path

        :return: `True` if the first matching entry is a directory
        :rtype: bool
        """
        assert path, "Path cannot be None"
        assert self._zipref is not None, "Zip reference not initialized"
        for px in (path, self.__parse_path__(path)):
            for p in self._zipref.namelist():
                if f"{px!s}/" == str(p) or str(px) == str(p):
                    info = self.__get_file_info__(p)
                    return info.is_dir()
        return False

    def list_files(self) -> list[Path]:
        """
        List the entries of the zip file as paths.

        :return: a list of file paths
        :rtype: list[Path]
        """
        if not self._files:
            assert self._zipref is not None, "Zip reference not initialized"
            self._files = [Path(entry_name) for entry_name in self._zipref.namelist()]
        return self._files

    def list_entries(self) -> list[zipfile.ZipInfo]:
        """
        Return the ZipInfo objects of all the entries of the zip file.
        """
        assert self._zipref is not None, "Zip reference not initialized"
        return self._zipref.infolist()

    def get_entry(self, path: Path) -> zipfile.ZipInfo:
        """
        Return the ZipInfo object for the specified path.

        :raises FileNotFoundError: if the zip file has no such entry
        """
        return self.__get_file_info__(self.__parse_path__(path))

    def get_file_size(self, path: Path) -> int:
        """
        Get the uncompressed size of an entry of the zip file.
        """
        assert self._zipref is not None, "Zip reference not initialized"
        return self._zipref.getinfo(str(self.__parse_path__(path))).file_size

    def get_file_content(self, path: Path, binary_mode: bool = True) -> str | bytes:
        """
        Get the content of a file in the zip file, as `bytes` when
        `binary_mode` is `True`, as UTF-8 decoded `str` otherwise.

        :raises FileNotFoundError: if the zip file has no such file
        """
        path = self.__parse_path__(path)
        if not self.has_file(path):
            raise FileNotFoundError(f"File not found: {path}")
        assert self._zipref is not None, "Zip reference not initialized"
        data = self._zipref.read(str(path))
        return data if binary_mode else data.decode("utf-8")
file_size = response.headers.get("Content-Length") + if file_size is not None: + return int(file_size) + raise ValueError("Could not determine the file size from the headers") + + @staticmethod + def __fetch_range__(uri: str, start, end): + headers = {"Range": f"bytes={start}-{end}"} + response = HttpRequester().get(uri, headers=headers) + response.raise_for_status() + return response.content + + @staticmethod + def __find_eocd__(data): + eocd_signature = b"PK\x05\x06" + eocd_offset = data.rfind(eocd_signature) + if eocd_offset == -1: + raise ValueError("EOCD not found") + return eocd_offset + + @staticmethod + def __parse_eocd__(data): + eocd_size = struct.calcsize("<4s4H2LH") + eocd = struct.unpack("<4s4H2LH", data[-eocd_size:]) + central_directory_size = eocd[5] + central_directory_offset = eocd[6] + return central_directory_offset, central_directory_size From e662801eac96ab43118447f89fd2630e5da72aac Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 18:57:17 +0200 Subject: [PATCH 326/352] =?UTF-8?q?refactor(models):=20=E2=99=BB=EF=B8=8F?= =?UTF-8?q?=20split=20models.py=20into=20a=20package=20with=20cohesive=20s?= =?UTF-8?q?ubmodules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/models.py | 3604 ----------------------- rocrate_validator/models/__init__.py | 84 + rocrate_validator/models/_logging.py | 17 + rocrate_validator/models/events.py | 161 + rocrate_validator/models/profile.py | 788 +++++ rocrate_validator/models/requirement.py | 650 ++++ rocrate_validator/models/result.py | 434 +++ rocrate_validator/models/settings.py | 207 ++ rocrate_validator/models/severity.py | 147 + rocrate_validator/models/statistics.py | 792 +++++ rocrate_validator/models/validation.py | 650 ++++ 11 files changed, 3930 insertions(+), 3604 deletions(-) delete mode 100644 rocrate_validator/models.py create mode 100644 rocrate_validator/models/__init__.py create mode 100644 
rocrate_validator/models/_logging.py create mode 100644 rocrate_validator/models/events.py create mode 100644 rocrate_validator/models/profile.py create mode 100644 rocrate_validator/models/requirement.py create mode 100644 rocrate_validator/models/result.py create mode 100644 rocrate_validator/models/settings.py create mode 100644 rocrate_validator/models/severity.py create mode 100644 rocrate_validator/models/statistics.py create mode 100644 rocrate_validator/models/validation.py diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py deleted file mode 100644 index 6723e0809..000000000 --- a/rocrate_validator/models.py +++ /dev/null @@ -1,3604 +0,0 @@ -# Copyright (c) 2024-2026 CRS4 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import annotations - -import bisect -import enum -import importlib -import inspect -import json -import re -from abc import ABC, abstractmethod -from dataclasses import asdict, dataclass -from datetime import datetime, timezone -from functools import total_ordering -from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol, cast -from urllib.error import HTTPError - -from enum_tools.documentation import document_enum -from rdflib import RDF, RDFS, Graph, Namespace, URIRef - -from rocrate_validator import __version__ -from rocrate_validator.constants import ( - DEFAULT_HTTP_CACHE_MAX_AGE, - DEFAULT_ONTOLOGY_FILE, - DEFAULT_PROFILE_IDENTIFIER, - DEFAULT_PROFILE_README_FILE, - IGNORED_PROFILE_DIRECTORIES, - JSON_OUTPUT_FORMAT_VERSION, - PROF_NS, - PROFILE_FILE_EXTENSIONS, - PROFILE_SPECIFICATION_FILE, - ROCRATE_METADATA_FILE, - SCHEMA_ORG_NS, -) -from rocrate_validator.errors import ( - DuplicateRequirementCheck, - InvalidProfilePath, - ProfileNotFound, - ProfileSpecificationError, - ProfileSpecificationNotFound, - ROCrateMetadataNotFoundError, -) -from rocrate_validator.events import Event, EventType, Publisher, Subscriber -from rocrate_validator.rocrate import ROCrate -from rocrate_validator.utils import log as logging -from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings -from rocrate_validator.utils.collections import MapIndex, MultiIndexMap -from rocrate_validator.utils.document_loader import install_document_loader -from rocrate_validator.utils.http import HttpRequester, find_offline_cache_miss -from rocrate_validator.utils.paths import ( - get_default_http_cache_path, - get_profiles_path, -) -from rocrate_validator.utils.python_helpers import ( - get_requirement_name_from_file, -) -from rocrate_validator.utils.uri import URI - -if TYPE_CHECKING: - from collections.abc import Collection - -# set the default profiles path -DEFAULT_PROFILES_PATH = get_profiles_path() - -logger = logging.getLogger(__name__) - 
-BaseTypes = str | Path | bool | int | None - - -@enum.unique -@document_enum -@total_ordering -class Severity(enum.Enum): - """ - Enum ordering "strength" of conditions to be verified - """ - - #: the condition is not mandatory - OPTIONAL = 0 - #: the condition is recommended - RECOMMENDED = 2 - #: the condition is mandatory - REQUIRED = 4 - - def __lt__(self, other: object) -> bool: - if isinstance(other, Severity): - return self.value < other.value - raise TypeError(f"Comparison not supported between instances of {type(self)} and {type(other)}") - - @staticmethod - def get(name: str) -> Severity: - return getattr(Severity, name.upper()) - - -@total_ordering -@dataclass -class RequirementLevel: - """ - Represents a requirement level. - - A requirement has a name and a severity level of type :class:`.Severity`. - It implements the comparison operators to allow ordering of the requirement levels. - """ - - name: str - severity: Severity - - def __eq__(self, other: object) -> bool: - if not isinstance(other, RequirementLevel): - return False - return self.name == other.name and self.severity == other.severity - - def __lt__(self, other: object) -> bool: - # NOTE: this ordering is not totally coherent, since for two objects a and b - # with equal Severity but different names you would have - # not a < b, which implies a >= b - # and also a != b and not a > b, which is incoherent with a >= b - if not isinstance(other, RequirementLevel): - raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - return self.severity < other.severity - - def __hash__(self) -> int: - return hash((self.name, self.severity)) - - def __repr__(self) -> str: - return f"RequirementLevel(name={self.name}, severity={self.severity})" - - def __str__(self) -> str: - return self.name - - def __int__(self) -> int: - return self.severity.value - - def __index__(self) -> int: - return self.severity.value - - -class LevelCollection: - """ - Collection of :class:`.RequirementLevel` 
instances. - - Provides a set of predefined RequirementLevel instances - that can be used to define the severity of a requirement. - They map the keywords defined in **RFC 2119** to the corresponding severity levels. - - .. note:: - The keywords **MUST**, **MUST NOT**, **REQUIRED**, - **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, - **RECOMMENDED**, **MAY**, and **OPTIONAL** in this document - are to be interpreted as described in **RFC 2119**. - - """ - - #: The requirement level OPTIONAL is mapped to the OPTIONAL severity level - OPTIONAL = RequirementLevel("OPTIONAL", Severity.OPTIONAL) - #: The requirement level MAY is mapped to the OPTIONAL severity level - MAY = RequirementLevel("MAY", Severity.OPTIONAL) - #: The requirement level REQUIRED is mapped to the REQUIRED severity level - REQUIRED = RequirementLevel("REQUIRED", Severity.REQUIRED) - #: The requirement level SHOULD is mapped to the RECOMMENDED severity level - SHOULD = RequirementLevel("SHOULD", Severity.RECOMMENDED) - #: The requirement level SHOULD NOT is mapped to the RECOMMENDED severity level - SHOULD_NOT = RequirementLevel("SHOULD_NOT", Severity.RECOMMENDED) - #: The requirement level RECOMMENDED is mapped to the RECOMMENDED severity level - RECOMMENDED = RequirementLevel("RECOMMENDED", Severity.RECOMMENDED) - - #: The requirement level MUST is mapped to the REQUIRED severity level - MUST = RequirementLevel("MUST", Severity.REQUIRED) - #: The requirement level MUST_NOT is mapped to the REQUIRED severity level - MUST_NOT = RequirementLevel("MUST_NOT", Severity.REQUIRED) - #: The requirement level SHALL is mapped to the REQUIRED severity level - SHALL = RequirementLevel("SHALL", Severity.REQUIRED) - #: The requirement level SHALL_NOT is mapped to the REQUIRED severity level - SHALL_NOT = RequirementLevel("SHALL_NOT", Severity.REQUIRED) - - def __init__(self): - raise NotImplementedError(f"{type(self)} can't be instantiated") - - @staticmethod - def all() -> list[RequirementLevel]: - return 
[ - level - for name, level in inspect.getmembers(LevelCollection) - if not inspect.isroutine(level) and not inspect.isdatadescriptor(level) and not name.startswith("__") - ] - - @staticmethod - def get(name: str) -> RequirementLevel: - try: - return getattr(LevelCollection, name.upper()) - except AttributeError: - raise ValueError(f"Invalid RequirementLevel: {name}") from None - - -@total_ordering -class Profile: - """ - RO-Crate Validator profile. - - A profile is a named set of requirements that can be used to validate an RO-Crate. - """ - - # store the map of profiles: profile URI -> Profile instance - __profiles_map: MultiIndexMap = MultiIndexMap( - "uri", - indexes=[ - MapIndex("name"), - MapIndex("token", unique=False), - MapIndex("identifier", unique=True), - MapIndex("token_path", unique=False), - ], - ) - - def __init__( - self, - profiles_base_path: Path, - profile_path: Path, - requirements: list[Requirement] | None = None, - identifier: str | None = None, - publicID: str | None = None, - severity: Severity = Severity.REQUIRED, - ): - """ - Initialize the Profile instance - - :param profile_path: the path of the profile - :type profile_path: Path - - :param requirements: the list of requirements of the profile - :type requirements: list[Requirement] - - :param identifier: the identifier of the profile - :type identifier: str - - :param publicID: the public identifier of the profile - :type publicID: str - :meta private: - - :param severity: the severity of the profile - :type severity: Severity - - : raises ProfileSpecificationNotFound: if the profile specification file is not found - : raises ProfileSpecificationError: if the profile specification file contains more than one profile - : raises InvalidProfilePath: if the profile path is not a directory - - :meta private: - """ - self._identifier: str | None = identifier - self._profiles_base_path = profiles_base_path - self._profile_path = profile_path - self._name: str | None = None - 
self._description: str | None = None - self._requirements: list[Requirement] = requirements if requirements is not None else [] - self._publicID = publicID - self._severity = severity - self._overrides: list[Profile] = [] - self._overridden_by: list[Profile] = [] - - # init property to store the RDF node which is the root of the profile specification graph - self._profile_node: Any = None - - # init property to store the RDF graph of the profile specification - self._profile_specification_graph: Graph | None = None - - # check if the profile specification file exists - spec_file = self.profile_specification_file_path - if not spec_file or not spec_file.exists(): - raise ProfileSpecificationNotFound(str(spec_file)) - # load the profile specification expressed using the Profiles Vocabulary - profile = Graph() - profile.parse(str(spec_file), format="turtle") - # check that the specification Graph hosts only one profile - profiles = list(profile.subjects(predicate=RDF.type, object=PROF_NS.Profile)) - if len(profiles) == 1: - self._profile_node = profiles[0] - self._profile_specification_graph = profile - # initialize the token and version - self._token, self._version = self.__init_token_version__() - - # Check if the profile is overriding an existing profile - existing_profile = self.__profiles_map.get_by_key(cast("Any", self._profile_node).toPython()) - # If an existing profile is being overridden by a different one, log a warning - if existing_profile and existing_profile.path != profile_path: - logger.warning( - "Profile with identifier %s at %s is being overridden by the profile loaded from %s.", - existing_profile.identifier, - existing_profile.path, - profile_path, - ) - # add the existing profile as an override - self.__add_override__(existing_profile) - - # add the profile to the profiles map - self.__profiles_map.add( - cast("Any", self._profile_node).toPython(), - self, - token=self.token, - name=self.name, - identifier=self.identifier, - 
token_path=self.__extract_token_from_path__(), - ) # add the profile to the profiles map - else: - raise ProfileSpecificationError( - message=f"Profile specification file {spec_file} must contain exactly one profile" - ) - - def __get_specification_property__( - self, - prop: str, - namespace: Namespace, - pop_first: bool = True, - as_python_object: bool = True, - ) -> str | list[str | URIRef] | None: - assert self._profile_specification_graph is not None, "Profile specification graph not loaded" - nodes = list(self._profile_specification_graph.objects(self._profile_node, namespace[prop])) - values: list = [cast("Any", v).toPython() for v in nodes] if (nodes and as_python_object) else list(nodes) - if pop_first: - return values[0] if values else None - return values - - def __add_override__(self, profile: Profile): - """ - Add an override profile to this profile. - - :param profile: the profile that overrides this profile - :type profile: Profile - """ - if not isinstance(profile, Profile): - raise TypeError(f"Expected a Profile instance, got {type(profile)}") - if profile not in self._overrides: - self._overrides.append(profile) - profile._overridden_by.append(self) - - @property - def overrides(self) -> list[Profile]: - """ - The list of profiles that override this profile. - """ - return self._overrides - - @property - def overridden_by(self) -> list[Profile]: - """ - The list of profiles that are overridden by this profile. - """ - return self._overridden_by - - @property - def path(self): - """ - The path of the profile directory - """ - return self._profile_path - - @property - def identifier(self) -> str: - """ - The identifier of the profile. 
- """ - if not self._identifier: - version = self.version - self._identifier = f"{self.token}-{version}" if version else self.token - return self._identifier - - @property - def name(self): - """ - The name of the profile as specified in the profile specification file - (i.e., the value of the rdfs: label property in the `profile.ttl` file) or - a default name if the label is not specified. - """ - return self.label or f"Profile {self.uri}" - - @property - def profile_specification_graph(self) -> Graph: - """ - The RDF graph of the profile specification. - """ - return self._profile_specification_graph # type: ignore[return-value] - - @property - def profile_node(self): - return self._profile_node - - @property - def token(self): - """ - A token that uniquely identifies the profile - as specified in the profile specification file - (i.e., the value of the prof: hasToken property - in the `profile.ttl` file). - """ - return self._token - - @property - def uri(self): - """ - The URI of the profile. - """ - return self._profile_node.toPython() - - @property - def label(self): - return self.__get_specification_property__("label", RDFS) # type: ignore[arg-type] - - @property - def comment(self): - """ - The comment added to the profile in the profile specification file - (i.e., the value of the rdfs: comment property in the `profile.ttl` file). - """ - return self.__get_specification_property__("comment", RDFS) # type: ignore[arg-type] - - @property - def version(self): - """ - The version of the profile as specified in the profile specification file - (i.e., the value of the prof: version property in the `profile.ttl` file). - """ - return self._version - - @property - def is_profile_of(self) -> list[str]: - """ - The list of profiles that this profile is a profile of - as specified in the profile specification file - (i.e., the value of the prof: isProfileOf property in the `profile.ttl` file). 
- """ - return cast("list[str]", self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False)) - - @property - def is_transitive_profile_of(self) -> list[str]: - """ - The list of profiles that this profile is a transitive profile of - as specified in the profile specification file - (i.e., the value of the prof: isTransitiveProfileOf property in the `profile.ttl` file). - """ - return cast("list[str]", self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False)) - - @property - def parents(self) -> list[Profile]: - """ - The list of profiles that this profile is a profile of - as specified in the profile specification file. - """ - return [ - profile - for profile in (self.__profiles_map.get_by_key(_) for _ in self.is_profile_of) - if profile is not None - ] - - @property - def siblings(self) -> list[Profile]: - """ - The list of profiles that are siblings of this profile - (i.e., profiles that share the same parent profile). - """ - return self.get_sibling_profiles(self) - - @property - def descendants(self) -> list[Profile]: - """ - The list of profiles that are descendants of this profile - (i.e., profiles that have this profile among their inherited profiles). - """ - return self.get_descendants(self) - - @property - def readme_file_path(self) -> Path: - """ - The path of the README file of the profile. - """ - return self.path / DEFAULT_PROFILE_README_FILE - - @property - def profile_specification_file_path(self) -> Path: - """ - The path of the profile specification file. - """ - return self.path / PROFILE_SPECIFICATION_FILE - - @property - def publicID(self) -> str | None: - """ - The public identifier of the RO-Crate which is validated by the profile. - - :meta private: - """ - return self._publicID - - @property - def severity(self) -> Severity: - """ - The severity of the profile which the profile is loaded with, - i.e., the minimum severity level of the requirements of the profile. 
- """ - return self._severity - - @property - def description(self) -> str | None: - """ - The description of the profile as specified in the profile specification file - (i.e., the value of the rdfs: comment property in the `profile.ttl` file). - """ - if not self._description: - if self.path and self.readme_file_path.exists(): - with self.readme_file_path.open(encoding="utf-8") as f: - self._description = f.read() - else: - comment = self.comment - self._description = str(comment) if comment else None - return self._description - - @property - def requirements(self) -> list[Requirement]: - """ - The list of requirements of the profile. - """ - if not self._requirements: - self._requirements = RequirementLoader.load_requirements(self, severity=self.severity) - return self._requirements - - def get_requirements(self, severity: Severity = Severity.REQUIRED, exact_match: bool = False) -> list[Requirement]: - """ - Get the requirements of the profile with the given severity level. - If the exact_match flag is set to `True`, only the requirements with the exact severity level - are returned; otherwise, the requirements with severity level greater than or equal to - the given severity level are returned. 
- """ - return [ - requirement - for requirement in self.requirements - if (not exact_match and (not requirement.severity_from_path or requirement.severity_from_path >= severity)) - or (exact_match and requirement.severity_from_path == severity) - ] - - def get_requirement(self, name: str) -> Requirement | None: - """ - Get the requirement with the given name - """ - for requirement in self.requirements: - if requirement.name == name: - return requirement - return None - - def get_requirement_check(self, check_name: str) -> RequirementCheck | None: - """ - Get the requirement check with the given name - """ - for requirement in self.requirements: - check = requirement.get_check(check_name) - if check: - return check - return None - - @classmethod - def __get_nested_profiles__(cls, source: str) -> list[str]: - result: list[str] = [] - visited = [] - queue = [source] - while len(queue) > 0: - p = queue.pop() - if p not in visited: - visited.append(p) - profile = cls.__profiles_map.get_by_key(p) - if profile is None: - continue - inherited_profiles = profile.is_profile_of - if inherited_profiles: - for p in sorted(inherited_profiles, reverse=True): - if p not in visited: - queue.append(p) - if p not in result: - result.insert(0, p) - return result - - @property - def inherited_profiles(self) -> list[Profile]: - inherited_profiles = self.is_transitive_profile_of - if not inherited_profiles or len(inherited_profiles) == 0: - inherited_profiles = Profile.__get_nested_profiles__(self.uri) - profile_keys = self.__profiles_map.keys - return [ - profile - for key in inherited_profiles - if key in profile_keys - for profile in [self.__profiles_map.get_by_key(key)] - if isinstance(profile, Profile) - ] - - def add_requirement(self, requirement: Requirement): - self._requirements.append(requirement) - - def remove_requirement(self, requirement: Requirement): - self._requirements.remove(requirement) - - def __eq__(self, other: object) -> bool: - return isinstance(other, Profile) 
and self.identifier == other.identifier and self.path == other.path - - def __lt__(self, other: object) -> bool: - if not isinstance(other, Profile): - raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - # If one profile is a parent of the other, the parent is greater - if other in self.parents: - return False - # If the number of inherited profiles is the same, compare based on identifier - return self.identifier < other.identifier - - def __hash__(self) -> int: - return hash((self.identifier, self.path)) - - def __repr__(self) -> str: - return ( - f"Profile(identifier={self.identifier}, name={self.name}, path={self.path}, " - if self.path - else f"requirements={self.requirements})" - ) - - def __str__(self) -> str: - return f"{self.name} ({self.identifier})" - - def to_dict(self) -> dict: - return { - "identifier": self.identifier, - "uri": self.uri, - "name": self.name, - "description": self.description, - } - - @staticmethod - def __extract_version_from_token__(token: str) -> str | None: - if not token: - return None - pattern = r"\Wv?(\d+(\.\d+(\.\d+)?)?)" - matches = re.findall(pattern, token) - if matches: - return matches[-1][0] - return None - - def __get_consistent_version__(self, candidate_token: str) -> str | None: - candidates = { - _ - for _ in [ - cast("str | None", self.__get_specification_property__("version", SCHEMA_ORG_NS)), - self.__extract_version_from_token__(candidate_token), - self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), - self.__extract_version_from_token__(str(self.uri)), - ] - if _ is not None - } - if len(candidates) > 1: - raise ProfileSpecificationError(f"Inconsistent versions found: {candidates}") - logger.debug("Candidate versions: %s", candidates) - return candidates.pop() if len(candidates) == 1 else None - - def __extract_token_from_path__(self) -> str: - base_path = str(self._profiles_base_path.absolute()) - identifier = str(self.path.absolute()) - # Check if the path 
starts with the base path - if not identifier.startswith(base_path): - raise ValueError("Path does not start with the base path") - # Remove the base path from the identifier - identifier = identifier.replace(f"{base_path}/", "") - # Replace slashes with hyphens - return identifier.replace("/", "-") - - def __init_token_version__(self) -> tuple[str, str | None]: - # try to extract the token from the specs or the path - candidate_token = cast("str | None", self.__get_specification_property__("hasToken", PROF_NS)) - if not candidate_token: - candidate_token = self.__extract_token_from_path__() - logger.debug("Candidate token: %s", candidate_token) - - # try to extract the version from the specs or the token or the path or the URI - version = self.__get_consistent_version__(candidate_token) - logger.debug("Extracted version: %s", version) - - # remove the version from the token if it is present - if version: - candidate_token = re.sub(r"[\W|_]+" + re.escape(version) + r"$", "", candidate_token) - - # return the candidate token and version - return candidate_token, version - - @classmethod - def __load_profile_path__( - cls, - profiles_base_path: str | Path, - profile_path: str | Path, - publicID: str | None = None, - severity: Severity = Severity.REQUIRED, - ) -> Profile: - # if the path is a string, convert it to a Path - if isinstance(profile_path, str): - profile_path = Path(profile_path) - # check if the path is a directory - if not profile_path.is_dir(): - raise InvalidProfilePath(str(profile_path)) - # create a new profile - profile = Profile( - profiles_base_path=Path(profiles_base_path), - profile_path=profile_path, - publicID=publicID, - severity=severity, - ) - logger.debug("Loaded profile: %s", profile) - return profile - - @classmethod - def __load_profiles_paths__( - cls, - profiles_path: str | Path | None = None, - extra_profiles_path: str | Path | None = None, - ) -> list[tuple[Path, Path]]: - """ - Load the paths of the profiles from the given profiles 
path and extra profiles path. - - :param profiles_path: the path to the profiles directory - :type profiles_path: Union[str, Path] - :param extra_profiles_path: an additional path to search for profiles - :type extra_profiles_path: Union[str, Path] - - :return: a list of tuples containing the root profile directory and the profile directory - :rtype: list[Tuple[Path, Path]] - - :raises InvalidProfilePath: if the profiles path is not a directory - """ - result = [] - # set the list of root profile directories - root_profile_directories = [profiles_path] if profiles_path else [] - if extra_profiles_path is not None and extra_profiles_path != profiles_path: - root_profile_directories.append(extra_profiles_path) - # collect profiles nested in the root profile directories - for root_profile_directory in root_profile_directories: - # if the path is a string, convert it to a Path - profile_root_directory = ( - Path(root_profile_directory) if isinstance(root_profile_directory, str) else root_profile_directory - ) - # check if the path is a directory and raise an error if not - if not profile_root_directory.is_dir(): - raise InvalidProfilePath(str(profile_root_directory)) - # if the path is a directory, get the profile directories - result.extend( - [ - (profile_root_directory, p.parent) - for p in profile_root_directory.rglob("*.*") - if p.name == PROFILE_SPECIFICATION_FILE - ] - ) - # return the list of profile directories - return result - - @classmethod - def load_profiles( - cls, - profiles_path: str | Path, - extra_profiles_path: str | Path | None = None, - publicID: str | None = None, - severity: Severity = Severity.REQUIRED, - allow_requirement_check_override: bool = True, - ) -> list[Profile]: - # initialize the profiles list - profiles: list[Profile] = [] - # calculate the list of profiles path as the subdirectories of the profiles path - # where the profile specification file is present - profiles_paths = cls.__load_profiles_paths__(profiles_path, 
extra_profiles_path) - - # iterate through the directories and load the profiles - for root_profile_path, profile_path in profiles_paths: - logger.debug( - "Checking profile path: %s %s %r", - profile_path, - profile_path.is_dir(), - IGNORED_PROFILE_DIRECTORIES, - ) - # check if the profile path is a directory and not in the ignored directories - if profile_path.is_dir() and profile_path not in IGNORED_PROFILE_DIRECTORIES: - profile = Profile.__load_profile_path__( - root_profile_path, - profile_path, - publicID=publicID, - severity=severity, - ) - # if the profile overrides another profile, - # remove the overridden profiles from the list of profiles - # to avoid duplicates and ensure that the most specific profile is used - if profile.overrides: - for overridden_profile in profile.overrides: - if overridden_profile in profiles: - profiles.remove(overridden_profile) - # add the profile to the list of profiles - profiles.append(profile) - logger.debug("Loaded profile: %s (%s)", profile.identifier, profile.path) - - # order profiles based on the inheritance hierarchy, - # from the most specific to the most general - # (i.e., from the leaves of the graph to the root) - profiles = sorted(profiles, reverse=True) - - # Check for overridden checks - if not allow_requirement_check_override: - # Navigate the profiles to check for overridden checks. - # If the override is not enabled in the settings raise an error. - profiles_checks = set() - # Search for duplicated checks in the profiles - for profile in profiles: - profile_checks = [_ for r in profile.get_requirements() for _ in r.get_checks()] - for check in profile_checks: - # If the check is already present in the list of checks, - # raise an error if the override is not enabled. 
- if check in profiles_checks: - raise DuplicateRequirementCheck(check.name, profile.identifier) - # Add the check to the list of checks - profiles_checks.add(check) - - # order profiles according to the number of profiles they depend on: - # i.e, first the profiles that do not depend on any other profile - # then the profiles that depend on the previous ones, and so on - return sorted( - profiles, - key=lambda x: f"{len(x.inherited_profiles)}_{x.identifier}", - ) - - @classmethod - def get_by_identifier(cls, identifier: str) -> Profile | None: - """ - Get the profile with the given identifier - - :param identifier: the identifier - :type identifier: str - - :return: the profile - :rtype: Profile - """ - profile = cls.__profiles_map.get_by_index("identifier", identifier) - if isinstance(profile, list): - return cast("Profile | None", profile[0] if profile else None) - return cast("Profile | None", profile) - - @classmethod - def get_by_uri(cls, uri: str) -> Profile | None: - """ - Get the profile with the given URI - - :param uri: the URI - :type uri: str - - :return: the profile - :rtype: Profile - """ - return cast("Profile | None", cls.__profiles_map.get_by_key(uri)) - - @classmethod - def get_by_name(cls, name: str) -> list[Profile]: - """ - Get the profile with the given name - - :param name: the name - :type name: str - - :return: the profile - :rtype: Profile - """ - return cast("list[Profile]", cls.__profiles_map.get_by_index("name", name) or []) - - @classmethod - def get_by_token(cls, token: str) -> list[Profile]: - """ - Get the profile with the given token - - :param token: the token - :type token: str - - :return: the profile - :rtype: Profile - """ - return cast("list[Profile]", cls.__profiles_map.get_by_index("token", token) or []) - - @classmethod - def get_sibling_profiles(cls, profile: Profile) -> list[Profile]: - """ - Get the sibling profiles of the given profile - - :param profile: the profile - :type profile: Profile - - :return: the list of 
sibling profiles - :rtype: list[Profile] - """ - return [p for p in cls.__profiles_map.values() if profile in p.parents] - - @classmethod - def get_descendants(cls, profile: Profile) -> list[Profile]: - """ - Get the transitive descendants of the given profile (any profile - that has `profile` among its `inherited_profiles`). - - :param profile: the profile - :type profile: Profile - - :return: the list of descendant profiles - :rtype: list[Profile] - """ - return [p for p in cls.__profiles_map.values() if profile in p.inherited_profiles] - - @classmethod - def all(cls) -> list[Profile]: - """ - Get all the profiles - - :return: the list of profiles - :rtype: list[Profile] - """ - return list(cls.__profiles_map.values()) - - @classmethod - def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Profile | None: - """ - Find a profile with the given identifier in the given list of profiles - - :param profiles: the list of profiles - :type profiles: Collection[Profile] - - :param identifier: the identifier - :type identifier: str - - :return: the profile if found, None otherwise - :rtype: Optional[Profile] - """ - profile = next((p for p in profiles if p.identifier == profile_identifier), None) or next( - (p for p in profiles if str(p.identifier).replace(f"-{p.version}", "") == profile_identifier), - None, - ) - if not profile: - raise ProfileNotFound(profile_identifier) - return profile - - -class SkipRequirementCheck(Exception): - def __init__(self, check: RequirementCheck, message: str = ""): - self.check = check - self.message = message - - def __str__(self): - return f"SkipRequirementCheck(check={self.check})" - - -@total_ordering -class Requirement(ABC): - """ - Abstract class representing a requirement of a profile. - A requirement is a named set of checks that can be used to validate an RO-Crate. 
- """ - - def __init__( - self, - profile: Profile, - name: str = "", - description: str | None = None, - path: Path | None = None, - initialize_checks: bool = True, - ): - """ - Initialize the Requirement instance - - :meta private: - """ - self._order_number: int | None = None - self._profile = profile - self._description = description - self._path = path # path of code implementing the requirement - self._level_from_path: RequirementLevel | None = None - self._checks: list[RequirementCheck] = [] - self._overridden: bool | None = None - - if not name and path: - self._name = get_requirement_name_from_file(path) - else: - self._name = name - - # set flag to indicate if the checks have been initialized - self._checks_initialized = False - # initialize the checks if the flag is set - if initialize_checks: - _ = self.__init_checks__() - # assign order numbers to checks - self.__reorder_checks__() - # update the checks initialized flag - self._checks_initialized = True - - @property - def order_number(self) -> int: - """ - The order number of the requirement in the profile - - :return: the order number - :rtype: int - """ - assert self._order_number is not None - return self._order_number - - @property - def identifier(self) -> str: - """ - The identifier of the requirement - - :return: the identifier - :rtype: str - """ - return f"{self.profile.identifier}_{self.relative_identifier}" - - @property - def relative_identifier(self) -> str: - """ - The relative identifier of the requirement - - :return: the relative identifier - :rtype: str - - :meta private: - """ - return f"{self.order_number}" - - @property - def name(self) -> str: - return self._name - - @property - def severity_from_path(self) -> Severity | None: - return self.requirement_level_from_path.severity if self.requirement_level_from_path else None - - @property - def requirement_level_from_path(self) -> RequirementLevel | None: - if not self._level_from_path and self._path: - try: - self._level_from_path 
= LevelCollection.get(self._path.parent.name) - except ValueError: - logger.debug( - "The requirement level could not be determined from the path: %s", - self._path, - ) - return self._level_from_path - - @property - def profile(self) -> Profile: - return self._profile - - @property - def description(self) -> str: - if not self._description: - self._description = ( - self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Profile Requirement {self.name}" - ) - return self._description - - @property - def overridden(self) -> bool: - # Check if the requirement has been overridden. - # The requirement can be considered overridden if all its checks have been overridden - if self._overridden is None: - self._overridden = len([_ for _ in self._checks if not _.overridden]) == 0 - return self._overridden - - @property - @abstractmethod - def hidden(self) -> bool: - pass - - @property - def path(self) -> Path | None: - return self._path - - @abstractmethod - def __init_checks__(self) -> list[RequirementCheck]: - pass - - def get_checks(self) -> list[RequirementCheck]: - return self._checks.copy() - - def get_check(self, name: str) -> RequirementCheck | None: - for check in self._checks: - if check.name == name: - return check - return None - - def get_checks_by_level(self, level: RequirementLevel) -> list[RequirementCheck]: - return list({check for check in self._checks if check.level.severity == level.severity}) - - def __reorder_checks__(self) -> None: - for i, check in enumerate(self._checks): - check.order_number = i + 1 - - def _do_validate_(self, context: ValidationContext) -> bool: - """ - Internal method to perform the validation - Returns whether all checks in this requirement passed. 
- - :meta private: - """ - logger.debug( - "Validating Requirement %s with %s checks", - self.name, - len(self._checks), - ) - all_passed = True - checks_to_perform = [ - _ - for _ in self._checks - if not context.settings.skip_checks or _.identifier not in context.settings.skip_checks - ] - for check in checks_to_perform: - try: - all_passed, should_break = self.__execute_check__(check, context, all_passed) - if should_break: - break - except SkipRequirementCheck as e: - logger.debug("Skipping check '%s' because: %s", check.name, e) - context.result._add_skipped_check(check) - continue - except Exception as e: - if context.maybe_warn_offline_cache_miss(e): - logger.debug("Offline cache miss during check %s: %s", check, e) - else: - logger.warning("Unexpected error during check %s. Exception: %s", check, e) - logger.warning("Consider reporting this as a bug.") - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Unhandled exception during check execution", exc_info=e) - skipped_checks = set(self._checks) - set(checks_to_perform) - context.result.skipped_checks.update(skipped_checks) - logger.debug( - "Checks for Requirement '%s' completed. Checks passed? 
%s", - self.name, - all_passed, - ) - return all_passed - - def __execute_check__(self, check, context, all_passed): - if check.overridden and check.requirement.profile.identifier != context.profile_identifier: - logger.debug( - "Skipping check '%s' because overridden by '%r'", - check.identifier, - [_.identifier for _ in check.overridden_by], - ) - return all_passed, False - if check.deactivated: - logger.debug("Skipping check '%s' because deactivated", check.identifier) - context.result._add_skipped_check(check) - return all_passed, False - # Determine whether to skip event notification for inherited profiles - skip_event_notify = False - if ( - check.requirement.profile.identifier != context.profile_identifier - and context.settings.disable_inherited_profiles_issue_reporting - ): - logger.debug( - "Inherited profiles reporting disabled. " - "Skipping requirement %s as it belongs to an inherited profile %s", - check.requirement.identifier, - check.requirement.profile.identifier, - ) - skip_event_notify = True - # Notify the start of the check execution if not skip_event_notify is set to True - if not skip_event_notify: - context.validator.notify( - RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) - ) - # Execute the check and get the result - check_result = check.execute_check(context) - logger.debug("Result of check %s: %s", check.identifier, check_result) - context.result._add_executed_check(check, check_result) - # Notify the end of the check execution if not skip_event_notify is set to True - if not skip_event_notify: - context.validator.notify( - RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, - check, - validation_result=check_result, - ) - ) - logger.debug( - "Ran check '%s'. 
Got result %s", - check.identifier, - check_result, - ) - # Ensure the check result is a boolean value, otherwise log a warning and ignore the check result - if not isinstance(check_result, bool): - logger.warning( - "Ignoring the check %s as it returned the value %r instead of a boolean", - check.name, - check_result, - ) - raise TypeError(f"Ignoring invalid result from check {check.name}") - new_all_passed = all_passed and check_result - should_break = not new_all_passed and context.fail_fast - return new_all_passed, should_break - - def __eq__(self, other: object) -> bool: - if not isinstance(other, Requirement): - raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - return self.name == other.name and self.description == other.description and self.path == other.path - - def __ne__(self, other: object) -> bool: - return not self.__eq__(other) - - def __hash__(self): - return hash((self.name, self.description, self.path)) - - def __lt__(self, other: object) -> bool: - if not isinstance(other, Requirement): - raise TypeError(f"Cannot compare Requirement with {type(other)}") - return (self._order_number, self.name) < ( - other._order_number, - other.name, - ) - - def __repr__(self): - return ( - f"ProfileRequirement(" - f"_order_number={self._order_number}, " - f"name={self.name}, " - f"description={self.description}" - f", path={self.path}, " - if self.path - else ")" - ) - - def __str__(self) -> str: - return self.name - - def to_dict(self, with_profile: bool = True, with_checks: bool = True) -> dict: - result = { - "identifier": self.identifier, - "name": self.name, - "description": self.description, - "order": self.order_number, - } - if with_profile: - result["profile"] = self.profile.to_dict() - if with_checks: - result["checks"] = [_.to_dict(with_requirement=False, with_profile=False) for _ in self._checks] - return result - - @classmethod - def initialize(cls, context: ValidationContext) -> None: - logger.debug( - "Starting %s requirement 
initialization for context %s", - cls.__name__, - context, - ) - # do initialization logic here (empty for now) - logger.debug( - "Completed %s requirement initialization for context %s", - cls.__name__, - context, - ) - - @classmethod - def finalize(cls, context: ValidationContext) -> None: - logger.debug( - "Starting %s requirement finalization for context %s", - cls.__name__, - context, - ) - # do finalization logic here (empty for now) - logger.debug( - "Completed %s requirement finalization for context %s", - cls.__name__, - context, - ) - - -class RequirementLoader: - def __init__(self, profile: Profile): - self._profile = profile - - @property - def profile(self) -> Profile: - return self._profile - - @staticmethod - def __get_requirement_type__(requirement_path: Path) -> str: - if requirement_path.suffix == ".py": - return "python" - if requirement_path.suffix == ".ttl": - return "shacl" - raise ValueError(f"Unsupported requirement type: {requirement_path.suffix}") - - @classmethod - def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> RequirementLoader: - requirement_type = cls.__get_requirement_type__(requirement_path) - loader_instance_name = f"_{requirement_type}_loader_instance" - loader_instance = getattr(profile, loader_instance_name, None) - if loader_instance is None: - module_name = f"rocrate_validator.requirements.{requirement_type}" - logger.debug("Loading module: %s", module_name) - module = importlib.import_module(module_name) - loader_class_name = f"{'Py' if requirement_type == 'python' else 'SHACL'}RequirementLoader" - loader_class = getattr(module, loader_class_name) - loader_instance = loader_class(profile) - setattr(profile, loader_instance_name, loader_instance) - return loader_instance - - @staticmethod - def __get_requirement_classes__() -> list[type[Requirement]]: - - # Ensure known requirement modules are imported so subclasses are registered. 
- for requirement_type in ("python", "shacl"): - module_name = f"rocrate_validator.requirements.{requirement_type}" - try: - importlib.import_module(module_name) - except Exception: - logger.debug( - "Unable to import requirement module: %s", - module_name, - exc_info=True, - ) - - def all_subclasses( - base_class: type[Requirement], - ) -> list[type[Requirement]]: - result: list[type[Requirement]] = [] - for subcls in base_class.__subclasses__(): - result.append(subcls) - result.extend(all_subclasses(subcls)) - return result - - return all_subclasses(Requirement) # type: ignore[type-abstract] - - @staticmethod - def load_requirements(profile: Profile, severity: Severity = Severity.REQUIRED) -> list[Requirement]: - """ - Load the requirements related to the profile - """ - - def ok_file(p: Path) -> bool: - return ( - p.is_file() - and p.suffix in PROFILE_FILE_EXTENSIONS - and p.name not in {DEFAULT_ONTOLOGY_FILE, PROFILE_SPECIFICATION_FILE} - and not p.name.startswith(".") - and not p.name.startswith("_") - ) - - files = sorted( - (p for p in profile.path.rglob("*.*") if ok_file(p)), - key=lambda x: (x.suffix != ".py", x), - ) - - # set the requirement level corresponding to the severity - requirement_level = LevelCollection.get(severity.name) - - requirements = [] - for requirement_path in files: - try: - requirement_level_from_path = LevelCollection.get(requirement_path.parent.name) - if requirement_level_from_path < requirement_level: - continue - except ValueError: - logger.debug( - "The requirement level could not be determined from the path: %s", - requirement_path, - ) - requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) - requirements.extend( - cast("Any", requirement_loader).load( - profile, - requirement_level, - requirement_path, - publicID=profile.publicID, - ) - ) - # sort the requirements by severity - requirements = sorted( - requirements, - key=lambda x: ( - (-x.severity_from_path.value, x.path.name, x.name) 
- if x.severity_from_path is not None - else (0, x.path.name, x.name) - ), - reverse=False, - ) - # assign order numbers to requirements - for i, requirement in enumerate(requirements): - requirement._order_number = i + 1 - # log and return the requirements - logger.debug("Profile %s loaded %s requirements: %s", profile.identifier, len(requirements), requirements) - return requirements - - -@dataclass(frozen=True) -class SourceSnippet: - """ - A snippet of source code backing a :class:`RequirementCheck`. - :ivar language: language tag for syntax highlighting (e.g. ``"python"``, ``"turtle"``). - :ivar code: the source code as text. - :ivar source_path: path to the file the snippet was extracted from, when available. - """ - - language: str - code: str - source_path: Path | None = None - - -@total_ordering -class RequirementCheck(ABC): - def __init__( - self, - requirement: Requirement, - name: str | None, - level: RequirementLevel | None = LevelCollection.REQUIRED, - description: str | None = None, - hidden: bool | None = None, - deactivated: bool = False, - ): - self._requirement: Requirement = requirement - self._order_number = 0 - self._name = name - self._level = level - self._description = description - self._hidden = hidden - self._deactivated = deactivated - - @property - def order_number(self) -> int: - return self._order_number - - @order_number.setter - def order_number(self, value: int) -> None: - if value < 0: - raise ValueError("order_number can't be < 0") - self._order_number = value - - @property - def identifier(self) -> str: - return f"{self.requirement.identifier}.{self.order_number}" - - @property - def relative_identifier(self) -> str: - return f"{self.level.name} {self.requirement.relative_identifier}.{self.order_number}" - - @property - def name(self) -> str: - if not self._name: - return self.__class__.__name__.replace("Check", "") - return self._name - - @property - def description(self) -> str: - if not self._description: - return 
self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Check {self.name}" - return self._description - - @property - def requirement(self) -> Requirement: - return self._requirement - - @property - def level(self) -> RequirementLevel: - return self._level or self.requirement.requirement_level_from_path or LevelCollection.REQUIRED - - @property - def severity(self) -> Severity: - return self.level.severity - - @property - def overridden_by(self) -> list[RequirementCheck]: - overridden_by = [] - for sibling_profile in self.requirement.profile.siblings: - check = sibling_profile.get_requirement_check(self.name) - if check: - overridden_by.append(check) - return overridden_by - - @property - def overrides(self) -> list[RequirementCheck]: - overrides = [] - for parent in self.requirement.profile.parents: - check = parent.get_requirement_check(self.name) - if check: - overrides.append(check) - return overrides - - @property - def overridden(self) -> bool: - return len(self.overridden_by) > 0 - - @property - def deactivated(self) -> bool: - return self._deactivated - - @property - def hidden(self) -> bool: - if self._hidden is not None: - return self._hidden - return self.requirement.hidden - - @abstractmethod - def execute_check(self, context: ValidationContext) -> bool: - raise NotImplementedError() - - def get_source_snippet(self) -> SourceSnippet | None: - """ - Return the source code that implements this check, or ``None`` if the - backing source cannot be extracted for this check kind. - Concrete subclasses should override this method. 
- """ - return None - - def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> dict: - result = { - "identifier": self.identifier, - "label": self.relative_identifier, - "order": self.order_number, - "name": self.name, - "description": self.description, - "severity": self.severity.name, - } - if with_requirement: - result["requirement"] = self.requirement.to_dict(with_profile=with_profile, with_checks=False) - return result - - def __eq__(self, other: object) -> bool: - if not isinstance(other, RequirementCheck): - raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") - return self.requirement == other.requirement and self.name == other.name - - def __lt__(self, other: object) -> bool: - if not isinstance(other, RequirementCheck): - raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") - return (self.requirement, self.identifier) < ( - other.requirement, - other.identifier, - ) - - def __ne__(self, other: object) -> bool: - return not self.__eq__(other) - - def __hash__(self) -> int: - return hash((self.requirement, self.name or "")) - - -@total_ordering -class CheckIssue: - """ - Represents an issue with a check that has been executed - during the validation process. 
- """ - - def __init__( - self, - check: RequirementCheck, - message: str | None = None, - violatingProperty: str | None = None, - violatingEntity: str | None = None, - value: str | None = None, - ): - self._message = message - self._check: RequirementCheck = check - self._violatingProperty = violatingProperty - self._violatingEntity = violatingEntity - self._propertyValue = value - - @property - def message(self) -> str | None: - """The message associated with the issue""" - return self._message - - @property - def level(self) -> RequirementLevel: - """The level of the issue""" - return self._check.level - - @property - def severity(self) -> Severity: - """Severity of the RequirementLevel associated with this check.""" - return self._check.severity - - @property - def level_name(self) -> str: - return self.level.name - - @property - def check(self) -> RequirementCheck: - """The check that generated the issue""" - return self._check - - @property - def violatingEntity(self) -> str | None: - """ - It represents the specific element being evaluated that fails - to meet the defined rules or constraints within a validation process. - Also referred to as `focusNode` in SHACL terminology - in the context of an RDF graph, it is the subject of a triple - that violates a given constraint on the subject's property/predicate, - represented by the violatingProperty. - """ - return self._violatingEntity - - @property - def violatingProperty(self) -> str | None: - """ - It refers to the specific property or relationship within an item - that leads to a validation failure. - It identifies the part of the data structure that is causing the issue. - Also referred to as `resultPath` in SHACL terminology, - in the context of an RDF graph, it is the predicate of a triple - that violates a given constraint on the subject's property/predicate, - represented by the violatingProperty. 
- """ - return self._violatingProperty - - @property - def violatingPropertyValue(self) -> str | None: - """ - It represents the value of the violatingProperty - that leads to a validation failure. - """ - return self._propertyValue - - def __eq__(self, other: object) -> bool: - return isinstance(other, CheckIssue) and self._check == other._check and self._message == other._message - - def __lt__(self, other: object) -> bool: - if not isinstance(other, CheckIssue): - raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - return (self._check, self._message) < (other._check, other._message) - - def __hash__(self) -> int: - return hash((self._check, self._message)) - - def __repr__(self) -> str: - return f"CheckIssue(severity={self.severity}, check={self.check}, message={self.message})" - - def __str__(self) -> str: - return f'Issue of severity {self.severity.name} with check "{self.check.identifier}": {self.message}' - - def to_dict( - self, - with_check: bool = True, - with_requirement: bool = True, - with_profile: bool = True, - ) -> dict: - result: dict[str, Any] = { - "severity": self.severity.name, - "message": self.message, - "violatingEntity": self.violatingEntity, - "violatingProperty": self.violatingProperty, - "violatingPropertyValue": self.violatingPropertyValue, - } - if with_check: - result["check"] = self.check.to_dict(with_requirement=with_requirement, with_profile=with_profile) - return result - - def to_json( - self, - with_checks: bool = True, - with_requirements: bool = True, - with_profile: bool = True, - ) -> str: - return json.dumps( - self.to_dict( - with_check=with_checks, - with_requirement=with_requirements, - with_profile=with_profile, - ), - indent=4, - cls=CustomEncoder, - ) - - -class ValidationStatisticsListener(Protocol): - """ - Protocol for listeners interested in validation statistics updates. 
- """ - - def on_statistics_updated(self, statistics: ValidationStatistics): - logger.debug("Statistics updated: %r", statistics.statistics) - - -class ValidationStatistics(Subscriber): - """ - Computes and stores statistical metrics about the RO-Crate validation process. - """ - - def __init__( - self, - settings: dict | ValidationSettings, - context: ValidationContext | None = None, - skip_initialization: bool = False, - ): - super().__init__(name=self.__class__.__name__) - if isinstance(settings, dict): - settings = ValidationSettings.parse(settings) - self._settings = settings - self._context = context - self._stats = self.__initialise__(settings) if not skip_initialization else {} - self._result: ValidationResult | None = None - self._listeners: list[ValidationStatisticsListener] = [] - - @property - def validation_settings(self) -> ValidationSettings: - """ - Get the validation settings used for statistics computation - """ - return self._settings - - @property - def validation_result(self) -> ValidationResult | None: - """ - Get the validation result - """ - return self._result - - def add_listener(self, listener: ValidationStatisticsListener): - """ - Add a listener to be notified on statistics updates - """ - self._listeners.append(listener) - logger.debug("Listener added: %r", listener) - - def notify_listeners(self): - """ - Notify all registered listeners about statistics updates - """ - for listener in self._listeners: - listener.on_statistics_updated(self) - logger.debug("Notified listener: %r", listener) - - @property - def statistics(self) -> dict: - """ - Get the computed validation statistics - """ - return self._stats.copy() - - @property - def profile(self) -> Profile: - """ - Get the profile being validated - """ - return cast("Profile", self._stats.get("profile")) - - @property - def profiles(self) -> list[Profile]: - """ - Get all profiles involved in validation - """ - return self._stats.get("profiles", []) - - @property - def severity(self) 
-> Severity: - """ - Get the validation severity level - """ - return cast("Severity", self._stats.get("severity")) - - @property - def checks_by_severity(self) -> dict: - """ - Get the checks grouped by severity - """ - return self._stats.get("checks_by_severity", {}) - - @property - def check_count_by_severity(self) -> dict: - """ - Get the count of checks grouped by severity - """ - return {k: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()} - - @property - def requirements(self) -> list[Requirement]: - """ - Get all requirements being validated - """ - return self._stats.get("requirements", []) - - @property - def passed_requirements(self) -> list[Requirement]: - """ - Get the list of passed requirements - """ - return self._stats.get("passed_requirements", []) - - @property - def failed_requirements(self) -> list[Requirement]: - """ - Get the list of failed requirements - """ - return self._stats.get("failed_requirements", []) - - @property - def total_requirements(self) -> int: - """ - Get the total number of requirements - """ - return len(self._stats.get("requirements", [])) - - @property - def checks(self) -> list[RequirementCheck]: - """ - Get all checks being validated - """ - return self._stats.get("checks", []) - - @property - def passed_checks(self) -> list[RequirementCheck]: - """ - Get the list of passed checks - """ - return self._stats.get("passed_checks", []) - - @property - def failed_checks(self) -> list[RequirementCheck]: - """ - Get the list of failed checks - """ - return self._stats.get("failed_checks", []) - - @property - def total_checks(self) -> int: - """ - Get the total number of checks - """ - return len(self._stats.get("checks", [])) - - @property - def validated_profiles(self) -> list[Profile]: - """ - Get the list of validated profiles - """ - return self._stats.get("validated_profiles", []) - - @property - def validated_requirements(self) -> list[Requirement]: - """ - Get the list of validated requirements - 
""" - return self._stats.get("validated_requirements", []) - - @property - def validated_checks(self) -> list[RequirementCheck]: - """ - Get the list of validated checks - """ - return self._stats.get("validated_checks", []) - - @property - def started_at(self) -> datetime | None: - """ - Get the timestamp when validation started - """ - return self._stats.get("started_at") - - @property - def finished_at(self) -> datetime | None: - """ - Get the timestamp when validation finished - """ - return self._stats.get("finished_at") - - @property - def duration(self) -> float | None: - """ - Get the duration of the validation process in seconds - """ - started_at = self.started_at - finished_at = self.finished_at - if started_at and finished_at: - return (finished_at - started_at).total_seconds() - return None - - @staticmethod - def __collect_requirement_checks__( - requirement, - severity_validation, - validation_settings, - target_profile_identifier, - checks, - checks_by_severity, - ) -> int: - """Count and register a requirement's checks across severities >= the requested one.""" - requirement_checks_count = 0 - for severity in ( - Severity.REQUIRED, - Severity.RECOMMENDED, - Severity.OPTIONAL, - ): - logger.debug(f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}") - # skip requirements with lower severity - if severity < severity_validation: - continue - # count the checks - requirement_checks = [ - _ - for _ in requirement.get_checks_by_level(LevelCollection.get(severity.name)) - if (not validation_settings.skip_checks or _.identifier not in validation_settings.skip_checks) - and (not _.overridden or _.requirement.profile.identifier == target_profile_identifier) - ] - num_checks = len(requirement_checks) - requirement_checks_count += num_checks - if num_checks > 0: - logger.debug(f"Requirement: {requirement} has {num_checks} checks of severity: {severity}") - checks.update(requirement_checks) - 
checks_by_severity[severity].update(requirement_checks) - return requirement_checks_count - - @classmethod - def __initialise__(cls, validation_settings: ValidationSettings): - """ - Compute the statistics of the profile - """ - # extract the validation settings - severity_validation = validation_settings.requirement_severity - profiles: list[Profile] = Profile.load_profiles( - validation_settings.profiles_path, - extra_profiles_path=validation_settings.extra_profiles_path, - severity=cast("Severity", severity_validation), - allow_requirement_check_override=validation_settings.allow_requirement_check_override, - ) - profile: Profile = cast("Profile", Profile.find_in_list(profiles, validation_settings.profile_identifier)) - target_profile_identifier = profile.identifier - # initialize the profiles list - profiles = [profile] - - # add inherited profiles if enabled - if not validation_settings.disable_inherited_profiles_issue_reporting: - profiles.extend(profile.inherited_profiles) - logger.debug("Inherited profiles: %r", profile.inherited_profiles) - - # Initialize the counters - checks_by_severity: dict[Severity, set[RequirementCheck]] = {} - checks: set[RequirementCheck] = set() - requirements: set[Requirement] = set() - - # Initialize the counters - for severity in ( - Severity.REQUIRED, - Severity.RECOMMENDED, - Severity.OPTIONAL, - ): - checks_by_severity[severity] = set() - - # Process the requirements and checks - processed_requirements = [] - for profile in profiles: - for requirement in profile.requirements: - if requirement in processed_requirements: - continue - processed_requirements.append(requirement) - if requirement.hidden: - continue - - requirement_checks_count = cls.__collect_requirement_checks__( - requirement, - severity_validation, - validation_settings, - target_profile_identifier, - checks, - checks_by_severity, - ) - - # count the requirements and checks - if requirement_checks_count == 0: - logger.debug(f"No checks for requirement: 
{requirement}") - else: - # Only if there are checks for the requirement count it - logger.debug(f"Requirement: {requirement} checks count: {requirement_checks_count}") - assert not requirement.hidden, "Hidden requirements should not be counted" - # add the requirement to the list - requirements.add(requirement) - - # log processed requirements - logger.debug( - "Processed requirements %r: %r", - len(processed_requirements), - processed_requirements, - ) - - # Prepare the result - result = { - "profile": profile, - "profiles": profiles, - "requirements": requirements, - "checks": checks, - "severity": severity_validation, - "checks_by_severity": checks_by_severity, - "failed_requirements": [], - "failed_checks": [], - "passed_requirements": [], - "passed_checks": [], - "started_at": None, - "finished_at": None, - "validated_profiles": [], - "validated_requirements": [], - "validated_checks": [], - } - logger.debug(result) - return result - - def update(self, event: Event, ctx: ValidationContext | None = None) -> None: - self.__event_handlers__.get(event.event_type, lambda e, c: None)(event, ctx) - - def __handle_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: - logger.debug("Validation started") - self._stats["started_at"] = datetime.now(timezone.utc) - - def __handle_profile_validation_start__(self, event: Event, _ctx: ValidationContext | None) -> None: - assert isinstance(event, ProfileValidationEvent) - logger.debug("Profile validation start: %s", event.profile.identifier) - - def __handle_requirement_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: - logger.debug("Requirement validation start") - - def __handle_requirement_check_validation_start__(self, _event: Event, _ctx: ValidationContext | None) -> None: - logger.debug("Requirement check validation start") - - def __handle_requirement_check_validation_end__(self, event: Event, ctx: ValidationContext | None) -> None: - assert isinstance(event, 
RequirementCheckValidationEvent) - assert ctx is not None - target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and ( - not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier - ): - if event.validation_result is not None: - if event.validation_result: - self._stats["passed_checks"].append(event.requirement_check) - else: - self._stats["failed_checks"].append(event.requirement_check) - self._stats["validated_checks"].append(event.requirement_check) - self.notify_listeners() - else: - logger.debug( - "Requirement check validation result is None: %s", - event.requirement_check.identifier, - ) - else: - logger.debug( - "Skipping requirement check validation: %s", - event.requirement_check.identifier, - ) - - def __handle_requirement_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: - assert isinstance(event, RequirementValidationEvent) - if not event.requirement.hidden: - if event.validation_result: - self._stats["passed_requirements"].append(event.requirement) - else: - self._stats["failed_requirements"].append(event.requirement) - self._stats["validated_requirements"].append(event.requirement) - self.notify_listeners() - - def __handle_profile_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: - assert isinstance(event, ProfileValidationEvent) - self._stats["validated_profiles"].append(event.profile) - logger.debug("Profile validation ended: %s", event.profile.identifier) - - def __handle_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: - assert isinstance(event, ValidationEvent) - self._result = event.validation_result - self._stats["finished_at"] = datetime.now(timezone.utc) - logger.debug("Validation ended with result: %s", event.validation_result) - - @property - def __event_handlers__(self): - return { - EventType.VALIDATION_START: 
self.__handle_validation_start__, - EventType.PROFILE_VALIDATION_START: self.__handle_profile_validation_start__, - EventType.REQUIREMENT_VALIDATION_START: self.__handle_requirement_validation_start__, - EventType.REQUIREMENT_CHECK_VALIDATION_START: self.__handle_requirement_check_validation_start__, - EventType.REQUIREMENT_CHECK_VALIDATION_END: self.__handle_requirement_check_validation_end__, - EventType.REQUIREMENT_VALIDATION_END: self.__handle_requirement_validation_end__, - EventType.PROFILE_VALIDATION_END: self.__handle_profile_validation_end__, - EventType.VALIDATION_END: self.__handle_validation_end__, - } - - def to_dict(self) -> dict: - """ - Get the computed validation statistics as a dictionary - """ - return { - # Execution time details - "started_at": self.started_at.isoformat() if self.started_at else None, - "finished_at": self.finished_at.isoformat() if self.finished_at else None, - "duration": self.duration, - # Profile details - "profile": self.profile.identifier if self.profile else None, - "profiles": [p.identifier for p in self.profiles], - "severity": self.severity.name if self.severity else None, - # Computed totals - "total_requirements": self.total_requirements, - "total_passed_requirements": len(self.passed_requirements), - "total_failed_requirements": len(self.failed_requirements), - "total_checks": self.total_checks, - "total_passed_checks": len(self.passed_checks), - "total_failed_checks": len(self.failed_checks), - "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved - "requirements": { - "count": self.total_requirements, - "passed": { - "count": len(self.passed_requirements), - "percentage": ( - (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 - else 0.0 - ), - "identifiers": sorted([r.identifier for r in self.passed_requirements]), - }, - "failed": { - "count": len(self.failed_requirements), - "percentage": ( - 
(len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 - else 0.0 - ), - "identifiers": sorted([r.identifier for r in self.failed_requirements]), - }, - "identifiers": sorted([r.identifier for r in self.requirements]), - }, - # Checks involved - "checks": { - "count": self.total_checks, - "passed": { - "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.passed_checks]), - }, - "failed": { - "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.failed_checks]), - }, - "identifiers": sorted([c.identifier for c in self.checks]), - "by_severity": {k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()}, - }, - } - - def to_json(self) -> str: - """ - Get the computed validation statistics as a JSON string - """ - return json.dumps(self.to_dict(), indent=4, cls=CustomEncoder) - - -class AggregatedValidationStatistics: - """ - Represents aggregated validation statistics from multiple validation runs. 
- """ - - def __init__(self, statistics_list: list[ValidationStatistics]): - if not statistics_list: - raise ValueError("statistics_list cannot be empty") - # Store the individual statistics - self._statistics_list = statistics_list - - # Aggregate the statistics - self._overall_stats = self.__compute_averall_stats__() - - @property - def individual_statistics(self) -> list[ValidationStatistics]: - """ - Get the individual validation statistics - """ - return self._statistics_list - - def to_dict(self) -> dict: - """ - Get the overall aggregated statistics as a dictionary - """ - return { - # Execution time details - "started_at": self.started_at.isoformat() if self.started_at else None, - "finished_at": self.finished_at.isoformat() if self.finished_at else None, - "duration": self.duration, - # Profiles involved - "profiles": [p.identifier for p in self.profiles], - # Computed totals - "total_requirements": self.total_requirements, - "total_passed_requirements": len(self.passed_requirements), - "total_failed_requirements": len(self.failed_requirements), - "total_checks": self.total_checks, - "total_passed_checks": len(self.passed_checks), - "total_failed_checks": len(self.failed_checks), - "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved - "requirements": { - "count": self.total_requirements, - "passed": { - "count": len(self.passed_requirements), - "percentage": ( - (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 - else 0.0 - ), - "identifiers": [r.identifier for r in self.passed_requirements], - }, - "failed": { - "count": len(self.failed_requirements), - "percentage": ( - (len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 - else 0.0 - ), - "identifiers": [r.identifier for r in self.failed_requirements], - }, - "identifiers": [r.identifier for r in self.requirements], - }, - # Checks involved - 
"checks": { - "count": self.total_checks, - "passed": { - "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.passed_checks], - }, - "failed": { - "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.failed_checks], - }, - "identifiers": [c.identifier for c in self.checks], - }, - } - - @property - def profiles(self) -> set[Profile]: - """ - Get the set of profiles involved in the aggregated validation - """ - return self._overall_stats.get("profiles", set()) - - @property - def total_profiles(self) -> int: - """ - Get the total number of profiles involved in the aggregated validation - """ - return len(self._overall_stats.get("profiles", set())) - - @property - def requirements(self) -> set[Requirement]: - """ - Get the set of requirements in the aggregated validation - """ - return self._overall_stats.get("requirements", set()) - - @property - def passed_requirements(self) -> set[Requirement]: - """ - Get the set of passed requirements in the aggregated validation - """ - return self._overall_stats.get("passed_requirements", set()) - - @property - def failed_requirements(self) -> set[Requirement]: - """ - Get the set of failed requirements in the aggregated validation - """ - return self._overall_stats.get("failed_requirements", set()) - - @property - def total_requirements(self) -> int: - """ - Get the total number of requirements in the aggregated validation - """ - return len(self._overall_stats.get("requirements", set())) - - @property - def checks(self) -> set[RequirementCheck]: - """ - Get the set of checks in the aggregated validation - """ - return self._overall_stats.get("checks", set()) - - @property - def checks_by_severity(self) -> dict: - """ - Get the checks grouped by severity in the aggregated 
validation - """ - return self._overall_stats.get("checks_by_severity", {}) - - @property - def total_checks(self) -> int: - """ - Get the total number of checks in the aggregated validation - """ - return len(self._overall_stats.get("checks", set())) - - @property - def passed_checks(self) -> set[RequirementCheck]: - """ - Get the set of passed checks in the aggregated validation - """ - return self._overall_stats.get("passed_checks", set()) - - @property - def failed_checks(self) -> set[RequirementCheck]: - """ - Get the set of failed checks in the aggregated validation - """ - return self._overall_stats.get("failed_checks", set()) - - @property - def started_at(self) -> datetime | None: - """ - Get the timestamp when the aggregated validation started - """ - return self._overall_stats.get("started_at") - - @property - def finished_at(self) -> datetime | None: - """ - Get the timestamp when the aggregated validation finished - """ - return self._overall_stats.get("finished_at") - - @property - def duration(self) -> float: - """ - Get the total duration of the aggregated validation in seconds - """ - return self._overall_stats.get("duration", 0.0) - - def __compute_averall_stats__(self): - """ - Compute the overall aggregated statistics - """ - raw_stats = self.__aggregate_raw_stats__(self._statistics_list) - return self.__build_sorted_stats_dict__(raw_stats) - - @classmethod - def __aggregate_raw_stats__( - cls, - statistics_list: list[ValidationStatistics], - ): - """ - Aggregate raw (unsorted) statistics from a list of ValidationStatistics instances. 
- """ - profiles: set[Profile] = set() - requirements: set[Requirement] = set() - checks: set[RequirementCheck] = set() - checks_by_severity: dict[Severity, set[RequirementCheck]] = {} - failed_requirements: set[Requirement] = set() - failed_checks: set[RequirementCheck] = set() - passed_requirements: set[Requirement] = set() - passed_checks: set[RequirementCheck] = set() - started_at: datetime | None = None - finished_at: datetime | None = None - duration: float = 0.0 - - # Aggregate statistics from each ValidationStatistics instance - for stats in statistics_list: - # Aggregate profiles - for profile in stats.profiles: - profiles.add(profile) - - # Aggregate total requirements and checks - requirements.update(stats.requirements) - checks.update(stats.checks) - checks_by_severity.update(stats.checks_by_severity) - - # Aggregate failed and passed requirements and checks - failed_requirements.update(stats.failed_requirements) - failed_checks.update(stats.failed_checks) - passed_requirements.update(stats.passed_requirements) - passed_checks.update(stats.passed_checks) - - # Aggregate started_at and finished_at - if started_at is not None and stats.started_at is not None: - started_at = min(started_at, stats.started_at) - elif stats.started_at is not None: - started_at = stats.started_at - if finished_at is not None and stats.finished_at is not None: - finished_at = max(finished_at, stats.finished_at) - elif stats.finished_at is not None: - finished_at = stats.finished_at - # Aggregate duration - duration += stats.duration or 0.0 - - return { - "profiles": profiles, - "requirements": requirements, - "checks": checks, - "checks_by_severity": checks_by_severity, - "failed_requirements": failed_requirements, - "failed_checks": failed_checks, - "passed_requirements": passed_requirements, - "passed_checks": passed_checks, - "started_at": started_at, - "finished_at": finished_at, - "duration": duration, - } - - @classmethod - def __build_sorted_stats_dict__(cls, raw_stats): 
- """ - Sort the raw aggregated sets and build the final sorted statistics dict. - """ - sorted_checks_by_severity = {} - for severity_key, severity_checks in raw_stats["checks_by_severity"].items(): - sorted_checks_by_severity[severity_key] = sorted(severity_checks, key=lambda c: c.identifier) - - return { - "profiles": sorted(raw_stats["profiles"], key=lambda p: p.identifier), - "requirements": sorted(raw_stats["requirements"], key=lambda r: r.identifier), - "checks": sorted(raw_stats["checks"], key=lambda c: c.identifier), - "checks_by_severity": sorted_checks_by_severity, - "failed_requirements": sorted(raw_stats["failed_requirements"], key=lambda r: r.identifier), - "failed_checks": sorted(raw_stats["failed_checks"], key=lambda c: c.identifier), - "passed_requirements": sorted(raw_stats["passed_requirements"], key=lambda r: r.identifier), - "passed_checks": sorted(raw_stats["passed_checks"], key=lambda c: c.identifier), - "started_at": raw_stats["started_at"], - "finished_at": raw_stats["finished_at"], - "duration": raw_stats["duration"], - } - - -class ValidationResult: - """ - Represents the result of a validation. 
- - :param context: The validation context - :type context: ValidationContext - :param rocrate_uri: The URI of the RO-Crate - :type rocrate_uri: str - :param validation_settings: The validation settings - :type validation_settings: ValidationSettings - :param issues: The issues found during the validation - :type issues: list[CheckIssue] - """ - - def __init__(self, context: ValidationContext): - # reference to the validation context - self._context = context - # reference to the ro-crate URI - self._rocrate_uri = context.rocrate_uri - # reference to the validation settings - self._validation_settings: ValidationSettings = context.settings - # keep track of the issues found during the validation - self._issues: list[CheckIssue] = [] - # keep track of the checks that have been executed - self._executed_checks: set[RequirementCheck] = set() - self._executed_checks_results: dict[str, bool] = {} - # keep track of the checks that have been skipped - self._skipped_checks: set[RequirementCheck] = set() - # initialize the statistics - self._statistics = ValidationStatistics(context.settings) - - @property - def context(self) -> ValidationContext: - """ - The validation context - """ - return self._context - - @property - def rocrate_uri(self): - """ - The URI of the RO-Crate - """ - return self._rocrate_uri - - @property - def validation_settings(self): - """ - The validation settings - """ - return self._validation_settings - - @property - def statistics(self) -> ValidationStatistics: - """ - The validation statistics - """ - return self._statistics - - # --- Checks --- - - @property - def executed_checks(self) -> set[RequirementCheck]: - """ - The checks that have been executed - """ - return self._executed_checks - - def _add_executed_check(self, check: RequirementCheck, result: bool): - """ - Internal method to add a check to the executed checks - """ - self._executed_checks.add(check) - self._executed_checks_results[check.identifier] = result - # remove the check from 
the skipped checks if it was skipped - if check in self._skipped_checks: - self._skipped_checks.remove(check) - logger.debug("Removing check '%s' from skipped checks", check.name) - - def get_executed_check_result(self, check: RequirementCheck) -> bool | None: - """ - Get the result of an executed check - """ - return self._executed_checks_results.get(check.identifier) - - @property - def skipped_checks(self) -> set[RequirementCheck]: - """ - The checks that have been skipped - """ - return self._skipped_checks - - def _add_skipped_check(self, check: RequirementCheck): - """ - Internal method to add a check to the skipped checks - """ - self._skipped_checks.add(check) - - def _remove_skipped_check(self, check: RequirementCheck): - """ - Internal method to remove a check from the skipped checks - """ - self._skipped_checks.remove(check) - - # --- Issues --- - @property - def issues(self) -> list[CheckIssue]: - """ - The issues found during the validation - """ - return self._issues.copy() - - def get_issues(self, min_severity: Severity | None = None) -> list[CheckIssue]: - """ - Get the issues found during the validation with a severity greater than or equal to `min_severity` - """ - min_severity = min_severity or self.context.requirement_severity - return [issue for issue in self._issues if issue.severity >= min_severity] - - def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity | None = None) -> list[CheckIssue]: - """ - Get the issues found during the validation for a specific check - with a severity greater than or equal to `min_severity` - """ - min_severity = min_severity or self.context.requirement_severity - return [issue for issue in self._issues if issue.check == check and issue.severity >= min_severity] - - def has_issues(self, min_severity: Severity | None = None) -> bool: - """ - Check if there are issues with a severity greater than or equal to the given `severity` - """ - min_severity = min_severity or 
self.context.requirement_severity - return any(issue.severity >= min_severity for issue in self._issues) - - def passed(self, min_severity: Severity | None = None) -> bool: - """ - Check if all checks passed with a severity greater than or equal to the given `severity` - """ - min_severity = min_severity or self.context.requirement_severity - return not any(issue.severity >= min_severity for issue in self._issues) - - def add_issue( - self, - message: str, - check: RequirementCheck, - violatingEntity: str | None = None, - violatingProperty: str | None = None, - violatingPropertyValue: str | None = None, - ) -> CheckIssue: - """ - Add an issue to the validation result - - Parameters: - message(str): The message of the issue - check(RequirementCheck): The check that generated the issue - violatingEntity(Optional[str]): The entity that caused the issue (if any) - violatingProperty(Optional[str]): The property that caused the issue (if any) - violatingPropertyValue(Optional[str]): The value of the violatingProperty (if any) - """ - c = CheckIssue( - check, - message, - violatingProperty=violatingProperty, - violatingEntity=violatingEntity, - value=violatingPropertyValue, - ) - bisect.insort(self._issues, c) - return c - - # --- Requirements --- - @property - def failed_requirements(self) -> Collection[Requirement]: - """ - Get the requirements that failed at or above the configured `requirement_severity`. - """ - min_severity = self.context.requirement_severity - return {issue.check.requirement for issue in self._issues if issue.severity >= min_severity} - - # --- Checks --- - @property - def failed_checks(self) -> Collection[RequirementCheck]: - """ - Get the checks that failed at or above the configured `requirement_severity`. 
- """ - min_severity = self.context.requirement_severity - return {issue.check for issue in self._issues if issue.severity >= min_severity} - - def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: - """ - Get the checks that failed for a specific requirement - """ - return [check for check in self.failed_checks if check.requirement == requirement] - - def get_failed_checks_by_requirement_and_severity( - self, requirement: Requirement, severity: Severity - ) -> Collection[RequirementCheck]: - """ - Get the checks that failed for a specific requirement and severity - """ - return [ - check for check in self.failed_checks if check.requirement == requirement and check.severity == severity - ] - - def __str__(self) -> str: - return f"Validation result: passed={len(self.failed_checks) == 0}, {len(self._issues)} issues" - - def __repr__(self): - return f"ValidationResult(passed={len(self.failed_checks) == 0},issues={self._issues})" - - def __eq__(self, other: object) -> bool: - if not isinstance(other, ValidationResult): - raise TypeError(f"Cannot compare ValidationResult with {type(other)}") - return self._issues == other._issues - - # Equality is based on the mutable list of issues, so instances are - # intentionally unhashable (a content-based hash would be unstable). 
- __hash__ = None # type: ignore[assignment] - - def to_dict(self) -> dict: - """ - Convert the ValidationResult to a dictionary - """ - allowed_properties = [ - "profile_identifier", - "enable_profile_inheritance", - "requirement_severity", - "abort_on_first", - ] - validation_settings = { - key: value for key, value in self.validation_settings.to_dict().items() if key in allowed_properties - } - result: dict[str, Any] = { - "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, - "validation_settings": validation_settings, - "passed": self.passed(cast("Severity", self.context.settings.requirement_severity)), - "issues": [issue.to_dict() for issue in self.issues], - } - # add validator version to the settings - result["validation_settings"]["rocrate_validator_version"] = __version__ - return result - - def to_json(self, path: Path | None = None) -> str: - """ - Convert the ValidationResult to a JSON string - """ - result = json.dumps(self.to_dict(), indent=4, cls=CustomEncoder) - if path: - with path.open("w", encoding="utf-8") as f: - f.write(result) - return result - - -class CustomEncoder(json.JSONEncoder): - def default(self, o): - if isinstance(o, CheckIssue): - return o.__dict__ - if isinstance(o, Path): - return str(o) - if isinstance(o, (RequirementCheck, Requirement)): - return o.identifier - if isinstance(o, (Severity, RequirementLevel)): - return o.name - return super().default(o) - - -@dataclass -class ValidationSettings: - """ - Represents the settings for RO-Crate validation. 
- - It includes the following attributes: - """ - - #: The URI of the RO-Crate - rocrate_uri: URI # pyright: ignore[reportRedeclaration] - #: The relative root path of the RO-Crate - rocrate_relative_root_path: Path | None = None - # Profile settings - #: The path to the profiles - profiles_path: Path = DEFAULT_PROFILES_PATH - #: The path to the extra profiles - extra_profiles_path: Path | None = None - #: The profile identifier to validate against - profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER - #: Flag to enable profile inheritance - # Use the `enable_profile_inheritance` flag with caution: disable inheritance only if the - # target validation profile is fully self-contained and does not rely on definitions - # from inherited profiles (e.g., entities defined upstream). For modularization - # purposes, some base entities and properties are defined in the base RO-Crate - # profile and are intentionally not redefined in specialized profiles; they are - # required for validations targeting those specializations and therefore cannot be skipped. - # Nevertheless, the validator can still suppress issue reporting for checks defined - # in inherited profiles by setting disable_inherited_profiles_issue_reporting to `True`. 
- enable_profile_inheritance: bool = True - # Validation settings - #: Flag to abort on first error - abort_on_first: bool | None = False - #: Flag to disable reporting of issues related to inherited profiles - disable_inherited_profiles_issue_reporting: bool = False - #: Flag to disable remote crate download - disable_remote_crate_download: bool = True - # Requirement settings - #: The requirement severity - requirement_severity: str | Severity = Severity.REQUIRED - #: Flag to validate requirement severity only skipping check with lower or higher severity - requirement_severity_only: bool = False - # Requirement check settings - #: Flag to allow requirement check override - allow_requirement_check_override: bool = True - #: Flag to disable the check for duplicates - disable_check_for_duplicates: bool = False - #: Checks to skip - skip_checks: list[str] | None = None - #: Flag to validate only the metadata of the RO-Crate - metadata_only: bool = False - #: RO-Crate metadata as dictionary - metadata_dict: dict | None = None - #: Verbose output - verbose: bool = False - #: Cache max age in seconds (negative values mean "never expire") - cache_max_age: int = DEFAULT_HTTP_CACHE_MAX_AGE - #: Cache path - cache_path: Path | None = None - #: Flag to enable offline mode: HTTP requests are served only from the cache - offline: bool = False - #: Flag to disable the HTTP cache entirely: every request hits the network - no_cache: bool = False - - def __post_init__(self): - # if requirement_severity is a str, convert to Severity - if isinstance(self.requirement_severity, str): - self.requirement_severity = Severity[self.requirement_severity] - # Offline mode needs the cache to serve responses, so it cannot be - # combined with an explicit cache disable. - if self.offline and self.no_cache: - raise ValueError( - "Offline mode requires the HTTP cache to be enabled; no_cache=True is incompatible with offline=True." 
- ) - # Default to the persistent user cache whenever caching is enabled so that - # consecutive runs (online then offline) share the same HTTP cache: this - # is what lets the offline mode find the resources fetched online. - if self.cache_path is None and not self.no_cache: - default_path = get_default_http_cache_path() - default_path.parent.mkdir(parents=True, exist_ok=True) - self.cache_path = default_path - logger.debug("Cache path not set: defaulting to persistent user cache %s", self.cache_path) - if self.offline and self.cache_path is None: - logger.warning( - "Offline mode enabled without a persistent cache path: " - "all HTTP-backed resources will fail unless pre-populated." - ) - # Re-apply the cache settings to the HTTP requester. ``initialize_cache`` - # reconfigures the existing singleton in place (rather than dropping it), - # so new settings take effect without discarding state set on the instance. - HttpRequester.initialize_cache( - cache_path=str(self.cache_path) if self.cache_path is not None else None, - cache_max_age=self.cache_max_age, - offline=self.offline, - no_cache=self.no_cache, - ) - logger.debug( - "HTTP cache initialized at %s with max age %s seconds (offline=%s, no_cache=%s)", - self.cache_path, - self.cache_max_age, - self.offline, - self.no_cache, - ) - # Install the JSON-LD document loader so context resolution goes through the cache. - try: - install_document_loader() - except Exception as e: - logger.debug("Could not install JSON-LD document loader: %s", e) - # Best-effort synchronous warm-up of profile-declared URLs. 
- if not self.offline: - try: - auto_warm_up_for_settings(self) - except Exception as e: - logger.debug("Auto warm-up skipped: %s", e) - - def to_dict(self): - """ - Convert the ValidationSettings to a dictionary - """ - result = asdict(self) - result["rocrate_uri"] = str(self.rocrate_uri) - result.pop("metadata_dict", None) # exclude metadata_dict from the dict representation - # Remove disable_crate_download from the dict representation - result.pop("disable_remote_crate_download", None) - # Remove requirement_severity_only from the dict representation - result.pop("requirement_severity_only", None) - return result - - @property # type: ignore[no-redef] - def rocrate_uri(self) -> URI | None: - """ - Get the RO-Crate URI - - :return: The RO-Crate URI - :rtype: URI - """ - return self._rocrate_uri - - @rocrate_uri.setter - def rocrate_uri(self, value: str | Path | URI): - """ - Set the RO-Crate URI. - - :param value: The RO-Crate URI. - :type value: Union[str, Path, URI] - """ - if not value: - raise ValueError("Invalid RO-Crate URI") - self._rocrate_uri: URI = URI(str(value)) - - @classmethod - def parse(cls, settings: dict | ValidationSettings) -> ValidationSettings: - """ - Parse the settings to a ValidationSettings object. - - :param settings: The settings to parse. - :type settings: Union[dict, ValidationSettings] - - :return: The parsed settings. - :rtype: ValidationSettings - - :raises ValueError: If the settings type is invalid. 
class RequirementValidationEvent(Event):
    """
    Event notified when the validation of a requirement starts or ends.

    Two events are considered equal when they share the same event type and
    the same requirement; the validation result is not part of the identity.
    """

    def __init__(
        self,
        event_type: EventType,
        requirement: Requirement,
        validation_result: bool | None = None,
        message: str | None = None,
    ):
        """
        :param event_type: ``REQUIREMENT_VALIDATION_START`` or ``REQUIREMENT_VALIDATION_END``
        :param requirement: the requirement the event refers to
        :param validation_result: the outcome of the requirement validation, if available
        :param message: optional message forwarded to the base ``Event``
        """
        assert event_type in (
            EventType.REQUIREMENT_VALIDATION_START,
            EventType.REQUIREMENT_VALIDATION_END,
        )
        super().__init__(event_type, message)
        self._requirement = requirement
        self._validation_result = validation_result

    @property
    def requirement(self) -> Requirement:
        """The requirement the event refers to."""
        return self._requirement

    @property
    def validation_result(self) -> bool | None:
        """The outcome of the requirement validation (``None`` when not provided)."""
        return self._validation_result

    def __str__(self) -> str:
        return f"RequirementValidationEvent({self.event_type}, {self.requirement})"

    def __repr__(self) -> str:
        return f"RequirementValidationEvent(event_type={self.event_type}, requirement={self.requirement})"

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented (instead of raising) lets Python fall back to
        # the other operand, so `event == None` or `event in mixed_list` yields
        # False rather than blowing up with a TypeError.
        if not isinstance(other, RequirementValidationEvent):
            return NotImplemented
        return self.event_type == other.event_type and self.requirement == other.requirement

    def __ne__(self, other: object) -> bool:
        outcome = self.__eq__(other)
        # NotImplemented must be propagated untouched: negating it is an error
        return outcome if outcome is NotImplemented else not outcome

    def __hash__(self) -> int:
        return hash((self.event_type, self.requirement))
class Validator(Publisher):
    """
    Validate an RO-Crate against a set of profiles.

    The behaviour is driven by a :class:`ValidationSettings` instance (or by a
    dictionary that ``ValidationSettings.parse`` can turn into one). While a
    validation is running the validator keeps a reference to the active
    :class:`ValidationContext`; :meth:`notify` relies on it to update the
    statistics of the validation result.
    """

    def __init__(self, settings: dict | ValidationSettings):
        """
        :param settings: the validation settings, as an object or as a dictionary
        :type settings: Union[dict, ValidationSettings]
        """
        self._validation_settings = ValidationSettings.parse(settings)
        super().__init__()
        # context of the validation currently in progress (None when idle)
        self.__current_context__: ValidationContext | None = None

    @property
    def validation_settings(self) -> ValidationSettings:
        """The settings used by this validator."""
        return self._validation_settings

    def detect_rocrate_profiles(self) -> list[Profile]:
        """
        Detect the profiles the RO-Crate declares conformance to.

        The candidate URIs are collected from the metadata (``get_conforms_to``
        and ``get_root_data_entity_conforms_to``) and matched against the
        available profiles. A matched profile that is inherited by another
        matched profile is dropped, so only the most specific ones are returned.

        :return: the detected profiles; an empty list when none can be
            determined or when an error occurs during the detection
        :rtype: list[Profile]
        """
        try:
            # initialize the validation context
            context = ValidationContext(self, self.validation_settings)
            candidate_profiles_uris: set[str] = set()
            try:
                candidate_profiles_uris.update(context.ro_crate.metadata.get_conforms_to() or [])
            except Exception as e:
                logger.debug("Error while getting candidate profiles URIs: %s", e)
            try:
                candidate_profiles_uris.update(context.ro_crate.metadata.get_root_data_entity_conforms_to() or [])
            except Exception as e:
                logger.debug("Error while getting candidate profiles URIs: %s", e)

            logger.debug("Candidate profiles: %s", candidate_profiles_uris)
            if not candidate_profiles_uris:
                logger.debug("Unable to determine the profile to validate against")
                return []
            # load the profiles
            available_profiles = Profile.load_profiles(
                context.profiles_path,
                extra_profiles_path=context.extra_profiles_path,
                publicID=context.publicID,
                severity=context.requirement_severity,
            )
            profiles = [p for p in available_profiles if p.uri in candidate_profiles_uris]
            # Collect every inherited profile up front: pruning while iterating
            # only worked when a base profile happened to be listed before the
            # profile that inherits from it.
            inherited_profiles = [
                inherited_profile for profile in profiles for inherited_profile in profile.inherited_profiles
            ]
            candidate_profiles = [p for p in profiles if p not in inherited_profiles]
            logger.debug(
                "%d Candidate Profiles found: %s",
                len(candidate_profiles),
                candidate_profiles,
            )
            # unmatched candidate profiles
            unmatched_profiles = candidate_profiles_uris.difference({p.uri for p in profiles})
            logger.debug("Unmatched Candidate Profiles URIs: %s", unmatched_profiles)
            if unmatched_profiles:
                logger.warning(
                    "The conformance to the following profiles could not be verified: %s",
                    ", ".join(sorted(unmatched_profiles)),
                )
            return candidate_profiles

        except Exception:
            # best-effort detection: errors are reported only at debug level
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Error detecting RO-Crate profiles")
            return []

    def validate(self) -> ValidationResult:
        """
        Validate the RO-Crate against the detected profiles according to the validation settings
        """
        return self.__do_validate__()

    def validate_requirements(self, requirements: list[Requirement]) -> ValidationResult:
        """
        Validates the RO-Crate against the specified subset of the profile requirements
        """
        assert all(isinstance(requirement, Requirement) for requirement in requirements), "Invalid requirement type"
        # perform the requirements validation
        return self.__do_validate__(requirements)

    def __do_validate__(self, requirements: list[Requirement] | None = None) -> ValidationResult:
        """
        Run the validation loop over the profiles of a fresh validation context.

        :param requirements: optional subset of requirements to validate; when
            ``None`` every requirement selected by the settings is validated
        :type requirements: Optional[list[Requirement]]

        :return: the validation result of the context
        :rtype: ValidationResult
        """
        # initialize the validation context
        context = ValidationContext(self, self.validation_settings)
        # register the current context
        self.__current_context__ = context

        # Identifiers of the explicitly requested requirements (None => no filter).
        # NOTE(review): assumes `Requirement.identifier` identifies a requirement
        # across profiles — confirm against the Requirement model.
        selected_identifiers = None if requirements is None else {r.identifier for r in requirements}

        try:
            # initialize the requirement types (inside the `try` so that the
            # current context is cleared even when a hook fails)
            self.__invoke_pre_validation_hooks__(context)

            # set the profiles to validate against
            profiles = context.profiles
            assert len(profiles) > 0, "No profiles to validate"
            # Pre-load every profile's requirements so all shape graphs are
            # populated before the validation loop runs. This lets a check
            # see `sh:deactivated true` triples declared by descendant
            # profiles that have not yet been visited.
            for p in profiles:
                _ = p.requirements
            self.notify(EventType.VALIDATION_START)
            for profile in profiles:
                logger.debug(
                    "Validating profile %s (id: %s)",
                    profile.name,
                    profile.identifier,
                )
                # set the target profile in the context
                context._target_validation_profile = profile
                self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_START, profile=profile))
                # requirements of the profile selected by the severity settings
                profile_requirements = profile.get_requirements(
                    context.requirement_severity,
                    exact_match=context.requirement_severity_only,
                )
                # honour the subset requested through `validate_requirements`
                if selected_identifiers is not None:
                    profile_requirements = [
                        r for r in profile_requirements if r.identifier in selected_identifiers
                    ]
                logger.debug(
                    "Validating profile %s with %s requirements",
                    profile.identifier,
                    len(profile_requirements),
                )
                logger.debug(
                    "For profile %s, validating these %s requirements: %s",
                    profile.identifier,
                    len(profile_requirements),
                    profile_requirements,
                )
                terminate = False
                for requirement in profile_requirements:
                    # overridden requirements are still executed, but no
                    # start/end event is published for them
                    if not requirement.overridden:
                        self.notify(
                            RequirementValidationEvent(
                                EventType.REQUIREMENT_VALIDATION_START,
                                requirement=requirement,
                            )
                        )
                    passed = requirement._do_validate_(context)
                    logger.debug(
                        "Requirement %s passed: %s",
                        requirement.identifier,
                        passed,
                    )
                    if not requirement.overridden:
                        self.notify(
                            RequirementValidationEvent(
                                EventType.REQUIREMENT_VALIDATION_END,
                                requirement=requirement,
                                validation_result=passed,
                            )
                        )
                    if passed:
                        logger.debug("Validation Requirement passed")
                    else:
                        logger.debug(
                            "Validation Requirement %s failed (profile: %s)",
                            requirement,
                            profile.identifier,
                        )
                        if context.fail_fast:
                            logger.debug("Aborting on first requirement failure")
                            terminate = True
                            break
                self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_END, profile=profile))
                if terminate:
                    break

            # finalize the requirement types
            self.__invoke_post_validation_hooks__(context)
            # notify the end of the validation
            self.notify(ValidationEvent(EventType.VALIDATION_END, validation_result=context.result))
            # return the validation result
            return context.result
        finally:
            # clear the current context
            self.__current_context__ = None

    def __invoke_pre_validation_hooks__(self, context: ValidationContext):
        """Call ``initialize(context)`` on every requirement class known to the loader."""
        logger.debug("Initializing requirement types: starting...")
        requirements_types = RequirementLoader.__get_requirement_classes__()
        for requirement_type in requirements_types:
            requirement_type.initialize(context)
        logger.debug("Initializing requirement types: completed")

    def __invoke_post_validation_hooks__(self, context: ValidationContext):
        """Call ``finalize(context)`` on every requirement class known to the loader."""
        logger.debug("Finalizing requirement types: starting...")
        requirements_types = RequirementLoader.__get_requirement_classes__()
        for requirement_type in requirements_types:
            requirement_type.finalize(context)
        logger.debug("Finalizing requirement types: completed")

    def notify(self, event: Event | EventType, ctx: Any | None = None):
        """
        Override notify to update statistics.

        The event is first recorded in the statistics of the current result and
        then forwarded to the base publisher. The ``ctx`` argument is ignored:
        the current validation context is always the one propagated.

        :param event: the event (or bare event type) to publish
        :param ctx: unused; kept for signature compatibility
        """
        assert self.__current_context__ is not None, "No current validation context"
        result: ValidationResult = self.__current_context__.result
        if isinstance(event, EventType):
            event = Event(event)
        result.statistics.update(event, ctx=self.__current_context__)
        return super().notify(event, ctx=self.__current_context__)
def __init__(self, validator: Validator, settings: ValidationSettings):
    """
    Build the context of a validation run.

    :param validator: the validator this context belongs to
    :type validator: Validator

    :param settings: the settings of the validation
    :type settings: ValidationSettings
    """
    # owner and configuration
    self._validator = validator
    self._settings = settings
    # lazily populated state: data graph, profiles, target profile, result
    self._data_graph: Graph | None = None
    self._profiles: list[Profile] | None = None
    self._target_validation_profile: Profile | None = None
    self._result: ValidationResult | None = None
    # free-form additional properties of the context
    self._properties: dict = {}
    # URLs already reported as missing from the HTTP cache during this run
    self._offline_cache_misses_warned: set[str] = set()

    # the RO-Crate is built from its URI unless an in-memory metadata
    # dictionary has been supplied
    if not settings.metadata_dict:
        crate_location = settings.rocrate_uri
        assert crate_location is not None, "RO-Crate URI is required when metadata_dict is not provided"
        self._rocrate = ROCrate.new_instance(
            crate_location,
            relative_root_path=settings.rocrate_relative_root_path,
        )
    else:
        self._rocrate = ROCrate.from_metadata_dict(settings.metadata_dict)
    assert isinstance(self._rocrate, ROCrate), "Invalid RO-Crate instance"
@property
def extra_profiles_path(self) -> Path | None:
    """
    The location of the additional profiles, if any.

    A string taken from the settings is converted to a ``Path``; any falsy
    value is reported as ``None``.

    :return: The path to the extra profiles
    :rtype: Optional[Path]
    """
    configured = self.settings.extra_profiles_path
    resolved = Path(configured) if isinstance(configured, str) else configured
    return resolved if resolved else None
def get_data_graph(self, refresh: bool = False) -> Graph:
    """
    Utility method to get the data graph of the RO-Crate,
    i.e., the metadata of the RO-Crate as an RDF graph.

    The graph is loaded on first access and cached; later calls return the
    cached instance unless ``refresh`` is set.

    :param refresh: Flag to refresh the data graph
    :type refresh: bool

    :return: The data graph of the RO-Crate
    :rtype: :py:class:rdflib.Graph

    :raises ROCrateMetadataNotFoundError: If the RO-Crate metadata is not found
    """
    try:
        # Compare against None explicitly: an rdflib Graph is a sized
        # container, so an empty (but already loaded) graph is falsy and a
        # plain truthiness check would re-parse the metadata on every access.
        if self._data_graph is None or refresh:
            self._data_graph = self.__load_data_graph__()
        return self._data_graph
    except (HTTPError, FileNotFoundError) as e:
        logger.debug("Error loading data graph: %s", e)
        raise ROCrateMetadataNotFoundError(str(self.rocrate_uri)) from e
def __load_profiles__(self) -> list[Profile]:
    """
    Resolve the profiles to validate against.

    The target profile is looked up by identifier first and, failing that,
    by token; in the latter case the candidate with the highest version is
    selected and the settings are updated with its identifier.

    :return: the target profile preceded by its inherited profiles, or the
        target profile alone when profile inheritance is disabled
    :rtype: list[Profile]

    :raises ProfileNotFound: if no profile matches the configured identifier
    """
    # Load all the available profiles. The returned list is not needed here:
    # the lookups below go through the `Profile` class itself.
    Profile.load_profiles(
        self.profiles_path,
        extra_profiles_path=self.settings.extra_profiles_path,
        publicID=self.publicID,
        severity=self.requirement_severity,
        allow_requirement_check_override=self.allow_requirement_check_override,
    )

    # Check if the target profile is in the list of profiles
    profile = Profile.get_by_identifier(self.profile_identifier)
    if not profile:
        try:
            candidate_profiles = Profile.get_by_token(self.profile_identifier)
            logger.debug("Candidate profiles found by token: %s", candidate_profiles)
            if candidate_profiles:
                # Find the profile with the highest version number.
                # NOTE(review): versions are compared as plain strings, so
                # e.g. "1.10" sorts before "1.9" — confirm this is intended.
                profile = max(candidate_profiles, key=lambda p: p.version or "")
                self.settings.profile_identifier = profile.identifier
                logger.debug("Profile with the highest version number: %s", profile)
        except AttributeError as e:
            # raised when the profile is not found
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception("Profile not found: %s", self.profile_identifier)
            raise ProfileNotFound(
                self.profile_identifier,
                message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'",
            ) from e
    if profile is None:
        raise ProfileNotFound(
            self.profile_identifier,
            message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'",
        )

    # if the inheritance is disabled, return only the target profile
    if not self.inheritance_enabled:
        return [profile]

    # validate against the inherited profiles first, then the target profile
    return [*profile.inherited_profiles, profile]
def get_profile_by_identifier(self, identifier: str) -> Profile:
    """
    Look up one of the profiles to validate against by its identifier.

    :param identifier: The identifier of the profile
    :type identifier: str

    :return: The first profile whose identifier equals ``identifier``
    :rtype: Profile

    :raises ProfileNotFound: if none of the profiles has that identifier
    """
    match = next(
        (candidate for candidate in self.profiles if candidate.identifier == identifier),
        None,
    )
    if match is None:
        raise ProfileNotFound(identifier)
    return match
+ +from rocrate_validator.utils.uri import URI + +from rocrate_validator.models._logging import logger +from rocrate_validator.models.severity import ( + LevelCollection, + RequirementLevel, + Severity, +) +from rocrate_validator.models.settings import ( + DEFAULT_PROFILES_PATH, + BaseTypes, + ValidationSettings, +) +from rocrate_validator.models.events import ( + ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + ValidationEvent, +) +from rocrate_validator.models.requirement import ( + Requirement, + RequirementCheck, + RequirementLoader, + SkipRequirementCheck, + SourceSnippet, +) +from rocrate_validator.models.profile import Profile +from rocrate_validator.models.result import ( + CheckIssue, + CustomEncoder, + ValidationResult, +) +from rocrate_validator.models.statistics import ( + AggregatedValidationStatistics, + ValidationStatistics, + ValidationStatisticsListener, +) +from rocrate_validator.models.validation import ( + ValidationContext, + Validator, +) + +__all__ = [ + "AggregatedValidationStatistics", + "BaseTypes", + "CheckIssue", + "CustomEncoder", + "DEFAULT_PROFILES_PATH", + "LevelCollection", + "Profile", + "ProfileValidationEvent", + "Requirement", + "RequirementCheck", + "RequirementCheckValidationEvent", + "RequirementLevel", + "RequirementLoader", + "RequirementValidationEvent", + "Severity", + "SkipRequirementCheck", + "SourceSnippet", + "URI", + "ValidationContext", + "ValidationEvent", + "ValidationResult", + "ValidationSettings", + "ValidationStatistics", + "ValidationStatisticsListener", + "Validator", + "logger", +] diff --git a/rocrate_validator/models/_logging.py b/rocrate_validator/models/_logging.py new file mode 100644 index 000000000..f4a85dcf7 --- /dev/null +++ b/rocrate_validator/models/_logging.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rocrate_validator.utils import log as logging + +logger = logging.getLogger("rocrate_validator.models") diff --git a/rocrate_validator/models/events.py b/rocrate_validator/models/events.py new file mode 100644 index 000000000..6f06038c1 --- /dev/null +++ b/rocrate_validator/models/events.py @@ -0,0 +1,161 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class ValidationEvent(Event):
    """
    Event that can carry the result of a validation.
    """

    def __init__(
        self,
        event_type: EventType,
        validation_result: ValidationResult | None = None,
        message: str | None = None,
    ):
        """
        :param event_type: the type of the event
        :param validation_result: the validation result attached to the event, if any
        :param message: optional message forwarded to the base ``Event``
        """
        super().__init__(event_type, message)
        self._validation_result = validation_result

    @property
    def validation_result(self) -> ValidationResult | None:
        """The validation result attached to the event (``None`` when not provided)."""
        return self._validation_result


class ProfileValidationEvent(Event):
    """
    Event notified when the validation of a profile starts or ends.

    Two events are equal when they share the same event type and profile.
    """

    def __init__(
        self,
        event_type: EventType,
        profile: Profile,
        message: str | None = None,
    ):
        """
        :param event_type: ``PROFILE_VALIDATION_START`` or ``PROFILE_VALIDATION_END``
        :param profile: the profile the event refers to
        :param message: optional message forwarded to the base ``Event``
        """
        assert event_type in (
            EventType.PROFILE_VALIDATION_START,
            EventType.PROFILE_VALIDATION_END,
        )
        super().__init__(event_type, message)
        self._profile = profile

    @property
    def profile(self) -> Profile:
        """The profile the event refers to."""
        return self._profile

    def __str__(self) -> str:
        return f"ProfileValidationEvent({self.event_type}, {self.profile})"

    def __repr__(self) -> str:
        return f"ProfileValidationEvent(event_type={self.event_type}, profile={self.profile})"

    def __eq__(self, other: object) -> bool:
        # NotImplemented lets Python try the reflected comparison and finally
        # answer False, instead of raising on e.g. `event == None`.
        if not isinstance(other, ProfileValidationEvent):
            return NotImplemented
        return self.event_type == other.event_type and self.profile == other.profile

    def __ne__(self, other: object) -> bool:
        outcome = self.__eq__(other)
        # NotImplemented must be propagated untouched: negating it is an error
        return outcome if outcome is NotImplemented else not outcome

    def __hash__(self) -> int:
        return hash((self.event_type, self.profile))


class RequirementValidationEvent(Event):
    """
    Event notified when the validation of a requirement starts or ends.

    Two events are equal when they share the same event type and requirement;
    the validation result is not part of the identity.
    """

    def __init__(
        self,
        event_type: EventType,
        requirement: Requirement,
        validation_result: bool | None = None,
        message: str | None = None,
    ):
        """
        :param event_type: ``REQUIREMENT_VALIDATION_START`` or ``REQUIREMENT_VALIDATION_END``
        :param requirement: the requirement the event refers to
        :param validation_result: the outcome of the requirement validation, if available
        :param message: optional message forwarded to the base ``Event``
        """
        assert event_type in (
            EventType.REQUIREMENT_VALIDATION_START,
            EventType.REQUIREMENT_VALIDATION_END,
        )
        super().__init__(event_type, message)
        self._requirement = requirement
        self._validation_result = validation_result

    @property
    def requirement(self) -> Requirement:
        """The requirement the event refers to."""
        return self._requirement

    @property
    def validation_result(self) -> bool | None:
        """The outcome of the requirement validation (``None`` when not provided)."""
        return self._validation_result

    def __str__(self) -> str:
        return f"RequirementValidationEvent({self.event_type}, {self.requirement})"

    def __repr__(self) -> str:
        return f"RequirementValidationEvent(event_type={self.event_type}, requirement={self.requirement})"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RequirementValidationEvent):
            return NotImplemented
        return self.event_type == other.event_type and self.requirement == other.requirement

    def __ne__(self, other: object) -> bool:
        outcome = self.__eq__(other)
        return outcome if outcome is NotImplemented else not outcome

    def __hash__(self) -> int:
        return hash((self.event_type, self.requirement))


class RequirementCheckValidationEvent(Event):
    """
    Event notified when the validation of a requirement check starts or ends.

    Two events are equal when they share the same event type and requirement
    check; the validation result is not part of the identity.
    """

    def __init__(
        self,
        event_type: EventType,
        requirement_check: RequirementCheck,
        validation_result: bool | None = None,
        message: str | None = None,
    ):
        """
        :param event_type: ``REQUIREMENT_CHECK_VALIDATION_START`` or ``REQUIREMENT_CHECK_VALIDATION_END``
        :param requirement_check: the requirement check the event refers to
        :param validation_result: the outcome of the check, if available
        :param message: optional message forwarded to the base ``Event``
        """
        assert event_type in (
            EventType.REQUIREMENT_CHECK_VALIDATION_START,
            EventType.REQUIREMENT_CHECK_VALIDATION_END,
        )
        super().__init__(event_type, message)
        self._requirement_check = requirement_check
        self._validation_result = validation_result

    @property
    def requirement_check(self) -> RequirementCheck:
        """The requirement check the event refers to."""
        return self._requirement_check

    @property
    def validation_result(self) -> bool | None:
        """The outcome of the check (``None`` when not provided)."""
        return self._validation_result

    def __str__(self) -> str:
        return f"RequirementCheckValidationEvent({self.event_type}, {self.requirement_check})"

    def __repr__(self) -> str:
        return (
            f"RequirementCheckValidationEvent(event_type={self.event_type}, requirement_check={self.requirement_check})"
        )

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RequirementCheckValidationEvent):
            return NotImplemented
        return self.event_type == other.event_type and self.requirement_check == other.requirement_check

    def __ne__(self, other: object) -> bool:
        outcome = self.__eq__(other)
        return outcome if outcome is NotImplemented else not outcome

    def __hash__(self) -> int:
        return hash((self.event_type, self.requirement_check))
+ +from __future__ import annotations + +import re +from functools import total_ordering +from pathlib import Path +from typing import TYPE_CHECKING, Any, Collection, cast + +from rdflib import RDF, RDFS, Graph, Namespace, URIRef + +from rocrate_validator.constants import ( + DEFAULT_PROFILE_README_FILE, + IGNORED_PROFILE_DIRECTORIES, + PROF_NS, + PROFILE_SPECIFICATION_FILE, + SCHEMA_ORG_NS, +) +from rocrate_validator.errors import ( + DuplicateRequirementCheck, + InvalidProfilePath, + ProfileNotFound, + ProfileSpecificationError, + ProfileSpecificationNotFound, +) +from rocrate_validator.models.severity import Severity +from rocrate_validator.models._logging import logger +from rocrate_validator.utils.collections import MapIndex, MultiIndexMap + +if TYPE_CHECKING: + from rocrate_validator.models.requirement import Requirement, RequirementCheck + +@total_ordering +class Profile: + """ + RO-Crate Validator profile. + + A profile is a named set of requirements that can be used to validate an RO-Crate. 
def __init__(
    self,
    profiles_base_path: Path,
    profile_path: Path,
    requirements: list[Requirement] | None = None,
    identifier: str | None = None,
    publicID: str | None = None,
    severity: Severity = Severity.REQUIRED,
):
    """
    Initialize the Profile instance and register it in the class-level profiles map.

    The profile specification file is parsed as Turtle and must declare
    exactly one ``prof:Profile`` node. If the map already holds a profile
    with the same URI but a different path, a warning is logged and that
    profile is recorded as overridden by the new one.

    :param profiles_base_path: the base path of the profiles (stored as is)
    :type profiles_base_path: Path

    :param profile_path: the path of the profile
    :type profile_path: Path

    :param requirements: the list of requirements of the profile
        (an empty list is used when omitted)
    :type requirements: list[Requirement]

    :param identifier: the identifier of the profile
    :type identifier: str

    :param publicID: the public identifier of the profile
    :type publicID: str

    :param severity: the severity of the profile
    :type severity: Severity

    :raises ProfileSpecificationNotFound: if the profile specification file is not found
    :raises ProfileSpecificationError: if the profile specification file does not
        contain exactly one profile
    :raises InvalidProfilePath: if the profile path is not a directory
        (NOTE(review): not raised by this method directly — presumably by a
        helper it calls; confirm)

    :meta private:
    """
    self._identifier: str | None = identifier
    self._profiles_base_path = profiles_base_path
    self._profile_path = profile_path
    self._name: str | None = None
    self._description: str | None = None
    self._requirements: list[Requirement] = requirements if requirements is not None else []
    self._publicID = publicID
    self._severity = severity
    # override relationships, filled in by `__add_override__`
    self._overrides: list[Profile] = []
    self._overridden_by: list[Profile] = []

    # init property to store the RDF node which is the root of the profile specification graph
    self._profile_node: Any = None

    # init property to store the RDF graph of the profile specification
    self._profile_specification_graph: Graph | None = None

    # check if the profile specification file exists
    spec_file = self.profile_specification_file_path
    if not spec_file or not spec_file.exists():
        raise ProfileSpecificationNotFound(str(spec_file))
    # load the profile specification expressed using the Profiles Vocabulary
    profile = Graph()
    profile.parse(str(spec_file), format="turtle")
    # check that the specification Graph hosts only one profile
    profiles = list(profile.subjects(predicate=RDF.type, object=PROF_NS.Profile))
    if len(profiles) == 1:
        self._profile_node = profiles[0]
        self._profile_specification_graph = profile
        # initialize the token and version
        self._token, self._version = self.__init_token_version__()

        # Look for a profile already registered under the same URI
        existing_profile = self.__profiles_map.get_by_key(cast("Any", self._profile_node).toPython())
        # If an existing profile is being overridden by a different one, log a warning
        if existing_profile and existing_profile.path != profile_path:
            logger.warning(
                "Profile with identifier %s at %s is being overridden by the profile loaded from %s.",
                existing_profile.identifier,
                existing_profile.path,
                profile_path,
            )
            # record that this profile overrides the existing one
            self.__add_override__(existing_profile)

        # add the profile to the profiles map, keyed by its URI and indexed
        # by token, name, identifier and token path
        self.__profiles_map.add(
            cast("Any", self._profile_node).toPython(),
            self,
            token=self.token,
            name=self.name,
            identifier=self.identifier,
            token_path=self.__extract_token_from_path__(),
        )
    else:
        raise ProfileSpecificationError(
            message=f"Profile specification file {spec_file} must contain exactly one profile"
        )
def __add_override__(self, profile: Profile):
    """
    Register `profile` as a profile that is overridden by this profile.

    The relation is recorded in both directions: `profile` is appended to
    the `overrides` list of this profile, and this profile is appended to
    the `overridden_by` list of `profile`. Registering a profile that is
    already in `overrides` has no effect.

    :param profile: the profile that is overridden by this profile
    :type profile: Profile

    :raises TypeError: if `profile` is not a `Profile` instance
    """
    if not isinstance(profile, Profile):
        raise TypeError(f"Expected a Profile instance, got {type(profile)}")
    if profile not in self._overrides:
        self._overrides.append(profile)
        # keep the reverse relation in sync on the overridden profile
        profile._overridden_by.append(self)

@property
def overrides(self) -> list[Profile]:
    """
    The list of profiles that are overridden by this profile.
    """
    return self._overrides

@property
def overridden_by(self) -> list[Profile]:
    """
    The list of profiles that override this profile.
    """
    return self._overridden_by
+ """ + return self._profile_specification_graph # type: ignore[return-value] + + @property + def profile_node(self): + return self._profile_node + + @property + def token(self): + """ + A token that uniquely identifies the profile + as specified in the profile specification file + (i.e., the value of the prof: hasToken property + in the `profile.ttl` file). + """ + return self._token + + @property + def uri(self): + """ + The URI of the profile. + """ + return self._profile_node.toPython() + + @property + def label(self): + return self.__get_specification_property__("label", RDFS) # type: ignore[arg-type] + + @property + def comment(self): + """ + The comment added to the profile in the profile specification file + (i.e., the value of the rdfs: comment property in the `profile.ttl` file). + """ + return self.__get_specification_property__("comment", RDFS) # type: ignore[arg-type] + + @property + def version(self): + """ + The version of the profile as specified in the profile specification file + (i.e., the value of the prof: version property in the `profile.ttl` file). + """ + return self._version + + @property + def is_profile_of(self) -> list[str]: + """ + The list of profiles that this profile is a profile of + as specified in the profile specification file + (i.e., the value of the prof: isProfileOf property in the `profile.ttl` file). + """ + return cast("list[str]", self.__get_specification_property__("isProfileOf", PROF_NS, pop_first=False)) + + @property + def is_transitive_profile_of(self) -> list[str]: + """ + The list of profiles that this profile is a transitive profile of + as specified in the profile specification file + (i.e., the value of the prof: isTransitiveProfileOf property in the `profile.ttl` file). 
+ """ + return cast("list[str]", self.__get_specification_property__("isTransitiveProfileOf", PROF_NS, pop_first=False)) + + @property + def parents(self) -> list[Profile]: + """ + The list of profiles that this profile is a profile of + as specified in the profile specification file. + """ + return [ + profile + for profile in (self.__profiles_map.get_by_key(_) for _ in self.is_profile_of) + if profile is not None + ] + + @property + def siblings(self) -> list[Profile]: + """ + The list of profiles that are siblings of this profile + (i.e., profiles that share the same parent profile). + """ + return self.get_sibling_profiles(self) + + @property + def descendants(self) -> list[Profile]: + """ + The list of profiles that are descendants of this profile + (i.e., profiles that have this profile among their inherited profiles). + """ + return self.get_descendants(self) + + @property + def readme_file_path(self) -> Path: + """ + The path of the README file of the profile. + """ + return self.path / DEFAULT_PROFILE_README_FILE + + @property + def profile_specification_file_path(self) -> Path: + """ + The path of the profile specification file. + """ + return self.path / PROFILE_SPECIFICATION_FILE + + @property + def publicID(self) -> str | None: + """ + The public identifier of the RO-Crate which is validated by the profile. + + :meta private: + """ + return self._publicID + + @property + def severity(self) -> Severity: + """ + The severity of the profile which the profile is loaded with, + i.e., the minimum severity level of the requirements of the profile. + """ + return self._severity + + @property + def description(self) -> str | None: + """ + The description of the profile as specified in the profile specification file + (i.e., the value of the rdfs: comment property in the `profile.ttl` file). 
+ """ + if not self._description: + if self.path and self.readme_file_path.exists(): + with self.readme_file_path.open(encoding="utf-8") as f: + self._description = f.read() + else: + comment = self.comment + self._description = str(comment) if comment else None + return self._description + + @property + def requirements(self) -> list[Requirement]: + """ + The list of requirements of the profile. + """ + if not self._requirements: + from rocrate_validator.models.requirement import RequirementLoader # noqa: PLC0415 + + self._requirements = RequirementLoader.load_requirements(self, severity=self.severity) + return self._requirements + + def get_requirements(self, severity: Severity = Severity.REQUIRED, exact_match: bool = False) -> list[Requirement]: + """ + Get the requirements of the profile with the given severity level. + If the exact_match flag is set to `True`, only the requirements with the exact severity level + are returned; otherwise, the requirements with severity level greater than or equal to + the given severity level are returned. 
+ """ + return [ + requirement + for requirement in self.requirements + if (not exact_match and (not requirement.severity_from_path or requirement.severity_from_path >= severity)) + or (exact_match and requirement.severity_from_path == severity) + ] + + def get_requirement(self, name: str) -> Requirement | None: + """ + Get the requirement with the given name + """ + for requirement in self.requirements: + if requirement.name == name: + return requirement + return None + + def get_requirement_check(self, check_name: str) -> RequirementCheck | None: + """ + Get the requirement check with the given name + """ + for requirement in self.requirements: + check = requirement.get_check(check_name) + if check: + return check + return None + + @classmethod + def __get_nested_profiles__(cls, source: str) -> list[str]: + result: list[str] = [] + visited = [] + queue = [source] + while len(queue) > 0: + p = queue.pop() + if p not in visited: + visited.append(p) + profile = cls.__profiles_map.get_by_key(p) + if profile is None: + continue + inherited_profiles = profile.is_profile_of + if inherited_profiles: + for p in sorted(inherited_profiles, reverse=True): + if p not in visited: + queue.append(p) + if p not in result: + result.insert(0, p) + return result + + @property + def inherited_profiles(self) -> list[Profile]: + inherited_profiles = self.is_transitive_profile_of + if not inherited_profiles or len(inherited_profiles) == 0: + inherited_profiles = Profile.__get_nested_profiles__(self.uri) + profile_keys = self.__profiles_map.keys + return [ + profile + for key in inherited_profiles + if key in profile_keys + for profile in [self.__profiles_map.get_by_key(key)] + if isinstance(profile, Profile) + ] + + def add_requirement(self, requirement: Requirement): + self._requirements.append(requirement) + + def remove_requirement(self, requirement: Requirement): + self._requirements.remove(requirement) + + def __eq__(self, other: object) -> bool: + return isinstance(other, Profile) 
def __repr__(self) -> str:
    """
    Return a debugging representation of the profile.

    The requirements are deliberately left out: reading `self.requirements`
    loads them from disk when they have not been loaded yet, which is not
    something a `repr()` call should trigger.
    """
    return f"Profile(identifier={self.identifier}, name={self.name}, path={self.path})"

def __str__(self) -> str:
    """Return the profile name followed by its identifier in parentheses."""
    return f"{self.name} ({self.identifier})"

def to_dict(self) -> dict:
    """Return the identifier, URI, name and description of the profile as a dict."""
    return {
        "identifier": self.identifier,
        "uri": self.uri,
        "name": self.name,
        "description": self.description,
    }

@staticmethod
def __extract_version_from_token__(token: str) -> str | None:
    """
    Extract a version number from the given token.

    A version is a sequence of one to three dot-separated integers,
    optionally prefixed by `v`, that is preceded by a non-word character
    (e.g., `ro-crate-1.1` -> `1.1`). When more than one candidate is found,
    the last one is returned.

    :param token: the string to search
    :return: the version found or `None` if the token is empty or has no version
    """
    if not token:
        return None
    pattern = r"\Wv?(\d+(\.\d+(\.\d+)?)?)"
    matches = re.findall(pattern, token)
    if matches:
        # each match is a tuple of groups: the first one is the whole version
        return matches[-1][0]
    return None

def __get_consistent_version__(self, candidate_token: str) -> str | None:
    """
    Determine the version of the profile from all the available sources.

    The version is searched in the `version` property of the profile
    specification, in the candidate token, in the profile path (relative
    to the profiles base path) and in the profile URI.

    :param candidate_token: the candidate token of the profile
    :return: the version, or `None` if no source declares one
    :raises ProfileSpecificationError: if the sources declare different versions
    """
    candidates = {
        _
        for _ in [
            cast("str | None", self.__get_specification_property__("version", SCHEMA_ORG_NS)),
            self.__extract_version_from_token__(candidate_token),
            self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))),
            self.__extract_version_from_token__(str(self.uri)),
        ]
        if _ is not None
    }
    if len(candidates) > 1:
        raise ProfileSpecificationError(f"Inconsistent versions found: {candidates}")
    logger.debug("Candidate versions: %s", candidates)
    return candidates.pop() if len(candidates) == 1 else None

def __extract_token_from_path__(self) -> str:
    """
    Build a token from the location of the profile inside the base path.

    The components of the profile path relative to the profiles base path
    are joined with hyphens (e.g., `<base>/ro-crate/1.1` -> `ro-crate-1.1`).

    :return: the token derived from the profile path
    :raises ValueError: if the profile path is not located under the base path
    """
    base_path = self._profiles_base_path.absolute()
    profile_path = self.path.absolute()
    # Compare path components rather than raw strings, so that a sibling
    # directory sharing a name prefix with the base path is not accepted
    # and the result does not depend on the path separator
    try:
        relative_path = profile_path.relative_to(base_path)
    except ValueError:
        raise ValueError("Path does not start with the base path") from None
    if not relative_path.parts:
        # The profile lives directly in the base directory: keep the
        # historical token derived from the whole absolute path
        return profile_path.as_posix().replace("/", "-")
    return "-".join(relative_path.parts)
@classmethod
def load_profiles(
    cls,
    profiles_path: str | Path,
    extra_profiles_path: str | Path | None = None,
    publicID: str | None = None,
    severity: Severity = Severity.REQUIRED,
    allow_requirement_check_override: bool = True,
) -> list[Profile]:
    """
    Load the profiles found under the given profiles directories.

    :param profiles_path: the path to the profiles directory
    :type profiles_path: Union[str, Path]

    :param extra_profiles_path: an additional path to search for profiles
    :type extra_profiles_path: Union[str, Path]

    :param publicID: the public identifier passed to every loaded profile
    :type publicID: str

    :param severity: the severity the profiles are loaded with
    :type severity: Severity

    :param allow_requirement_check_override: if `False`, an error is raised
        when the same requirement check is found more than once
    :type allow_requirement_check_override: bool

    :return: the loaded profiles, ordered by the number of profiles they
        inherit from (profiles with no dependencies first) and then by identifier
    :rtype: list[Profile]

    :raises DuplicateRequirementCheck: if a check is found more than once
        and `allow_requirement_check_override` is `False`
    """
    # initialize the profiles list
    profiles: list[Profile] = []
    # calculate the list of profiles path as the subdirectories of the profiles path
    # where the profile specification file is present
    profiles_paths = cls.__load_profiles_paths__(profiles_path, extra_profiles_path)

    # iterate through the directories and load the profiles
    for root_profile_path, profile_path in profiles_paths:
        logger.debug(
            "Checking profile path: %s %s %r",
            profile_path,
            profile_path.is_dir(),
            IGNORED_PROFILE_DIRECTORIES,
        )
        # check if the profile path is a directory and not in the ignored directories
        # NOTE(review): this compares a Path object with the entries of
        # IGNORED_PROFILE_DIRECTORIES - confirm that the constant holds Path objects
        if profile_path.is_dir() and profile_path not in IGNORED_PROFILE_DIRECTORIES:
            profile = Profile.__load_profile_path__(
                root_profile_path,
                profile_path,
                publicID=publicID,
                severity=severity,
            )
            # if the profile overrides another profile,
            # remove the overridden profiles from the list of profiles
            # to avoid duplicates and ensure that the most specific profile is used
            if profile.overrides:
                for overridden_profile in profile.overrides:
                    if overridden_profile in profiles:
                        profiles.remove(overridden_profile)
            # add the profile to the list of profiles
            profiles.append(profile)
            logger.debug("Loaded profile: %s (%s)", profile.identifier, profile.path)

    # order profiles based on the inheritance hierarchy,
    # from the most specific to the most general
    # (i.e., from the leaves of the graph to the root)
    profiles = sorted(profiles, reverse=True)

    # Check for overridden checks
    if not allow_requirement_check_override:
        # Navigate the profiles to check for overridden checks.
        # If the override is not enabled in the settings raise an error.
        profiles_checks = set()
        # Search for duplicated checks in the profiles
        for profile in profiles:
            profile_checks = [_ for r in profile.get_requirements() for _ in r.get_checks()]
            for check in profile_checks:
                # If the check is already present in the list of checks,
                # raise an error if the override is not enabled.
                if check in profiles_checks:
                    raise DuplicateRequirementCheck(check.name, profile.identifier)
                # Add the check to the list of checks
                profiles_checks.add(check)

    # order profiles according to the number of profiles they depend on:
    # i.e, first the profiles that do not depend on any other profile
    # then the profiles that depend on the previous ones, and so on.
    # The count is compared as a number: a string key would sort
    # "10_..." before "2_...".
    return sorted(
        profiles,
        key=lambda x: (len(x.inherited_profiles), x.identifier),
    )
@classmethod
def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Profile:
    """
    Find a profile with the given identifier in the given list of profiles.

    An exact match on the profile identifier is searched first. If none is
    found, the identifier is compared with the identifier of each versioned
    profile stripped of its trailing `-<version>` suffix, so that a profile
    can also be found by its unversioned identifier.

    :param profiles: the list of profiles
    :type profiles: Collection[Profile]

    :param profile_identifier: the identifier, with or without the version suffix
    :type profile_identifier: str

    :return: the first matching profile
    :rtype: Profile

    :raises ProfileNotFound: if no profile matches the identifier
    """
    # first pass: exact match on the identifier
    for profile in profiles:
        if profile.identifier == profile_identifier:
            return profile
    # second pass: match on the identifier without the version suffix.
    # Profiles without a version are skipped: their identifier has no suffix
    # to strip, so they can only match in the first pass.
    for profile in profiles:
        version = profile.version
        if not version:
            continue
        identifier = str(profile.identifier)
        suffix = f"-{version}"
        if identifier.endswith(suffix) and identifier[: -len(suffix)] == profile_identifier:
            return profile
    raise ProfileNotFound(profile_identifier)
class SkipRequirementCheck(Exception):
    """
    Exception signalling that the execution of a requirement check has to be skipped.

    :param check: the check to skip
    :param message: the reason why the check is skipped
    """

    def __init__(self, check: RequirementCheck, message: str = ""):
        # Forward both values to Exception so that `args` always mirrors the
        # constructor signature, however the caller passed `message`
        super().__init__(check, message)
        self.check = check
        self.message = message

    def __str__(self):
        # Include the reason when there is one: the exception is logged as the
        # explanation of why the check has been skipped
        text = f"SkipRequirementCheck(check={self.check})"
        return f"{text}: {self.message}" if self.message else text
+ """ + + def __init__( + self, + profile: Profile, + name: str = "", + description: str | None = None, + path: Path | None = None, + initialize_checks: bool = True, + ): + """ + Initialize the Requirement instance + + :meta private: + """ + self._order_number: int | None = None + self._profile = profile + self._description = description + self._path = path # path of code implementing the requirement + self._level_from_path: RequirementLevel | None = None + self._checks: list[RequirementCheck] = [] + self._overridden: bool | None = None + + if not name and path: + self._name = get_requirement_name_from_file(path) + else: + self._name = name + + # set flag to indicate if the checks have been initialized + self._checks_initialized = False + # initialize the checks if the flag is set + if initialize_checks: + _ = self.__init_checks__() + # assign order numbers to checks + self.__reorder_checks__() + # update the checks initialized flag + self._checks_initialized = True + + @property + def order_number(self) -> int: + """ + The order number of the requirement in the profile + + :return: the order number + :rtype: int + """ + assert self._order_number is not None + return self._order_number + + @property + def identifier(self) -> str: + """ + The identifier of the requirement + + :return: the identifier + :rtype: str + """ + return f"{self.profile.identifier}_{self.relative_identifier}" + + @property + def relative_identifier(self) -> str: + """ + The relative identifier of the requirement + + :return: the relative identifier + :rtype: str + + :meta private: + """ + return f"{self.order_number}" + + @property + def name(self) -> str: + return self._name + + @property + def severity_from_path(self) -> Severity | None: + return self.requirement_level_from_path.severity if self.requirement_level_from_path else None + + @property + def requirement_level_from_path(self) -> RequirementLevel | None: + if not self._level_from_path and self._path: + try: + self._level_from_path 
= LevelCollection.get(self._path.parent.name) + except ValueError: + logger.debug( + "The requirement level could not be determined from the path: %s", + self._path, + ) + return self._level_from_path + + @property + def profile(self) -> Profile: + return self._profile + + @property + def description(self) -> str: + if not self._description: + self._description = ( + self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Profile Requirement {self.name}" + ) + return self._description + + @property + def overridden(self) -> bool: + # Check if the requirement has been overridden. + # The requirement can be considered overridden if all its checks have been overridden + if self._overridden is None: + self._overridden = len([_ for _ in self._checks if not _.overridden]) == 0 + return self._overridden + + @property + @abstractmethod + def hidden(self) -> bool: + pass + + @property + def path(self) -> Path | None: + return self._path + + @abstractmethod + def __init_checks__(self) -> list[RequirementCheck]: + pass + + def get_checks(self) -> list[RequirementCheck]: + return self._checks.copy() + + def get_check(self, name: str) -> RequirementCheck | None: + for check in self._checks: + if check.name == name: + return check + return None + + def get_checks_by_level(self, level: RequirementLevel) -> list[RequirementCheck]: + return list({check for check in self._checks if check.level.severity == level.severity}) + + def __reorder_checks__(self) -> None: + for i, check in enumerate(self._checks): + check.order_number = i + 1 + + def _do_validate_(self, context: ValidationContext) -> bool: + """ + Internal method to perform the validation + Returns whether all checks in this requirement passed. 
+ + :meta private: + """ + logger.debug( + "Validating Requirement %s with %s checks", + self.name, + len(self._checks), + ) + all_passed = True + checks_to_perform = [ + _ + for _ in self._checks + if not context.settings.skip_checks or _.identifier not in context.settings.skip_checks + ] + for check in checks_to_perform: + try: + all_passed, should_break = self.__execute_check__(check, context, all_passed) + if should_break: + break + except SkipRequirementCheck as e: + logger.debug("Skipping check '%s' because: %s", check.name, e) + context.result._add_skipped_check(check) + continue + except Exception as e: + if context.maybe_warn_offline_cache_miss(e): + logger.debug("Offline cache miss during check %s: %s", check, e) + else: + logger.warning("Unexpected error during check %s. Exception: %s", check, e) + logger.warning("Consider reporting this as a bug.") + if logger.isEnabledFor(logging.DEBUG): + logger.exception("Unhandled exception during check execution", exc_info=e) + skipped_checks = set(self._checks) - set(checks_to_perform) + context.result.skipped_checks.update(skipped_checks) + logger.debug( + "Checks for Requirement '%s' completed. Checks passed? 
%s", + self.name, + all_passed, + ) + return all_passed + + def __execute_check__(self, check, context, all_passed): + from rocrate_validator.models.events import ( # noqa: PLC0415 + RequirementCheckValidationEvent, + ) + + if check.overridden and check.requirement.profile.identifier != context.profile_identifier: + logger.debug( + "Skipping check '%s' because overridden by '%r'", + check.identifier, + [_.identifier for _ in check.overridden_by], + ) + return all_passed, False + if check.deactivated: + logger.debug("Skipping check '%s' because deactivated", check.identifier) + context.result._add_skipped_check(check) + return all_passed, False + # Determine whether to skip event notification for inherited profiles + skip_event_notify = False + if ( + check.requirement.profile.identifier != context.profile_identifier + and context.settings.disable_inherited_profiles_issue_reporting + ): + logger.debug( + "Inherited profiles reporting disabled. " + "Skipping requirement %s as it belongs to an inherited profile %s", + check.requirement.identifier, + check.requirement.profile.identifier, + ) + skip_event_notify = True + # Notify the start of the check execution if not skip_event_notify is set to True + if not skip_event_notify: + context.validator.notify( + RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) + ) + # Execute the check and get the result + check_result = check.execute_check(context) + logger.debug("Result of check %s: %s", check.identifier, check_result) + context.result._add_executed_check(check, check_result) + # Notify the end of the check execution if not skip_event_notify is set to True + if not skip_event_notify: + context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, + check, + validation_result=check_result, + ) + ) + logger.debug( + "Ran check '%s'. 
def __repr__(self):
    """
    Return a debugging representation of the requirement, made of its
    order number, name, description and path.
    """
    # All the fields are always reported: a conditional expression applied to
    # the concatenated literals would return only one half of the string
    return (
        f"ProfileRequirement("
        f"_order_number={self._order_number}, "
        f"name={self.name}, "
        f"description={self.description}, "
        f"path={self.path})"
    )
initialization for context %s", + cls.__name__, + context, + ) + # do initialization logic here (empty for now) + logger.debug( + "Completed %s requirement initialization for context %s", + cls.__name__, + context, + ) + + @classmethod + def finalize(cls, context: ValidationContext) -> None: + logger.debug( + "Starting %s requirement finalization for context %s", + cls.__name__, + context, + ) + # do finalization logic here (empty for now) + logger.debug( + "Completed %s requirement finalization for context %s", + cls.__name__, + context, + ) + + +class RequirementLoader: + def __init__(self, profile: Profile): + self._profile = profile + + @property + def profile(self) -> Profile: + return self._profile + + @staticmethod + def __get_requirement_type__(requirement_path: Path) -> str: + if requirement_path.suffix == ".py": + return "python" + if requirement_path.suffix == ".ttl": + return "shacl" + raise ValueError(f"Unsupported requirement type: {requirement_path.suffix}") + + @classmethod + def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> RequirementLoader: + requirement_type = cls.__get_requirement_type__(requirement_path) + loader_instance_name = f"_{requirement_type}_loader_instance" + loader_instance = getattr(profile, loader_instance_name, None) + if loader_instance is None: + module_name = f"rocrate_validator.requirements.{requirement_type}" + logger.debug("Loading module: %s", module_name) + module = importlib.import_module(module_name) + loader_class_name = f"{'Py' if requirement_type == 'python' else 'SHACL'}RequirementLoader" + loader_class = getattr(module, loader_class_name) + loader_instance = loader_class(profile) + setattr(profile, loader_instance_name, loader_instance) + return loader_instance + + @staticmethod + def __get_requirement_classes__() -> list[type[Requirement]]: + + # Ensure known requirement modules are imported so subclasses are registered. 
+ for requirement_type in ("python", "shacl"): + module_name = f"rocrate_validator.requirements.{requirement_type}" + try: + importlib.import_module(module_name) + except Exception: + logger.debug( + "Unable to import requirement module: %s", + module_name, + exc_info=True, + ) + + def all_subclasses( + base_class: type[Requirement], + ) -> list[type[Requirement]]: + result: list[type[Requirement]] = [] + for subcls in base_class.__subclasses__(): + result.append(subcls) + result.extend(all_subclasses(subcls)) + return result + + return all_subclasses(Requirement) # type: ignore[type-abstract] + + @staticmethod + def load_requirements(profile: Profile, severity: Severity = Severity.REQUIRED) -> list[Requirement]: + """ + Load the requirements related to the profile + """ + + def ok_file(p: Path) -> bool: + return ( + p.is_file() + and p.suffix in PROFILE_FILE_EXTENSIONS + and p.name not in {DEFAULT_ONTOLOGY_FILE, PROFILE_SPECIFICATION_FILE} + and not p.name.startswith(".") + and not p.name.startswith("_") + ) + + files = sorted( + (p for p in profile.path.rglob("*.*") if ok_file(p)), + key=lambda x: (x.suffix != ".py", x), + ) + + # set the requirement level corresponding to the severity + requirement_level = LevelCollection.get(severity.name) + + requirements = [] + for requirement_path in files: + try: + requirement_level_from_path = LevelCollection.get(requirement_path.parent.name) + if requirement_level_from_path < requirement_level: + continue + except ValueError: + logger.debug( + "The requirement level could not be determined from the path: %s", + requirement_path, + ) + requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) + requirements.extend( + cast("Any", requirement_loader).load( + profile, + requirement_level, + requirement_path, + publicID=profile.publicID, + ) + ) + # sort the requirements by severity + requirements = sorted( + requirements, + key=lambda x: ( + (-x.severity_from_path.value, x.path.name, x.name) 
+ if x.severity_from_path is not None + else (0, x.path.name, x.name) + ), + reverse=False, + ) + # assign order numbers to requirements + for i, requirement in enumerate(requirements): + requirement._order_number = i + 1 + # log and return the requirements + logger.debug("Profile %s loaded %s requirements: %s", profile.identifier, len(requirements), requirements) + return requirements + + +@dataclass(frozen=True) +class SourceSnippet: + """ + A snippet of source code backing a :class:`RequirementCheck`. + :ivar language: language tag for syntax highlighting (e.g. ``"python"``, ``"turtle"``). + :ivar code: the source code as text. + :ivar source_path: path to the file the snippet was extracted from, when available. + """ + + language: str + code: str + source_path: Path | None = None + + +@total_ordering +class RequirementCheck(ABC): + def __init__( + self, + requirement: Requirement, + name: str | None, + level: RequirementLevel | None = LevelCollection.REQUIRED, + description: str | None = None, + hidden: bool | None = None, + deactivated: bool = False, + ): + self._requirement: Requirement = requirement + self._order_number = 0 + self._name = name + self._level = level + self._description = description + self._hidden = hidden + self._deactivated = deactivated + + @property + def order_number(self) -> int: + return self._order_number + + @order_number.setter + def order_number(self, value: int) -> None: + if value < 0: + raise ValueError("order_number can't be < 0") + self._order_number = value + + @property + def identifier(self) -> str: + return f"{self.requirement.identifier}.{self.order_number}" + + @property + def relative_identifier(self) -> str: + return f"{self.level.name} {self.requirement.relative_identifier}.{self.order_number}" + + @property + def name(self) -> str: + if not self._name: + return self.__class__.__name__.replace("Check", "") + return self._name + + @property + def description(self) -> str: + if not self._description: + return 
self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Check {self.name}" + return self._description + + @property + def requirement(self) -> Requirement: + return self._requirement + + @property + def level(self) -> RequirementLevel: + return self._level or self.requirement.requirement_level_from_path or LevelCollection.REQUIRED + + @property + def severity(self) -> Severity: + return self.level.severity + + @property + def overridden_by(self) -> list[RequirementCheck]: + overridden_by = [] + for sibling_profile in self.requirement.profile.siblings: + check = sibling_profile.get_requirement_check(self.name) + if check: + overridden_by.append(check) + return overridden_by + + @property + def overrides(self) -> list[RequirementCheck]: + overrides = [] + for parent in self.requirement.profile.parents: + check = parent.get_requirement_check(self.name) + if check: + overrides.append(check) + return overrides + + @property + def overridden(self) -> bool: + return len(self.overridden_by) > 0 + + @property + def deactivated(self) -> bool: + return self._deactivated + + @property + def hidden(self) -> bool: + if self._hidden is not None: + return self._hidden + return self.requirement.hidden + + @abstractmethod + def execute_check(self, context: ValidationContext) -> bool: + raise NotImplementedError() + + def get_source_snippet(self) -> SourceSnippet | None: + """ + Return the source code that implements this check, or ``None`` if the + backing source cannot be extracted for this check kind. + Concrete subclasses should override this method. 
+ """ + return None + + def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> dict: + result = { + "identifier": self.identifier, + "label": self.relative_identifier, + "order": self.order_number, + "name": self.name, + "description": self.description, + "severity": self.severity.name, + } + if with_requirement: + result["requirement"] = self.requirement.to_dict(with_profile=with_profile, with_checks=False) + return result + + def __eq__(self, other: object) -> bool: + if not isinstance(other, RequirementCheck): + raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") + return self.requirement == other.requirement and self.name == other.name + + def __lt__(self, other: object) -> bool: + if not isinstance(other, RequirementCheck): + raise TypeError(f"Cannot compare RequirementCheck with {type(other)}") + return (self.requirement, self.identifier) < ( + other.requirement, + other.identifier, + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) + + def __hash__(self) -> int: + return hash((self.requirement, self.name or "")) diff --git a/rocrate_validator/models/result.py b/rocrate_validator/models/result.py new file mode 100644 index 000000000..2570a692a --- /dev/null +++ b/rocrate_validator/models/result.py @@ -0,0 +1,434 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import bisect +import json +from functools import total_ordering +from pathlib import Path +from typing import TYPE_CHECKING, Any, Collection, cast + +from rocrate_validator import __version__ +from rocrate_validator.constants import JSON_OUTPUT_FORMAT_VERSION +from rocrate_validator.models._logging import logger +from rocrate_validator.models.requirement import ( + Requirement, + RequirementCheck, +) +from rocrate_validator.models.severity import ( + RequirementLevel, + Severity, +) +from rocrate_validator.models.settings import ValidationSettings + +if TYPE_CHECKING: + from rocrate_validator.models.statistics import ValidationStatistics + from rocrate_validator.models.validation import ValidationContext + + +@total_ordering +class CheckIssue: + """ + Represents an issue with a check that has been executed + during the validation process. + """ + + def __init__( + self, + check: RequirementCheck, + message: str | None = None, + violatingProperty: str | None = None, + violatingEntity: str | None = None, + value: str | None = None, + ): + self._message = message + self._check: RequirementCheck = check + self._violatingProperty = violatingProperty + self._violatingEntity = violatingEntity + self._propertyValue = value + + @property + def message(self) -> str | None: + """The message associated with the issue""" + return self._message + + @property + def level(self) -> RequirementLevel: + """The level of the issue""" + return self._check.level + + @property + def severity(self) -> Severity: + """Severity of the RequirementLevel associated with this check.""" + return self._check.severity + + @property + def level_name(self) -> str: + return self.level.name + + @property + def check(self) -> RequirementCheck: + """The check that generated the issue""" + return self._check + + @property + def violatingEntity(self) -> str | None: + """ + It represents the specific element being evaluated that fails + to meet the defined rules or 
constraints within a validation process. + Also referred to as `focusNode` in SHACL terminology + in the context of an RDF graph, it is the subject of a triple + that violates a given constraint on the subject's property/predicate, + represented by the violatingProperty. + """ + return self._violatingEntity + + @property + def violatingProperty(self) -> str | None: + """ + It refers to the specific property or relationship within an item + that leads to a validation failure. + It identifies the part of the data structure that is causing the issue. + Also referred to as `resultPath` in SHACL terminology, + in the context of an RDF graph, it is the predicate of a triple + that violates a given constraint on the subject's property/predicate, + represented by the violatingProperty. + """ + return self._violatingProperty + + @property + def violatingPropertyValue(self) -> str | None: + """ + It represents the value of the violatingProperty + that leads to a validation failure. + """ + return self._propertyValue + + def __eq__(self, other: object) -> bool: + return isinstance(other, CheckIssue) and self._check == other._check and self._message == other._message + + def __lt__(self, other: object) -> bool: + if not isinstance(other, CheckIssue): + raise TypeError(f"Cannot compare {type(self)} with {type(other)}") + return (self._check, self._message) < (other._check, other._message) + + def __hash__(self) -> int: + return hash((self._check, self._message)) + + def __repr__(self) -> str: + return f"CheckIssue(severity={self.severity}, check={self.check}, message={self.message})" + + def __str__(self) -> str: + return f'Issue of severity {self.severity.name} with check "{self.check.identifier}": {self.message}' + + def to_dict( + self, + with_check: bool = True, + with_requirement: bool = True, + with_profile: bool = True, + ) -> dict: + result: dict[str, Any] = { + "severity": self.severity.name, + "message": self.message, + "violatingEntity": self.violatingEntity, + 
"violatingProperty": self.violatingProperty, + "violatingPropertyValue": self.violatingPropertyValue, + } + if with_check: + result["check"] = self.check.to_dict(with_requirement=with_requirement, with_profile=with_profile) + return result + + def to_json( + self, + with_checks: bool = True, + with_requirements: bool = True, + with_profile: bool = True, + ) -> str: + return json.dumps( + self.to_dict( + with_check=with_checks, + with_requirement=with_requirements, + with_profile=with_profile, + ), + indent=4, + cls=CustomEncoder, + ) + + +class ValidationResult: + """ + Represents the result of a validation. + + :param context: The validation context + :type context: ValidationContext + :param rocrate_uri: The URI of the RO-Crate + :type rocrate_uri: str + :param validation_settings: The validation settings + :type validation_settings: ValidationSettings + :param issues: The issues found during the validation + :type issues: list[CheckIssue] + """ + + def __init__(self, context: ValidationContext): + from rocrate_validator.models.statistics import ValidationStatistics # noqa: PLC0415 + + # reference to the validation context + self._context = context + # reference to the ro-crate URI + self._rocrate_uri = context.rocrate_uri + # reference to the validation settings + self._validation_settings: ValidationSettings = context.settings + # keep track of the issues found during the validation + self._issues: list[CheckIssue] = [] + # keep track of the checks that have been executed + self._executed_checks: set[RequirementCheck] = set() + self._executed_checks_results: dict[str, bool] = {} + # keep track of the checks that have been skipped + self._skipped_checks: set[RequirementCheck] = set() + # initialize the statistics + self._statistics = ValidationStatistics(context.settings) + + @property + def context(self) -> ValidationContext: + """ + The validation context + """ + return self._context + + @property + def rocrate_uri(self): + """ + The URI of the RO-Crate + """ 
+ return self._rocrate_uri + + @property + def validation_settings(self): + """ + The validation settings + """ + return self._validation_settings + + @property + def statistics(self) -> ValidationStatistics: + """ + The validation statistics + """ + return self._statistics + + # --- Checks --- + + @property + def executed_checks(self) -> set[RequirementCheck]: + """ + The checks that have been executed + """ + return self._executed_checks + + def _add_executed_check(self, check: RequirementCheck, result: bool): + """ + Internal method to add a check to the executed checks + """ + self._executed_checks.add(check) + self._executed_checks_results[check.identifier] = result + # remove the check from the skipped checks if it was skipped + if check in self._skipped_checks: + self._skipped_checks.remove(check) + logger.debug("Removing check '%s' from skipped checks", check.name) + + def get_executed_check_result(self, check: RequirementCheck) -> bool | None: + """ + Get the result of an executed check + """ + return self._executed_checks_results.get(check.identifier) + + @property + def skipped_checks(self) -> set[RequirementCheck]: + """ + The checks that have been skipped + """ + return self._skipped_checks + + def _add_skipped_check(self, check: RequirementCheck): + """ + Internal method to add a check to the skipped checks + """ + self._skipped_checks.add(check) + + def _remove_skipped_check(self, check: RequirementCheck): + """ + Internal method to remove a check from the skipped checks + """ + self._skipped_checks.remove(check) + + # --- Issues --- + @property + def issues(self) -> list[CheckIssue]: + """ + The issues found during the validation + """ + return self._issues.copy() + + def get_issues(self, min_severity: Severity | None = None) -> list[CheckIssue]: + """ + Get the issues found during the validation with a severity greater than or equal to `min_severity` + """ + min_severity = min_severity or self.context.requirement_severity + return [issue for issue 
in self._issues if issue.severity >= min_severity] + + def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity | None = None) -> list[CheckIssue]: + """ + Get the issues found during the validation for a specific check + with a severity greater than or equal to `min_severity` + """ + min_severity = min_severity or self.context.requirement_severity + return [issue for issue in self._issues if issue.check == check and issue.severity >= min_severity] + + def has_issues(self, min_severity: Severity | None = None) -> bool: + """ + Check if there are issues with a severity greater than or equal to the given `severity` + """ + min_severity = min_severity or self.context.requirement_severity + return any(issue.severity >= min_severity for issue in self._issues) + + def passed(self, min_severity: Severity | None = None) -> bool: + """ + Check if all checks passed with a severity greater than or equal to the given `severity` + """ + min_severity = min_severity or self.context.requirement_severity + return not any(issue.severity >= min_severity for issue in self._issues) + + def add_issue( + self, + message: str, + check: RequirementCheck, + violatingEntity: str | None = None, + violatingProperty: str | None = None, + violatingPropertyValue: str | None = None, + ) -> CheckIssue: + """ + Add an issue to the validation result + + Parameters: + message(str): The message of the issue + check(RequirementCheck): The check that generated the issue + violatingEntity(Optional[str]): The entity that caused the issue (if any) + violatingProperty(Optional[str]): The property that caused the issue (if any) + violatingPropertyValue(Optional[str]): The value of the violatingProperty (if any) + """ + c = CheckIssue( + check, + message, + violatingProperty=violatingProperty, + violatingEntity=violatingEntity, + value=violatingPropertyValue, + ) + bisect.insort(self._issues, c) + return c + + # --- Requirements --- + @property + def failed_requirements(self) -> 
Collection[Requirement]: + """ + Get the requirements that failed at or above the configured `requirement_severity`. + """ + min_severity = self.context.requirement_severity + return {issue.check.requirement for issue in self._issues if issue.severity >= min_severity} + + # --- Checks --- + @property + def failed_checks(self) -> Collection[RequirementCheck]: + """ + Get the checks that failed at or above the configured `requirement_severity`. + """ + min_severity = self.context.requirement_severity + return {issue.check for issue in self._issues if issue.severity >= min_severity} + + def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: + """ + Get the checks that failed for a specific requirement + """ + return [check for check in self.failed_checks if check.requirement == requirement] + + def get_failed_checks_by_requirement_and_severity( + self, requirement: Requirement, severity: Severity + ) -> Collection[RequirementCheck]: + """ + Get the checks that failed for a specific requirement and severity + """ + return [ + check for check in self.failed_checks if check.requirement == requirement and check.severity == severity + ] + + def __str__(self) -> str: + return f"Validation result: passed={len(self.failed_checks) == 0}, {len(self._issues)} issues" + + def __repr__(self): + return f"ValidationResult(passed={len(self.failed_checks) == 0},issues={self._issues})" + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ValidationResult): + raise TypeError(f"Cannot compare ValidationResult with {type(other)}") + return self._issues == other._issues + + # Equality is based on the mutable list of issues, so instances are + # intentionally unhashable (a content-based hash would be unstable). 
+ __hash__ = None # type: ignore[assignment] + + def to_dict(self) -> dict: + """ + Convert the ValidationResult to a dictionary + """ + allowed_properties = [ + "profile_identifier", + "enable_profile_inheritance", + "requirement_severity", + "abort_on_first", + ] + validation_settings = { + key: value for key, value in self.validation_settings.to_dict().items() if key in allowed_properties + } + result: dict[str, Any] = { + "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, + "validation_settings": validation_settings, + "passed": self.passed(cast("Severity", self.context.settings.requirement_severity)), + "issues": [issue.to_dict() for issue in self.issues], + } + # add validator version to the settings + result["validation_settings"]["rocrate_validator_version"] = __version__ + return result + + def to_json(self, path: Path | None = None) -> str: + """ + Convert the ValidationResult to a JSON string + """ + result = json.dumps(self.to_dict(), indent=4, cls=CustomEncoder) + if path: + with path.open("w", encoding="utf-8") as f: + f.write(result) + return result + + +class CustomEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, CheckIssue): + return o.__dict__ + if isinstance(o, Path): + return str(o) + if isinstance(o, (RequirementCheck, Requirement)): + return o.identifier + if isinstance(o, (Severity, RequirementLevel)): + return o.name + return super().default(o) diff --git a/rocrate_validator/models/settings.py b/rocrate_validator/models/settings.py new file mode 100644 index 000000000..230292410 --- /dev/null +++ b/rocrate_validator/models/settings.py @@ -0,0 +1,207 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from dataclasses import asdict, dataclass +from pathlib import Path + +from rocrate_validator.constants import ( + DEFAULT_HTTP_CACHE_MAX_AGE, + DEFAULT_PROFILE_IDENTIFIER, +) +from rocrate_validator.models.severity import Severity +from rocrate_validator.models._logging import logger +from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings +from rocrate_validator.utils.document_loader import install_document_loader +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.paths import ( + get_default_http_cache_path, + get_profiles_path, +) +from rocrate_validator.utils.uri import URI + +# set the default profiles path +DEFAULT_PROFILES_PATH = get_profiles_path() + +BaseTypes = str | Path | bool | int | None + + +@dataclass +class ValidationSettings: + """ + Represents the settings for RO-Crate validation. 
+ + It includes the following attributes: + """ + + #: The URI of the RO-Crate + rocrate_uri: URI # pyright: ignore[reportRedeclaration] + #: The relative root path of the RO-Crate + rocrate_relative_root_path: Path | None = None + # Profile settings + #: The path to the profiles + profiles_path: Path = DEFAULT_PROFILES_PATH + #: The path to the extra profiles + extra_profiles_path: Path | None = None + #: The profile identifier to validate against + profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER + #: Flag to enable profile inheritance + # Use the `enable_profile_inheritance` flag with caution: disable inheritance only if the + # target validation profile is fully self-contained and does not rely on definitions + # from inherited profiles (e.g., entities defined upstream). For modularization + # purposes, some base entities and properties are defined in the base RO-Crate + # profile and are intentionally not redefined in specialized profiles; they are + # required for validations targeting those specializations and therefore cannot be skipped. + # Nevertheless, the validator can still suppress issue reporting for checks defined + # in inherited profiles by setting disable_inherited_profiles_issue_reporting to `True`. 
+ enable_profile_inheritance: bool = True + # Validation settings + #: Flag to abort on first error + abort_on_first: bool | None = False + #: Flag to disable reporting of issues related to inherited profiles + disable_inherited_profiles_issue_reporting: bool = False + #: Flag to disable remote crate download + disable_remote_crate_download: bool = True + # Requirement settings + #: The requirement severity + requirement_severity: str | Severity = Severity.REQUIRED + #: Flag to validate requirement severity only skipping check with lower or higher severity + requirement_severity_only: bool = False + # Requirement check settings + #: Flag to allow requirement check override + allow_requirement_check_override: bool = True + #: Flag to disable the check for duplicates + disable_check_for_duplicates: bool = False + #: Checks to skip + skip_checks: list[str] | None = None + #: Flag to validate only the metadata of the RO-Crate + metadata_only: bool = False + #: RO-Crate metadata as dictionary + metadata_dict: dict | None = None + #: Verbose output + verbose: bool = False + #: Cache max age in seconds (negative values mean "never expire") + cache_max_age: int = DEFAULT_HTTP_CACHE_MAX_AGE + #: Cache path + cache_path: Path | None = None + #: Flag to enable offline mode: HTTP requests are served only from the cache + offline: bool = False + #: Flag to disable the HTTP cache entirely: every request hits the network + no_cache: bool = False + + def __post_init__(self): + # if requirement_severity is a str, convert to Severity + if isinstance(self.requirement_severity, str): + self.requirement_severity = Severity[self.requirement_severity] + # Offline mode needs the cache to serve responses, so it cannot be + # combined with an explicit cache disable. + if self.offline and self.no_cache: + raise ValueError( + "Offline mode requires the HTTP cache to be enabled; no_cache=True is incompatible with offline=True." 
+ ) + # Default to the persistent user cache whenever caching is enabled so that + # consecutive runs (online then offline) share the same HTTP cache: this + # is what lets the offline mode find the resources fetched online. + if self.cache_path is None and not self.no_cache: + default_path = get_default_http_cache_path() + default_path.parent.mkdir(parents=True, exist_ok=True) + self.cache_path = default_path + logger.debug("Cache path not set: defaulting to persistent user cache %s", self.cache_path) + if self.offline and self.cache_path is None: + logger.warning( + "Offline mode enabled without a persistent cache path: " + "all HTTP-backed resources will fail unless pre-populated." + ) + # Re-apply the cache settings to the HTTP requester. ``initialize_cache`` + # reconfigures the existing singleton in place (rather than dropping it), + # so new settings take effect without discarding state set on the instance. + HttpRequester.initialize_cache( + cache_path=str(self.cache_path) if self.cache_path is not None else None, + cache_max_age=self.cache_max_age, + offline=self.offline, + no_cache=self.no_cache, + ) + logger.debug( + "HTTP cache initialized at %s with max age %s seconds (offline=%s, no_cache=%s)", + self.cache_path, + self.cache_max_age, + self.offline, + self.no_cache, + ) + # Install the JSON-LD document loader so context resolution goes through the cache. + try: + install_document_loader() + except Exception as e: + logger.debug("Could not install JSON-LD document loader: %s", e) + # Best-effort synchronous warm-up of profile-declared URLs. 
+ if not self.offline: + try: + auto_warm_up_for_settings(self) + except Exception as e: + logger.debug("Auto warm-up skipped: %s", e) + + def to_dict(self): + """ + Convert the ValidationSettings to a dictionary + """ + result = asdict(self) + result["rocrate_uri"] = str(self.rocrate_uri) + result.pop("metadata_dict", None) # exclude metadata_dict from the dict representation + # Remove disable_crate_download from the dict representation + result.pop("disable_remote_crate_download", None) + # Remove requirement_severity_only from the dict representation + result.pop("requirement_severity_only", None) + return result + + @property # type: ignore[no-redef] + def rocrate_uri(self) -> URI | None: + """ + Get the RO-Crate URI + + :return: The RO-Crate URI + :rtype: URI + """ + return self._rocrate_uri + + @rocrate_uri.setter + def rocrate_uri(self, value: str | Path | URI): + """ + Set the RO-Crate URI. + + :param value: The RO-Crate URI. + :type value: Union[str, Path, URI] + """ + if not value: + raise ValueError("Invalid RO-Crate URI") + self._rocrate_uri: URI = URI(str(value)) + + @classmethod + def parse(cls, settings: dict | ValidationSettings) -> ValidationSettings: + """ + Parse the settings to a ValidationSettings object. + + :param settings: The settings to parse. + :type settings: Union[dict, ValidationSettings] + + :return: The parsed settings. + :rtype: ValidationSettings + + :raises ValueError: If the settings type is invalid. 
@dataclass
class RequirementLevel:
    """
    Represents a requirement level.

    A requirement level has a name and a severity of type :class:`.Severity`.

    Equality and hashing take both ``name`` and ``severity`` into account, whereas
    ordering is based on ``severity`` only. Two levels sharing the same severity but
    having different names (e.g. ``MUST`` and ``REQUIRED``) are therefore unequal,
    yet neither is greater than the other: for such a pair ``a <= b`` and ``a >= b``
    both hold, while ``a < b`` and ``a > b`` are both false.
    """

    name: str
    severity: Severity

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RequirementLevel):
            # let Python fall back to its default handling (which yields False)
            return NotImplemented
        return self.name == other.name and self.severity == other.severity

    # NOTE: the four ordering operators are spelled out instead of being derived
    # with functools.total_ordering. The decorator builds ``a > b`` as
    # ``not (a < b) and a != b`` and ``a <= b`` as ``a < b or a == b``: since
    # equality also looks at the name, two levels with the same severity and
    # different names were each "greater" than the other and never "<=".

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, RequirementLevel):
            # unsupported operand: Python raises TypeError if the other side
            # cannot handle the comparison either
            return NotImplemented
        return self.severity < other.severity

    def __le__(self, other: object) -> bool:
        if not isinstance(other, RequirementLevel):
            return NotImplemented
        return not other.severity < self.severity

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, RequirementLevel):
            return NotImplemented
        return other.severity < self.severity

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, RequirementLevel):
            return NotImplemented
        return not self.severity < other.severity

    def __hash__(self) -> int:
        return hash((self.name, self.severity))

    def __repr__(self) -> str:
        return f"RequirementLevel(name={self.name}, severity={self.severity})"

    def __str__(self) -> str:
        return self.name

    def __int__(self) -> int:
        return self.severity.value

    def __index__(self) -> int:
        return self.severity.value
@staticmethod
def get(name: str) -> RequirementLevel:
    """
    Look up one of the predefined requirement levels by its keyword.

    The lookup is case-insensitive. Runs of whitespace are treated as a single
    underscore, so the two-word RFC 2119 keywords can be passed as they are
    written (``"SHOULD NOT"`` resolves to ``SHOULD_NOT``).

    :param name: the keyword identifying the requirement level
    :type name: str

    :return: the matching requirement level
    :rtype: RequirementLevel

    :raises ValueError: if ``name`` does not identify a requirement level
    """
    try:
        # name.upper() stays inside the try so that a non-string name is
        # reported as an invalid level, as it was before
        level = getattr(LevelCollection, "_".join(name.upper().split()))
    except AttributeError:
        raise ValueError(f"Invalid RequirementLevel: {name}") from None
    if not isinstance(level, RequirementLevel):
        # never hand out an attribute of the collection that is not a level
        raise ValueError(f"Invalid RequirementLevel: {name}")
    return level
b/rocrate_validator/models/statistics.py new file mode 100644 index 000000000..88f360998 --- /dev/null +++ b/rocrate_validator/models/statistics.py @@ -0,0 +1,792 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import json +from datetime import datetime, timezone +from typing import TYPE_CHECKING, Protocol, cast + +from rocrate_validator.events import Event, EventType, Subscriber +from rocrate_validator.models.events import ( + ProfileValidationEvent, + RequirementCheckValidationEvent, + RequirementValidationEvent, + ValidationEvent, +) +from rocrate_validator.models._logging import logger +from rocrate_validator.models.profile import Profile +from rocrate_validator.models.requirement import ( + Requirement, + RequirementCheck, +) +from rocrate_validator.models.severity import ( + LevelCollection, + Severity, +) +from rocrate_validator.models.settings import ValidationSettings + +if TYPE_CHECKING: + from rocrate_validator.models.result import ValidationResult + from rocrate_validator.models.validation import ValidationContext + + +class ValidationStatisticsListener(Protocol): + """ + Protocol for listeners interested in validation statistics updates. 
+ """ + + def on_statistics_updated(self, statistics: ValidationStatistics): + logger.debug("Statistics updated: %r", statistics.statistics) + + +class ValidationStatistics(Subscriber): + """ + Computes and stores statistical metrics about the RO-Crate validation process. + """ + + def __init__( + self, + settings: dict | ValidationSettings, + context: ValidationContext | None = None, + skip_initialization: bool = False, + ): + super().__init__(name=self.__class__.__name__) + if isinstance(settings, dict): + settings = ValidationSettings.parse(settings) + self._settings = settings + self._context = context + self._stats = self.__initialise__(settings) if not skip_initialization else {} + self._result: ValidationResult | None = None + self._listeners: list[ValidationStatisticsListener] = [] + + @property + def validation_settings(self) -> ValidationSettings: + """ + Get the validation settings used for statistics computation + """ + return self._settings + + @property + def validation_result(self) -> ValidationResult | None: + """ + Get the validation result + """ + return self._result + + def add_listener(self, listener: ValidationStatisticsListener): + """ + Add a listener to be notified on statistics updates + """ + self._listeners.append(listener) + logger.debug("Listener added: %r", listener) + + def notify_listeners(self): + """ + Notify all registered listeners about statistics updates + """ + for listener in self._listeners: + listener.on_statistics_updated(self) + logger.debug("Notified listener: %r", listener) + + @property + def statistics(self) -> dict: + """ + Get the computed validation statistics + """ + return self._stats.copy() + + @property + def profile(self) -> Profile: + """ + Get the profile being validated + """ + return cast("Profile", self._stats.get("profile")) + + @property + def profiles(self) -> list[Profile]: + """ + Get all profiles involved in validation + """ + return self._stats.get("profiles", []) + + @property + def severity(self) 
@property
def check_count_by_severity(self) -> dict:
    """
    Get the number of checks registered for each severity
    """
    grouped_checks = self._stats.get("checks_by_severity", {})
    counts = {}
    for severity, severity_checks in grouped_checks.items():
        counts[severity] = len(severity_checks)
    return counts
@property
def duration(self) -> float | None:
    """
    Get the time elapsed between the start and the end of the validation, in seconds.

    ``None`` is returned while either timestamp is still missing.
    """
    begin, end = self.started_at, self.finished_at
    if not (begin and end):
        return None
    return (end - begin).total_seconds()
@classmethod
def __initialise__(cls, validation_settings: ValidationSettings):
    """
    Compute the initial statistics for the profile selected in the settings.

    The available profiles are loaded and the target profile is resolved from
    ``validation_settings.profile_identifier``. Unless
    ``disable_inherited_profiles_issue_reporting`` is set, the profiles inherited
    by the target are taken into account as well. Every non-hidden requirement of
    those profiles is visited once and is registered only when
    :meth:`__collect_requirement_checks__` reports at least one check for it.

    :param validation_settings: the settings driving the validation
    :type validation_settings: ValidationSettings

    :return: the statistics dictionary; the entries filled in while the validation
        runs (passed/failed/validated lists and timestamps) are still empty
    :rtype: dict
    """
    # extract the validation settings
    severity_validation = validation_settings.requirement_severity
    available_profiles: list[Profile] = Profile.load_profiles(
        validation_settings.profiles_path,
        extra_profiles_path=validation_settings.extra_profiles_path,
        severity=cast("Severity", severity_validation),
        allow_requirement_check_override=validation_settings.allow_requirement_check_override,
    )
    profile: Profile = cast(
        "Profile",
        Profile.find_in_list(available_profiles, validation_settings.profile_identifier),
    )
    target_profile_identifier = profile.identifier
    # the target profile always comes first
    profiles: list[Profile] = [profile]

    # add inherited profiles if enabled
    if not validation_settings.disable_inherited_profiles_issue_reporting:
        profiles.extend(profile.inherited_profiles)
        logger.debug("Inherited profiles: %r", profile.inherited_profiles)

    # Initialize the counters
    checks_by_severity: dict[Severity, set[RequirementCheck]] = {
        severity: set()
        for severity in (
            Severity.REQUIRED,
            Severity.RECOMMENDED,
            Severity.OPTIONAL,
        )
    }
    checks: set[RequirementCheck] = set()
    requirements: set[Requirement] = set()

    # Process the requirements and checks.
    # NOTE: the loop variable must not be called `profile`: that name holds the
    # target profile, which is stored in the result below.
    processed_requirements = []
    for current_profile in profiles:
        for requirement in current_profile.requirements:
            if requirement in processed_requirements:
                continue
            processed_requirements.append(requirement)
            if requirement.hidden:
                continue

            requirement_checks_count = cls.__collect_requirement_checks__(
                requirement,
                severity_validation,
                validation_settings,
                target_profile_identifier,
                checks,
                checks_by_severity,
            )

            # count the requirements and checks
            if requirement_checks_count == 0:
                logger.debug("No checks for requirement: %s", requirement)
            else:
                # Only if there are checks for the requirement count it
                logger.debug("Requirement: %s checks count: %s", requirement, requirement_checks_count)
                assert not requirement.hidden, "Hidden requirements should not be counted"
                # add the requirement to the list
                requirements.add(requirement)

    # log processed requirements
    logger.debug(
        "Processed requirements %r: %r",
        len(processed_requirements),
        processed_requirements,
    )

    # Prepare the result
    result = {
        "profile": profile,
        "profiles": profiles,
        "requirements": requirements,
        "checks": checks,
        "severity": severity_validation,
        "checks_by_severity": checks_by_severity,
        "failed_requirements": [],
        "failed_checks": [],
        "passed_requirements": [],
        "passed_checks": [],
        "started_at": None,
        "finished_at": None,
        "validated_profiles": [],
        "validated_requirements": [],
        "validated_checks": [],
    }
    logger.debug(result)
    return result
RequirementCheckValidationEvent) + assert ctx is not None + target_profile = ctx.target_validation_profile + if not event.requirement_check.requirement.hidden and ( + not event.requirement_check.overridden + or target_profile.identifier == event.requirement_check.requirement.profile.identifier + ): + if event.validation_result is not None: + if event.validation_result: + self._stats["passed_checks"].append(event.requirement_check) + else: + self._stats["failed_checks"].append(event.requirement_check) + self._stats["validated_checks"].append(event.requirement_check) + self.notify_listeners() + else: + logger.debug( + "Requirement check validation result is None: %s", + event.requirement_check.identifier, + ) + else: + logger.debug( + "Skipping requirement check validation: %s", + event.requirement_check.identifier, + ) + + def __handle_requirement_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: + assert isinstance(event, RequirementValidationEvent) + if not event.requirement.hidden: + if event.validation_result: + self._stats["passed_requirements"].append(event.requirement) + else: + self._stats["failed_requirements"].append(event.requirement) + self._stats["validated_requirements"].append(event.requirement) + self.notify_listeners() + + def __handle_profile_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: + assert isinstance(event, ProfileValidationEvent) + self._stats["validated_profiles"].append(event.profile) + logger.debug("Profile validation ended: %s", event.profile.identifier) + + def __handle_validation_end__(self, event: Event, _ctx: ValidationContext | None) -> None: + assert isinstance(event, ValidationEvent) + self._result = event.validation_result + self._stats["finished_at"] = datetime.now(timezone.utc) + logger.debug("Validation ended with result: %s", event.validation_result) + + @property + def __event_handlers__(self): + return { + EventType.VALIDATION_START: 
def to_dict(self) -> dict:
    """
    Get the computed validation statistics as a dictionary

    Timestamps are rendered in ISO 8601 format (``None`` when missing); profiles,
    requirements and checks are reported through their identifiers, with the
    requirement and check identifier lists sorted.
    """

    def iso_or_none(moment):
        return moment.isoformat() if moment else None

    def outcome(items, total):
        # count, share of the total and sorted identifiers of one passed/failed group
        return {
            "count": len(items),
            "percentage": (len(items) / total * 100) if total > 0 else 0.0,
            "identifiers": sorted([item.identifier for item in items]),
        }

    def section(items, total, passed, failed):
        return {
            "count": total,
            "passed": outcome(passed, total),
            "failed": outcome(failed, total),
            "identifiers": sorted([item.identifier for item in items]),
        }

    target_profile = self.profile
    severity = self.severity
    requirement_total = self.total_requirements
    check_total = self.total_checks
    passed_requirements = self.passed_requirements
    failed_requirements = self.failed_requirements
    passed_checks = self.passed_checks
    failed_checks = self.failed_checks

    checks_section = section(self.checks, check_total, passed_checks, failed_checks)
    checks_section["by_severity"] = {
        k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()
    }

    return {
        # Execution time details
        "started_at": iso_or_none(self.started_at),
        "finished_at": iso_or_none(self.finished_at),
        "duration": self.duration,
        # Profile details
        "profile": target_profile.identifier if target_profile else None,
        "profiles": [p.identifier for p in self.profiles],
        "severity": severity.name if severity else None,
        # Computed totals
        "total_requirements": requirement_total,
        "total_passed_requirements": len(passed_requirements),
        "total_failed_requirements": len(failed_requirements),
        "total_checks": check_total,
        "total_passed_checks": len(passed_checks),
        "total_failed_checks": len(failed_checks),
        "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()},
        # Requirements involved
        "requirements": section(
            self.requirements,
            requirement_total,
            passed_requirements,
            failed_requirements,
        ),
        # Checks involved
        "checks": checks_section,
    }
def to_dict(self) -> dict:
    """
    Get the overall aggregated statistics as a dictionary

    Timestamps are rendered in ISO 8601 format (``None`` when missing); profiles,
    requirements and checks are reported through their identifiers, following the
    order of the aggregated collections.
    """

    def as_iso(moment):
        return moment.isoformat() if moment else None

    def bucket(members, total):
        # count, share of the total and identifiers of one passed/failed group
        size = len(members)
        share = (size / total * 100) if total > 0 else 0.0
        return {
            "count": size,
            "percentage": share,
            "identifiers": [member.identifier for member in members],
        }

    def section(members, total, passed, failed):
        return {
            "count": total,
            "passed": bucket(passed, total),
            "failed": bucket(failed, total),
            "identifiers": [member.identifier for member in members],
        }

    requirement_total = self.total_requirements
    check_total = self.total_checks
    passed_requirements, failed_requirements = self.passed_requirements, self.failed_requirements
    passed_checks, failed_checks = self.passed_checks, self.failed_checks

    summary = {
        # Execution time details
        "started_at": as_iso(self.started_at),
        "finished_at": as_iso(self.finished_at),
        "duration": self.duration,
        # Profiles involved
        "profiles": [p.identifier for p in self.profiles],
        # Computed totals
        "total_requirements": requirement_total,
        "total_passed_requirements": len(passed_requirements),
        "total_failed_requirements": len(failed_requirements),
        "total_checks": check_total,
        "total_passed_checks": len(passed_checks),
        "total_failed_checks": len(failed_checks),
        "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()},
    }
    # Requirements involved
    summary["requirements"] = section(
        self.requirements,
        requirement_total,
        passed_requirements,
        failed_requirements,
    )
    # Checks involved
    summary["checks"] = section(self.checks, check_total, passed_checks, failed_checks)
    return summary
@classmethod
def __aggregate_raw_stats__(
    cls,
    statistics_list: list[ValidationStatistics],
):
    """
    Aggregate raw (unsorted) statistics from a list of ValidationStatistics instances.

    Profiles, requirements, checks and the passed/failed collections are merged as
    set unions. The checks grouped by severity are unioned severity by severity
    into sets owned by the aggregate, so the per-run statistics are left untouched.
    The aggregated time window spans from the earliest ``started_at`` to the latest
    ``finished_at`` (missing timestamps are ignored), whereas ``duration`` is the
    sum of the durations of the individual runs.

    :param statistics_list: the statistics of the individual validation runs
    :type statistics_list: list[ValidationStatistics]

    :return: the aggregated values, with every collection stored as a set
    :rtype: dict
    """
    profiles: set[Profile] = set()
    requirements: set[Requirement] = set()
    checks: set[RequirementCheck] = set()
    checks_by_severity: dict[Severity, set[RequirementCheck]] = {}
    failed_requirements: set[Requirement] = set()
    failed_checks: set[RequirementCheck] = set()
    passed_requirements: set[Requirement] = set()
    passed_checks: set[RequirementCheck] = set()
    started_at: datetime | None = None
    finished_at: datetime | None = None
    duration: float = 0.0

    # Aggregate statistics from each ValidationStatistics instance
    for stats in statistics_list:
        # Aggregate profiles
        profiles.update(stats.profiles)

        # Aggregate total requirements and checks
        requirements.update(stats.requirements)
        checks.update(stats.checks)
        # Union the checks severity by severity: a plain dict.update() would
        # replace the set collected so far with the one of the latest run
        for severity, severity_checks in stats.checks_by_severity.items():
            checks_by_severity.setdefault(severity, set()).update(severity_checks)

        # Aggregate failed and passed requirements and checks
        failed_requirements.update(stats.failed_requirements)
        failed_checks.update(stats.failed_checks)
        passed_requirements.update(stats.passed_requirements)
        passed_checks.update(stats.passed_checks)

        # Aggregate started_at and finished_at
        run_started_at = stats.started_at
        if run_started_at is not None:
            started_at = run_started_at if started_at is None else min(started_at, run_started_at)
        run_finished_at = stats.finished_at
        if run_finished_at is not None:
            finished_at = run_finished_at if finished_at is None else max(finished_at, run_finished_at)
        # Aggregate duration
        duration += stats.duration or 0.0

    return {
        "profiles": profiles,
        "requirements": requirements,
        "checks": checks,
        "checks_by_severity": checks_by_severity,
        "failed_requirements": failed_requirements,
        "failed_checks": failed_checks,
        "passed_requirements": passed_requirements,
        "passed_checks": passed_checks,
        "started_at": started_at,
        "finished_at": finished_at,
        "duration": duration,
    }
+ """ + Sort the raw aggregated sets and build the final sorted statistics dict. + """ + sorted_checks_by_severity = {} + for severity_key, severity_checks in raw_stats["checks_by_severity"].items(): + sorted_checks_by_severity[severity_key] = sorted(severity_checks, key=lambda c: c.identifier) + + return { + "profiles": sorted(raw_stats["profiles"], key=lambda p: p.identifier), + "requirements": sorted(raw_stats["requirements"], key=lambda r: r.identifier), + "checks": sorted(raw_stats["checks"], key=lambda c: c.identifier), + "checks_by_severity": sorted_checks_by_severity, + "failed_requirements": sorted(raw_stats["failed_requirements"], key=lambda r: r.identifier), + "failed_checks": sorted(raw_stats["failed_checks"], key=lambda c: c.identifier), + "passed_requirements": sorted(raw_stats["passed_requirements"], key=lambda r: r.identifier), + "passed_checks": sorted(raw_stats["passed_checks"], key=lambda c: c.identifier), + "started_at": raw_stats["started_at"], + "finished_at": raw_stats["finished_at"], + "duration": raw_stats["duration"], + } diff --git a/rocrate_validator/models/validation.py b/rocrate_validator/models/validation.py new file mode 100644 index 000000000..a94bd8921 --- /dev/null +++ b/rocrate_validator/models/validation.py @@ -0,0 +1,650 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from pathlib import Path +from typing import Any +from urllib.error import HTTPError + +from rdflib import Graph + +from rocrate_validator.constants import ROCRATE_METADATA_FILE +from rocrate_validator.errors import ( + ProfileNotFound, + ROCrateMetadataNotFoundError, +) +from rocrate_validator.events import Event, EventType, Publisher +from rocrate_validator.models._logging import logger +from rocrate_validator.models.events import ( + ProfileValidationEvent, + RequirementValidationEvent, + ValidationEvent, +) +from rocrate_validator.models.profile import Profile +from rocrate_validator.models.requirement import ( + Requirement, + RequirementLoader, +) +from rocrate_validator.models.result import ValidationResult +from rocrate_validator.models.severity import Severity +from rocrate_validator.models.settings import ValidationSettings +from rocrate_validator.rocrate import ROCrate +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import find_offline_cache_miss +from rocrate_validator.utils.uri import URI + + +class Validator(Publisher): + """ + Validator class for validating Research Object Crates(RO-Crate) + against specified profiles according to the validation settings. + + Attributes: + validation_settings(ValidationSettings): The settings used for validation. + + Methods: + __init__(settings: Union[str, ValidationSettings]): + Initializes the Validator with the given settings. + validation_settings() -> ValidationSettings: + Returns the validation settings. + detect_rocrate_profiles() -> list[Profile]: + Detects the profiles to validate against. + validate() -> ValidationResult: + Validate the RO-Crate against the detected profiles according to the validation settings + validate_requirements(requirements: list[Requirement]) -> ValidationResult: + Validates the RO-Crate against the specified subset of the profile requirements. 
def detect_rocrate_profiles(self) -> list[Profile]:
    """
    Detect the profiles to validate against

    The candidate profile URIs are collected from ``get_conforms_to()`` and
    ``get_root_data_entity_conforms_to()`` of the RO-Crate metadata and matched
    against the URIs of the available profiles. A matching profile that is
    inherited by another matching profile is left out, regardless of the order
    in which the profiles were loaded. Candidate URIs without a matching profile
    are reported with a warning.

    :return: the detected profiles; an empty list if no candidate URI is found
        or if the detection fails
    :rtype: list[Profile]
    """
    try:
        # initialize the validation context
        context = ValidationContext(self, self.validation_settings)
        candidate_profiles_uris: set[str] = set()
        try:
            candidate_profiles_uris.update(context.ro_crate.metadata.get_conforms_to() or [])
        except Exception as e:
            logger.debug("Error while getting candidate profiles URIs: %s", e)
        try:
            candidate_profiles_uris.update(context.ro_crate.metadata.get_root_data_entity_conforms_to() or [])
        except Exception as e:
            logger.debug("Error while getting candidate profiles URIs: %s", e)

        logger.debug("Candidate profiles: %s", candidate_profiles_uris)
        if not candidate_profiles_uris:
            logger.debug("Unable to determine the profile to validate against")
            return []
        # load the profiles
        available_profiles = Profile.load_profiles(
            context.profiles_path,
            extra_profiles_path=context.extra_profiles_path,
            publicID=context.publicID,
            severity=context.requirement_severity,
        )
        profiles = [p for p in available_profiles if p.uri in candidate_profiles_uris]
        # get the candidate profiles: gather the inherited profiles of ALL the
        # matches first, so that a base profile is dropped even when it is
        # listed after the profile that inherits from it
        inherited_profiles = [
            inherited_profile for profile in profiles for inherited_profile in profile.inherited_profiles
        ]
        candidate_profiles = [profile for profile in profiles if profile not in inherited_profiles]
        logger.debug(
            "%d Candidate Profiles found: %s",
            len(candidate_profiles),
            candidate_profiles,
        )
        # unmatched candidate profiles
        unmatched_profiles = candidate_profiles_uris.difference({p.uri for p in profiles})
        logger.debug("Unmatched Candidate Profiles URIs: %s", unmatched_profiles)
        if len(unmatched_profiles) > 0:
            # sorted: keep the message stable across runs
            logger.warning(
                "The conformance to the following profiles could not be verified: %s",
                ", ".join(sorted(unmatched_profiles)),
            )
        return candidate_profiles

    except Exception:
        # best-effort detection: any failure results in "no profile detected"
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception("Error detecting RO-Crate profiles")
        return []
+ for p in profiles: + _ = p.requirements + self.notify(EventType.VALIDATION_START) + for profile in profiles: + logger.debug( + "Validating profile %s (id: %s)", + profile.name, + profile.identifier, + ) + # set the target profile in the context + context._target_validation_profile = profile + self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_START, profile=profile)) + # perform the requirements validation + requirements = profile.get_requirements( + context.requirement_severity, + exact_match=context.requirement_severity_only, + ) + logger.debug( + "Validating profile %s with %s requirements", + profile.identifier, + len(requirements), + ) + logger.debug( + "For profile %s, validating these %s requirements: %s", + profile.identifier, + len(requirements), + requirements, + ) + terminate = False + for requirement in requirements: + if not requirement.overridden: + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_START, + requirement=requirement, + ) + ) + passed = requirement._do_validate_(context) + logger.debug( + "Requirement %s passed: %s", + requirement.identifier, + passed, + ) + if not requirement.overridden: + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_END, + requirement=requirement, + validation_result=passed, + ) + ) + if passed: + logger.debug("Validation Requirement passed") + else: + logger.debug(f"Validation Requirement {requirement} failed (profile: {profile.identifier})") + if context.fail_fast: + logger.debug("Aborting on first requirement failure") + terminate = True + break + self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_END, profile=profile)) + if terminate: + break + + # finalize the requirement types + self.__invoke_post_validation_hooks__(context) + # notify the end of the validation + self.notify(ValidationEvent(EventType.VALIDATION_END, validation_result=context.result)) + # return the validation result + return context.result + finally: + # 
clear the current context + self.__current_context__ = None + + def __invoke_pre_validation_hooks__(self, context: ValidationContext): + logger.debug("Initializing requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.initialize(context) + logger.debug("Initializing requirement types: completed") + + def __invoke_post_validation_hooks__(self, context: ValidationContext): + logger.debug("Finalizing requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.finalize(context) + logger.debug("Finalizing requirement types: completed") + + def notify(self, event: Event | EventType, ctx: Any | None = None): + """Override notify to update statistics""" + assert self.__current_context__ is not None, "No current validation context" + result: ValidationResult = self.__current_context__.result + if isinstance(event, EventType): + event = Event(event) + result.statistics.update(event, ctx=self.__current_context__) + return super().notify(event, ctx=self.__current_context__) + + +class ValidationContext: + """ + Class that represents the context for the validation process. 
+ """ + + def __init__(self, validator: Validator, settings: ValidationSettings): + # reference to the validator + self._validator = validator + # reference to the settings + self._settings = settings + # reference to the data graph + self._data_graph: Graph | None = None + # reference to the profiles + self._profiles: list[Profile] | None = None + # reference to the target profile + self._target_validation_profile: Profile | None = None + # reference to the validation result + self._result: ValidationResult | None = None + # additional properties for the context + self._properties: dict = {} + # URLs already reported as missing from the HTTP cache during this run + self._offline_cache_misses_warned: set[str] = set() + + # initialize the ROCrate object + if settings.metadata_dict: + self._rocrate = ROCrate.from_metadata_dict(settings.metadata_dict) + else: + rocrate_uri = settings.rocrate_uri + assert rocrate_uri is not None, "RO-Crate URI is required when metadata_dict is not provided" + self._rocrate = ROCrate.new_instance( + rocrate_uri, + relative_root_path=settings.rocrate_relative_root_path, + ) + assert isinstance(self._rocrate, ROCrate), "Invalid RO-Crate instance" + + @property + def ro_crate(self) -> ROCrate: + """ + The RO-Crate instance + + :return: The RO-Crate instance + :rtype: ROCrate + """ + return self._rocrate + + @property + def validator(self) -> Validator: + """ + The validator instance which this context belongs to + + :return: The validator instance + :rtype: Validator + """ + return self._validator + + @property + def result(self) -> ValidationResult: + """ + The validation result + + :return: The validation result + :rtype: ValidationResult + """ + if self._result is None: + self._result = ValidationResult(self) + return self._result + + @property + def settings(self) -> ValidationSettings: + """ + The validation settings + + :return: The validation settings + :rtype: ValidationSettings + """ + return self._settings + + @property + def 
publicID(self) -> str: + """ + The root URI of the RO-Crate + """ + path = str(self.ro_crate.uri.base_uri) + if not path.endswith("/"): + path = f"{path}/" + return path + + @property + def profiles_path(self) -> Path: + """ + The path to the profiles + + :return: The path to the profiles + :rtype: Path + """ + profiles_path = self.settings.profiles_path + if isinstance(profiles_path, str): + profiles_path = Path(profiles_path) + return profiles_path + + @property + def extra_profiles_path(self) -> Path | None: + """ + The path to the extra profiles + + :return: The path to the extra profiles + :rtype: Optional[Path] + """ + extra_profiles_path = self.settings.extra_profiles_path + if isinstance(extra_profiles_path, str): + extra_profiles_path = Path(extra_profiles_path) + return extra_profiles_path or None + + @property + def requirement_severity(self) -> Severity: + """ + The requirement severity to validate against + + :return: The requirement severity + :rtype: Severity + """ + severity = self.settings.requirement_severity + if isinstance(severity, str): + severity = Severity[severity] + elif not isinstance(severity, Severity): + raise TypeError(f"Invalid severity type: {type(severity)}") + return severity + + @property + def requirement_severity_only(self) -> bool: + """ + Flag to validate requirement severity only skipping check with lower or higher severity + + :return: The flag to validate requirement severity only + :rtype: bool + """ + return self.settings.requirement_severity_only + + @property + def rocrate_uri(self) -> URI: + """ + The URI of the RO-Crate + + :return: The URI of the RO-Crate + :rtype: Path + """ + rocrate_uri = self.settings.rocrate_uri + if rocrate_uri is None: + raise ValueError("RO-Crate URI is not set") + return rocrate_uri + + @property + def fail_fast(self) -> bool: + """ + Flag to abort on first error + + :return: The flag to abort on first error + :rtype: bool + """ + return bool(self.settings.abort_on_first) + + @property + 
def rel_fd_path(self) -> Path: + """ + The relative path to the file descriptor + + :return: The relative path to the file descriptor + :rtype: Path + """ + return Path(ROCRATE_METADATA_FILE) + + def __load_data_graph__(self) -> Graph: + data_graph = Graph() + logger.debug("Loading RO-Crate metadata of: %s", self.ro_crate.uri) + _ = data_graph.parse( + data=self.ro_crate.metadata.as_dict(), # type: ignore[arg-type] + format="json-ld", + publicID=self.publicID, + ) + logger.debug("RO-Crate metadata loaded: %s", data_graph) + return data_graph + + def get_data_graph(self, refresh: bool = False) -> Graph: + """ + Utility method to get the data graph of the RO-Crate, + i.e., the metadata of the RO-Crate as an RDF graph. + + :param refresh: Flag to refresh the data graph + :type refresh: bool + + :return: The data graph of the RO-Crate + :rtype: :py:class:rdflib.Graph + + :raises ROCrateMetadataNotFoundError: If the RO-Crate metadata is not found + """ + # load the data graph + try: + if not self._data_graph or refresh: + self._data_graph = self.__load_data_graph__() + return self._data_graph + except (HTTPError, FileNotFoundError) as e: + logger.debug("Error loading data graph: %s", e) + raise ROCrateMetadataNotFoundError(str(self.rocrate_uri)) from e + + @property + def data_graph(self) -> Graph: + """ + The data graph of the RO-Crate, + i.e., the metadata of the RO-Crate as an RDF graph. 
+ + :return: The data graph of the RO-Crate + :rtype: Graph + """ + return self.get_data_graph() + + @property + def inheritance_enabled(self) -> bool: + """ + Flag which indicates if profile inheritance is enabled + + :return: The flag to enable profile inheritance + :rtype: bool + """ + return self.settings.enable_profile_inheritance + + @property + def profile_identifier(self) -> str: + """ + The profile identifier to validate against + + :return: The profile identifier + :rtype: str + """ + return self.settings.profile_identifier + + @property + def allow_requirement_check_override(self) -> bool: + """ + Flag that indicates if requirement check override is allowed + + :return: The flag to allow requirement check override + :rtype: bool + """ + return self.settings.allow_requirement_check_override + + @property + def disable_check_for_duplicates(self) -> bool: + """ + Flag that indicates if the check for duplicates is disabled + + :return: The flag to disable the check for duplicates + :rtype: bool + """ + return self.settings.disable_check_for_duplicates + + def __load_profiles__(self) -> list[Profile]: + + # load all profiles + profiles = Profile.load_profiles( + self.profiles_path, + extra_profiles_path=self.settings.extra_profiles_path, + publicID=self.publicID, + severity=self.requirement_severity, + allow_requirement_check_override=self.allow_requirement_check_override, + ) + + # Check if the target profile is in the list of profiles + profile = Profile.get_by_identifier(self.profile_identifier) + if not profile: + try: + candidate_profiles = Profile.get_by_token(self.profile_identifier) + logger.debug("Candidate profiles found by token: %s", profile) + if candidate_profiles: + # Find the profile with the highest version number + profile = max(candidate_profiles, key=lambda p: p.version or "") + self.settings.profile_identifier = profile.identifier + logger.debug("Profile with the highest version number: %s", profile) + except AttributeError as e: + # 
raised when the profile is not found + if logger.isEnabledFor(logging.DEBUG): + logger.exception("Profile not found: %s", self.profile_identifier) + raise ProfileNotFound( + self.profile_identifier, + message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", + ) from e + if profile is None: + raise ProfileNotFound( + self.profile_identifier, + message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", + ) + + # if the inheritance is enabled, return only the target profile + if not self.inheritance_enabled: + return [profile] + + # Set the profiles to validate against as the target profile and its inherited profiles + profiles = [*profile.inherited_profiles, profile] + + # if the check for duplicates is disabled, return the profiles + if self.disable_check_for_duplicates: + return profiles + + return profiles + + @property + def profiles(self) -> list[Profile]: + """ + The profiles to validate against, + i.e., the target profile and its inherited profiles + + :return: The profiles to validate against + :rtype: list[Profile] + """ + if not self._profiles: + self._profiles = self.__load_profiles__() + return self._profiles.copy() + + @property + def target_validation_profile(self) -> Profile: + """ + The target validation profile to validate against + + :return: The target validation profile + :rtype: Profile + """ + assert self._target_validation_profile is not None, "Target validation profile not set" + return self._target_validation_profile + + @property + def target_profile(self) -> Profile: + """ + The target profile to validate against + + :return: The target profile + :rtype: Profile + """ + profiles = self.profiles + assert len(profiles) > 0, "No profiles to validate" + return self.profiles[-1] + + def get_profile_by_token(self, token: str) -> list[Profile]: + """ + Get the profile by token from the profiles to validate against + + :param token: The token of the profile + :type token: str + + :return: The 
profile with the given token + :rtype: Profile + """ + return [p for p in self.profiles if p.token == token] + + def get_profile_by_identifier(self, identifier: str) -> Profile: + """ + Get the profile by identifier from the profiles to validate against + + :param identifier: The identifier of the profile + :type identifier: str + + :return: The profile with the given identifier + :rtype: Profile + """ + for p in self.profiles: + if p.identifier == identifier: + return p + raise ProfileNotFound(identifier) + + def maybe_warn_offline_cache_miss(self, exc: BaseException) -> bool: + """ + If ``exc`` (or any cause/context in its chain) is an + :class:`OfflineCacheMissError`, emit a single user-facing warning + for the missing URL — but only the first time that URL is seen + during this validation run — and return ``True``. + + Returns ``False`` when the exception is unrelated to offline cache + misses, so callers can fall back to their generic handling. + """ + miss = find_offline_cache_miss(exc) + if miss is None: + return False + if miss.url not in self._offline_cache_misses_warned: + self._offline_cache_misses_warned.add(miss.url) + logger.warning("%s", miss) + return True From 1fb4bfd1fbd4d81aa375bbca8233ef27eeac1c6e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 18:59:36 +0200 Subject: [PATCH 327/352] =?UTF-8?q?chore(shacl):=20=F0=9F=8F=B7=EF=B8=8F?= =?UTF-8?q?=20add=20warning=20for=20caused=20by=20lazy=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rocrate_validator/requirements/shacl/validator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rocrate_validator/requirements/shacl/validator.py b/rocrate_validator/requirements/shacl/validator.py index af9eb7ffb..225f5e920 100644 --- a/rocrate_validator/requirements/shacl/validator.py +++ b/rocrate_validator/requirements/shacl/validator.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License. +# pylint: disable=cyclic-import # lazy imports break the cycle at runtime (see PLC0415 noqa markers in requirements) from __future__ import annotations from pathlib import Path From 787a4b7d38369d0b79601c9706d0cc3093bfdd40 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 12 Jun 2026 19:11:48 +0200 Subject: [PATCH 328/352] =?UTF-8?q?chore(models):=20=F0=9F=8F=B7=EF=B8=8F?= =?UTF-8?q?=20move=20type-only=20imports=20to=20TYPE=5FCHECKING=20blocks?= =?UTF-8?q?=20and=20reorder=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 +++++ rocrate_validator/models/__init__.py | 29 ++++++++++++------------- rocrate_validator/models/profile.py | 6 +++-- rocrate_validator/models/requirement.py | 5 +++-- rocrate_validator/models/result.py | 6 +++-- rocrate_validator/models/settings.py | 2 +- rocrate_validator/models/statistics.py | 12 +++++----- rocrate_validator/models/validation.py | 8 ++++--- 8 files changed, 42 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index f4b09e1f1..d147bca6f 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,8 @@ docs/_build # ignore IDE files .vscode/ .idea/ + +extra_crates/ +extra_profiles/ + +*.back diff --git a/rocrate_validator/models/__init__.py b/rocrate_validator/models/__init__.py index e4cb06746..eadc17bf8 100644 --- a/rocrate_validator/models/__init__.py +++ b/rocrate_validator/models/__init__.py @@ -12,25 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rocrate_validator.utils.uri import URI - from rocrate_validator.models._logging import logger -from rocrate_validator.models.severity import ( - LevelCollection, - RequirementLevel, - Severity, -) -from rocrate_validator.models.settings import ( - DEFAULT_PROFILES_PATH, - BaseTypes, - ValidationSettings, -) from rocrate_validator.models.events import ( ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, ValidationEvent, ) +from rocrate_validator.models.profile import Profile from rocrate_validator.models.requirement import ( Requirement, RequirementCheck, @@ -38,12 +27,21 @@ SkipRequirementCheck, SourceSnippet, ) -from rocrate_validator.models.profile import Profile from rocrate_validator.models.result import ( CheckIssue, CustomEncoder, ValidationResult, ) +from rocrate_validator.models.settings import ( + DEFAULT_PROFILES_PATH, + BaseTypes, + ValidationSettings, +) +from rocrate_validator.models.severity import ( + LevelCollection, + RequirementLevel, + Severity, +) from rocrate_validator.models.statistics import ( AggregatedValidationStatistics, ValidationStatistics, @@ -53,13 +51,15 @@ ValidationContext, Validator, ) +from rocrate_validator.utils.uri import URI __all__ = [ + "DEFAULT_PROFILES_PATH", + "URI", "AggregatedValidationStatistics", "BaseTypes", "CheckIssue", "CustomEncoder", - "DEFAULT_PROFILES_PATH", "LevelCollection", "Profile", "ProfileValidationEvent", @@ -72,7 +72,6 @@ "Severity", "SkipRequirementCheck", "SourceSnippet", - "URI", "ValidationContext", "ValidationEvent", "ValidationResult", diff --git a/rocrate_validator/models/profile.py b/rocrate_validator/models/profile.py index b827fee59..55b753743 100644 --- a/rocrate_validator/models/profile.py +++ b/rocrate_validator/models/profile.py @@ -17,7 +17,7 @@ import re from functools import total_ordering from pathlib import Path -from typing import TYPE_CHECKING, Any, Collection, cast +from typing import TYPE_CHECKING, Any, cast from rdflib import RDF, RDFS, 
Graph, Namespace, URIRef @@ -35,11 +35,13 @@ ProfileSpecificationError, ProfileSpecificationNotFound, ) -from rocrate_validator.models.severity import Severity from rocrate_validator.models._logging import logger +from rocrate_validator.models.severity import Severity from rocrate_validator.utils.collections import MapIndex, MultiIndexMap if TYPE_CHECKING: + from collections.abc import Collection + from rocrate_validator.models.requirement import Requirement, RequirementCheck @total_ordering diff --git a/rocrate_validator/models/requirement.py b/rocrate_validator/models/requirement.py index ae7b77377..6c4ff8f9e 100644 --- a/rocrate_validator/models/requirement.py +++ b/rocrate_validator/models/requirement.py @@ -18,7 +18,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from functools import total_ordering -from pathlib import Path from typing import TYPE_CHECKING, Any, cast from rocrate_validator.constants import ( @@ -39,6 +38,8 @@ ) if TYPE_CHECKING: + from pathlib import Path + from rocrate_validator.models.profile import Profile from rocrate_validator.models.validation import ValidationContext @@ -473,7 +474,7 @@ def ok_file(p: Path) -> bool: ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) requirements.extend( - cast("Any", requirement_loader).load( + cast(Any, requirement_loader).load( # noqa: TC006 profile, requirement_level, requirement_path, diff --git a/rocrate_validator/models/result.py b/rocrate_validator/models/result.py index 2570a692a..3dcf4979f 100644 --- a/rocrate_validator/models/result.py +++ b/rocrate_validator/models/result.py @@ -18,7 +18,7 @@ import json from functools import total_ordering from pathlib import Path -from typing import TYPE_CHECKING, Any, Collection, cast +from typing import TYPE_CHECKING, Any, cast from rocrate_validator import __version__ from rocrate_validator.constants import JSON_OUTPUT_FORMAT_VERSION @@ -31,9 +31,11 @@ RequirementLevel, Severity, ) 
-from rocrate_validator.models.settings import ValidationSettings if TYPE_CHECKING: + from collections.abc import Collection + + from rocrate_validator.models.settings import ValidationSettings from rocrate_validator.models.statistics import ValidationStatistics from rocrate_validator.models.validation import ValidationContext diff --git a/rocrate_validator/models/settings.py b/rocrate_validator/models/settings.py index 230292410..179bb46a9 100644 --- a/rocrate_validator/models/settings.py +++ b/rocrate_validator/models/settings.py @@ -21,8 +21,8 @@ DEFAULT_HTTP_CACHE_MAX_AGE, DEFAULT_PROFILE_IDENTIFIER, ) -from rocrate_validator.models.severity import Severity from rocrate_validator.models._logging import logger +from rocrate_validator.models.severity import Severity from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings from rocrate_validator.utils.document_loader import install_document_loader from rocrate_validator.utils.http import HttpRequester diff --git a/rocrate_validator/models/statistics.py b/rocrate_validator/models/statistics.py index 88f360998..b8d79cbf8 100644 --- a/rocrate_validator/models/statistics.py +++ b/rocrate_validator/models/statistics.py @@ -19,25 +19,25 @@ from typing import TYPE_CHECKING, Protocol, cast from rocrate_validator.events import Event, EventType, Subscriber +from rocrate_validator.models._logging import logger from rocrate_validator.models.events import ( ProfileValidationEvent, RequirementCheckValidationEvent, RequirementValidationEvent, ValidationEvent, ) -from rocrate_validator.models._logging import logger from rocrate_validator.models.profile import Profile -from rocrate_validator.models.requirement import ( - Requirement, - RequirementCheck, -) +from rocrate_validator.models.settings import ValidationSettings from rocrate_validator.models.severity import ( LevelCollection, Severity, ) -from rocrate_validator.models.settings import ValidationSettings if TYPE_CHECKING: + from 
rocrate_validator.models.requirement import ( + Requirement, + RequirementCheck, + ) from rocrate_validator.models.result import ValidationResult from rocrate_validator.models.validation import ValidationContext diff --git a/rocrate_validator/models/validation.py b/rocrate_validator/models/validation.py index a94bd8921..ba603252f 100644 --- a/rocrate_validator/models/validation.py +++ b/rocrate_validator/models/validation.py @@ -15,7 +15,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.error import HTTPError from rdflib import Graph @@ -38,12 +38,14 @@ RequirementLoader, ) from rocrate_validator.models.result import ValidationResult -from rocrate_validator.models.severity import Severity from rocrate_validator.models.settings import ValidationSettings +from rocrate_validator.models.severity import Severity from rocrate_validator.rocrate import ROCrate from rocrate_validator.utils import log as logging from rocrate_validator.utils.http import find_offline_cache_miss -from rocrate_validator.utils.uri import URI + +if TYPE_CHECKING: + from rocrate_validator.utils.uri import URI class Validator(Publisher): From 9fe1a05fc1b70027bfb1a137675eabb06b8cd39e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 15 Jun 2026 12:13:42 +0200 Subject: [PATCH 329/352] ci(pre-commit): add pylint hooks for main package and tests --- .pre-commit-config.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b31acc4bb..d558d602c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,3 +58,24 @@ repos: language: system types: [python] pass_filenames: false + + # Pylint runs against the whole package (not just the changed files) so + # cross-module import resolution stays accurate. 
Two hooks are needed + # because pylint loads a single rcfile per run: the main package uses + # the `[tool.pylint.*]` sections in `pyproject.toml`, while `tests/` + # uses its own `tests/.pylintrc` (pytest-fixture idioms, domain camelCase + # allowlists). The hooks only fire when files in their respective trees + # change, but each one lints its whole tree. + - id: pylint-main + name: pylint (rocrate_validator) + entry: poetry run pylint rocrate_validator/ + language: system + files: ^rocrate_validator/.*\.py$ + pass_filenames: false + + - id: pylint-tests + name: pylint (tests) + entry: poetry run pylint --rcfile=tests/.pylintrc tests/ + language: system + files: ^tests/.*\.py$ + pass_filenames: false From 8471bb188601c7f054eb0361192acb10aced0b3a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 15 Jun 2026 12:19:09 +0200 Subject: [PATCH 330/352] test: pylint cleanup, fix tempfile race, and rename for underscore consistency --- rocrate_validator/models/profile.py | 1 + tests/.pylintrc | 70 +++++++++++++++++++ .../process-run-crate/test_procrc_action.py | 2 +- ...lid_ro-crate.py => test_valid_ro_crate.py} | 0 tests/shared.py | 18 +++-- tests/unit/requirements/test_shacl_checks.py | 11 ++- tests/unit/test_cache_warmup.py | 7 +- tests/unit/test_document_loader.py | 6 +- tests/unit/test_http_requester_offline.py | 8 +-- tests/unit/test_http_requester_reconfigure.py | 2 +- tests/unit/test_offline_cache_miss_warning.py | 7 +- tests/unit/test_rocrate.py | 2 + 12 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 tests/.pylintrc rename tests/integration/profiles/ro-crate/{test_valid_ro-crate.py => test_valid_ro_crate.py} (100%) diff --git a/rocrate_validator/models/profile.py b/rocrate_validator/models/profile.py index 55b753743..6d245fea2 100644 --- a/rocrate_validator/models/profile.py +++ b/rocrate_validator/models/profile.py @@ -44,6 +44,7 @@ from rocrate_validator.models.requirement import Requirement, RequirementCheck + @total_ordering 
class Profile: """ diff --git a/tests/.pylintrc b/tests/.pylintrc new file mode 100644 index 000000000..c3243857a --- /dev/null +++ b/tests/.pylintrc @@ -0,0 +1,70 @@ +# Pylint configuration scoped to the tests tree. +# +# Pylint loads one rcfile per run (CWD-walking discovery); to apply these +# overrides, invoke pylint with `--rcfile=tests/.pylintrc` when linting +# `tests/`, e.g.: +# +# poetry run pylint --rcfile=tests/.pylintrc tests/ +# +# The project-wide config in `pyproject.toml` continues to govern the main +# package (`rocrate_validator/`). This file intentionally mirrors the parts +# of that config we still want, then adds disables that only make sense for +# pytest-based test code. + +[MAIN] +py-version = 3.10 +ignored-modules = msvcrt + +[FORMAT] +max-line-length = 120 + +[BASIC] +# Mirror the project-wide allowlist (kept in sync with pyproject.toml) and +# extend it with patterns that only appear in the test tree. +good-names-rgxs = + ^_?publicID$, + ^_?(violating[A-Za-z]+|propertyValue)$, + ^(basicConfig|getLogger)$, + ^(_installed|_config|__profiles_loaded)$, + ^[A-Z][A-Z0-9_]*_TYPES$, + ^(focusNode|resultPath|sourceConstraintComponent|sourceShape|propertyGroup|requirementCheck)$, + ^SHACL$, + ^jsonLD(_[a-zA-Z]+)?$, + # Test names and helper attributes carry RO-Crate / SHACL camelCase domain + # terms verbatim (e.g. `test_no_sdDatePublished`, `missing_hasPart_*`); the + # mixed-case is the spec wording we want to preserve. + ^[a-zA-Z_]*(hasPart|sdDatePublished|termCode|intendedUse|sourceShape|softwareVersion)[a-zA-Z_]*$, + # PyFunctionCheck subclasses in `tests/data/profiles/**` follow the + # SHACL severity convention (UPPERCASE class names like `B_MUST`). + ^[A-Z][A-Z0-9]*(_[A-Z0-9]+)+$, + +[MESSAGES CONTROL] +# Inherit the project-wide disables (kept in sync with pyproject.toml) ... 
+disable = + missing-module-docstring, + missing-class-docstring, + missing-function-docstring, + too-many-arguments, + too-many-positional-arguments, + too-many-public-methods, + too-few-public-methods, + too-many-instance-attributes, + duplicate-code, + import-outside-toplevel, + broad-exception-caught, + wrong-import-position, + global-statement, + protected-access, + # ... then add the pytest-specific noise: + # Pytest fixtures are passed by *name*: the test function parameter must + # match the fixture name, which pylint flags as shadowing the outer-scope + # `@pytest.fixture` symbol. This is the canonical pytest pattern, not a + # bug — disable globally for the test tree. + redefined-outer-name, # W0621 + # Many fixtures (`mock_network`, `monkeypatch`, `tmp_path`, ...) are + # requested for side effects only; the parameter is intentionally unused + # inside the test body. Same for HTTP-mock callbacks that must mirror the + # `requests` signature `(url, headers, ...)`. + unused-argument, # W0613 + # `TODO:` markers in tests are work-in-progress notes, not lint signal. + fixme, # W0511 diff --git a/tests/integration/profiles/process-run-crate/test_procrc_action.py b/tests/integration/profiles/process-run-crate/test_procrc_action.py index 3e3d38766..10657001c 100644 --- a/tests/integration/profiles/process-run-crate/test_procrc_action.py +++ b/tests/integration/profiles/process-run-crate/test_procrc_action.py @@ -350,7 +350,7 @@ def test_procrc_action_bad_containerimage(): Test a Process Run Crate where the Action has a containerImage that does not point to a URL or to a ContainerImage object. 
""" - for crate in ( # noqa: B007 + for crate in ( # noqa: B007 # pylint: disable=unused-variable InvalidProcRC().action_bad_containerimage_url, InvalidProcRC().action_bad_containerimage_type, ): diff --git a/tests/integration/profiles/ro-crate/test_valid_ro-crate.py b/tests/integration/profiles/ro-crate/test_valid_ro_crate.py similarity index 100% rename from tests/integration/profiles/ro-crate/test_valid_ro-crate.py rename to tests/integration/profiles/ro-crate/test_valid_ro_crate.py diff --git a/tests/shared.py b/tests/shared.py index 41a7298d3..9fbe3bce2 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -94,8 +94,11 @@ def _prepare_temp_rocrate( rocrate_entity_patch: dict | None, rocrate_entity_mod_sparql: str | None, ) -> Path: - temp_rocrate_path = Path(tempfile.TemporaryDirectory().name) - shutil.copytree(rocrate_path, temp_rocrate_path) + # `mkdtemp` returns a stable path the test owns; `TemporaryDirectory().name` + # was deleted on GC before copytree ran. `dirs_exist_ok=True` lets us copy + # into the (already-created) mkdtemp directory. + temp_rocrate_path = Path(tempfile.mkdtemp()) + shutil.copytree(rocrate_path, temp_rocrate_path, dirs_exist_ok=True) with (temp_rocrate_path / "ro-crate-metadata.json").open(encoding="utf-8") as f: rocrate = json.load(f) if rocrate_entity_patch is not None: @@ -120,7 +123,10 @@ def _prepare_temp_rocrate( return temp_rocrate_path -def do_entity_test( +def do_entity_test( # pylint: disable=too-many-locals + # Shared test driver: the long signature mirrors `ValidationSettings` + # plus the assertion-shaping knobs, so locals scale with the surface area + # it has to cover. Splitting it would only push the locals to the caller. 
rocrate_path: Path | str, requirement_severity: models.Severity, expected_validation_result: bool, @@ -189,7 +195,11 @@ def do_entity_test( logger.debug("Expected validation result: %s", expected_validation_result) assert result.context is not None, "Validation context should not be None" - f"Expected requirement severity to be {requirement_severity}, but got {result.context.requirement_severity}" + logger.debug( + "Expected requirement severity to be %s, got %s", + requirement_severity, + result.context.requirement_severity, + ) assert result.passed() == expected_validation_result, ( f"RO-Crate should be {'valid' if expected_validation_result else 'invalid'}" ) diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index f4de84cc5..a8165dcdc 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -12,17 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import logging -from typing import cast +from typing import TYPE_CHECKING, cast from rdflib import BNode, Graph, Namespace, URIRef from rocrate_validator.constants import SHACL_NS -from rocrate_validator.models import LevelCollection, Requirement +from rocrate_validator.models import LevelCollection from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import NodeShape, PropertyShape, Shape, ShapesRegistry from rocrate_validator.requirements.shacl.utils import resolve_parent_shape +if TYPE_CHECKING: + from rocrate_validator.models import Requirement + logger = logging.getLogger(__name__) @@ -108,7 +113,7 @@ def test_description_fallback_no_description_no_parent_description(): def test_property_shape_description_fallback(): """Test description fallback for PropertyShape without explicit description.""" - from rocrate_validator.requirements.shacl.models import PropertyShape + # PropertyShape already imported at module level g = Graph() prop = PropertyShape(URIRef("http://example.org/property"), g) diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py index ca22e5c0f..71bc36b50 100644 --- a/tests/unit/test_cache_warmup.py +++ b/tests/unit/test_cache_warmup.py @@ -17,12 +17,12 @@ from __future__ import annotations import io -from typing import cast +from typing import TYPE_CHECKING, cast import pytest import urllib3 -from rocrate_validator.models import Profile, ValidationSettings +from rocrate_validator.models import Profile from rocrate_validator.utils.cache_warmup import ( auto_warm_up_for_settings, discover_cacheable_urls_from_profiles, @@ -32,6 +32,9 @@ from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path +if TYPE_CHECKING: + from rocrate_validator.models import ValidationSettings + PROFILE_TTL_TEMPLATE = """ @prefix dct: . @prefix prof: . 
diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py index 86b4b713b..d2e1da2b6 100644 --- a/tests/unit/test_document_loader.py +++ b/tests/unit/test_document_loader.py @@ -173,7 +173,8 @@ def test_install_patches_both_util_and_context(tmp_path): install_document_loader() assert jsonld_util.source_to_json is document_loader._patched_source_to_json - assert jsonld_context.source_to_json is document_loader._patched_source_to_json # pyright: ignore[reportPrivateImportUsage] + context_source = jsonld_context.source_to_json # pyright: ignore[reportPrivateImportUsage] + assert context_source is document_loader._patched_source_to_json def test_uninstall_restores_both_util_and_context(tmp_path): @@ -185,7 +186,8 @@ def test_uninstall_restores_both_util_and_context(tmp_path): uninstall_document_loader() assert jsonld_util.source_to_json is document_loader._original_source_to_json - assert jsonld_context.source_to_json is document_loader._original_source_to_json # pyright: ignore[reportPrivateImportUsage] + context_source = jsonld_context.source_to_json # pyright: ignore[reportPrivateImportUsage] + assert context_source is document_loader._original_source_to_json def test_context_module_resolution_routes_through_http(tmp_path, mock_network): diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py index 2765fcf40..67a01f5ef 100644 --- a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -180,7 +180,7 @@ def test_cache_info_reports_metadata(tmp_path): assert info["entries"] == 0 -class _RecordCollector: +class _RecordCollector: # pylint: disable=attribute-defined-outside-init """Context manager that attaches a capturing handler to the http logger.""" def __init__(self): @@ -189,14 +189,14 @@ def __init__(self): def __enter__(self): import logging as _logging - from rocrate_validator.utils import http as http_module + from rocrate_validator.utils import http as 
http_module # pylint: disable=reimported self.records.clear() self.handler = _logging.Handler() self.handler.setLevel(_logging.DEBUG) self.handler.emit = self.records.append # type: ignore[assignment] # Force initialization of the underlying logger via the proxy. - http_module.logger.warning # noqa: B018 + http_module.logger.warning # noqa: B018 # pylint: disable=pointless-statement target = http_module.logger._instance assert target is not None # initialized above via the proxy access self._target = target @@ -246,7 +246,7 @@ def test_offline_without_requests_cache_uses_fallback_session(tmp_path, monkeypa """When requests_cache is unavailable, offline mode falls back to a 504 stub.""" original_import = __import__ - def fake_import(name, globals=None, locals=None, fromlist=(), level=0): + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): # pylint: disable=redefined-builtin if name == "requests_cache" or (fromlist and "CachedSession" in fromlist and name.endswith("requests_cache")): raise ImportError("simulated missing dependency") return original_import(name, globals, locals, fromlist, level) diff --git a/tests/unit/test_http_requester_reconfigure.py b/tests/unit/test_http_requester_reconfigure.py index cbc6ab2f1..629efca65 100644 --- a/tests/unit/test_http_requester_reconfigure.py +++ b/tests/unit/test_http_requester_reconfigure.py @@ -113,7 +113,7 @@ def test_pinned_wrapper_survives_reconfigure(tmp_path): wrapper must still target the live session, not a closed one.""" requester = _initialize(tmp_path / "cache-1", cache_max_age=60) # pin the wrapper as an instance attribute - requester.get = requester.get # pyright: ignore[reportAttributeAccessIssue] + requester.get = requester.get # pyright: ignore[reportAttributeAccessIssue] # pylint: disable=no-member _initialize(tmp_path / "cache-2", cache_max_age=60) # rebuilds the session diff --git a/tests/unit/test_offline_cache_miss_warning.py b/tests/unit/test_offline_cache_miss_warning.py index 
eab453191..bf6a4e50c 100644 --- a/tests/unit/test_offline_cache_miss_warning.py +++ b/tests/unit/test_offline_cache_miss_warning.py @@ -31,6 +31,7 @@ def test_find_offline_cache_miss_direct(): def test_find_offline_cache_miss_walks_cause_chain(): inner = OfflineCacheMissError("https://example.org/x") + found = None try: try: raise inner @@ -43,15 +44,16 @@ def test_find_offline_cache_miss_walks_cause_chain(): def test_find_offline_cache_miss_walks_context_chain(): # `raise` inside `except` without `from` populates __context__. + found = None try: try: raise OfflineCacheMissError("https://example.org/y") except OfflineCacheMissError: - raise RuntimeError("wrapped via context") # noqa: B904 + raise RuntimeError("wrapped via context") # noqa: B904 # pylint: disable=raise-missing-from except Exception as outer: found = find_offline_cache_miss(outer) assert isinstance(found, OfflineCacheMissError) - assert found.url == "https://example.org/y" + assert found.url == "https://example.org/y" # pylint: disable=no-member def test_find_offline_cache_miss_returns_none_for_unrelated(): @@ -119,6 +121,7 @@ def test_maybe_warn_emits_once_per_distinct_url(bare_context, mock_logger): def test_maybe_warn_dedups_when_miss_is_wrapped(bare_context, mock_logger): url = "https://example.org/ctx" + wrapped = None try: raise RuntimeError("wrapped") from OfflineCacheMissError(url) except RuntimeError as wrapped_exc: diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index d4eacb969..f5824c47b 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# pylint: disable=abstract-class-instantiated + from pathlib import Path import pytest From 2f498481b17fe0eee9d897bb493f17a8be403d76 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 15 Jun 2026 12:21:01 +0200 Subject: [PATCH 331/352] chore(pre-commit): move mypy and pylint hooks to manual stage --- .pre-commit-config.yaml | 19 ++++++++++++------- README.md | 25 +++++++++++++++++-------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d558d602c..370ed027a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,11 +45,14 @@ repos: - id: ruff-check - id: ruff-format - # Run mypy to perform static type checking on the codebase. - # This hook is configured as a manual hook to run mypy using Poetry, - # ensuring that it uses the correct virtual environment and dependencies. - # To run this hook, use the command: - # `pre-commit run --hook-stage manual` + # Slow whole-project checks (mypy + pylint) are configured as MANUAL hooks + # so they do not run on every commit. They use `poetry run` to pick up the + # project's virtualenv and dependencies. To run them: + # + # pre-commit run --hook-stage manual # all manual hooks + # pre-commit run --hook-stage manual mypy # just mypy + # pre-commit run --hook-stage manual pylint-main # just main pylint + # pre-commit run --hook-stage manual pylint-tests # just tests pylint - repo: local hooks: - id: mypy @@ -58,20 +61,21 @@ repos: language: system types: [python] pass_filenames: false + stages: [manual] # Pylint runs against the whole package (not just the changed files) so # cross-module import resolution stays accurate. Two hooks are needed # because pylint loads a single rcfile per run: the main package uses # the `[tool.pylint.*]` sections in `pyproject.toml`, while `tests/` # uses its own `tests/.pylintrc` (pytest-fixture idioms, domain camelCase - # allowlists). 
The hooks only fire when files in their respective trees - # change, but each one lints its whole tree. + # allowlists). - id: pylint-main name: pylint (rocrate_validator) entry: poetry run pylint rocrate_validator/ language: system files: ^rocrate_validator/.*\.py$ pass_filenames: false + stages: [manual] - id: pylint-tests name: pylint (tests) @@ -79,3 +83,4 @@ repos: language: system files: ^tests/.*\.py$ pass_filenames: false + stages: [manual] diff --git a/README.md b/README.md index 5c8061760..f554c7424 100644 --- a/README.md +++ b/README.md @@ -182,25 +182,34 @@ poetry install The repository ships a [pre-commit](https://pre-commit.com/) configuration (`.pre-commit-config.yaml`) that runs spell checking (`typos`), linting and -formatting (`ruff`), and static type checking (`mypy`). The hooks are **not** -active until you install them once in your local clone: +formatting (`ruff`), static type checking (`mypy`), and static analysis +(`pylint`). The hooks are **not** active until you install them once in your +local clone: ```bash poetry run pre-commit install ``` -After this, the checks run automatically on every `git commit`. You can also run -them manually at any time: +After this, the **fast** checks (typos, ruff, basic file hygiene) run +automatically on every `git commit`. The **slow whole-project checks** — +`mypy`, `pylint (rocrate_validator)`, and `pylint (tests)` — are configured +as manual-stage hooks and are *not* triggered by `git commit`; run them +explicitly when you want a full review: ```bash -# Run all hooks against the whole codebase +# Run all auto hooks against the whole codebase poetry run pre-commit run --all-files -# Run a single hook (e.g. typos or ruff) +# Run a single auto hook (e.g. 
typos or ruff) poetry run pre-commit run typos --all-files -# mypy is configured as a manual-stage hook, so run it explicitly -poetry run pre-commit run --hook-stage manual +# Run ALL manual hooks (mypy + both pylint runs) +poetry run pre-commit run --hook-stage manual --all-files + +# Run a single manual hook +poetry run pre-commit run --hook-stage manual mypy +poetry run pre-commit run --hook-stage manual pylint-main # rocrate_validator/ +poetry run pre-commit run --hook-stage manual pylint-tests # tests/ (uses tests/.pylintrc) ``` `actionStatus` MUST have one of the allowed values." ; + ] . + + +#=== SHOULD shapes ===# + +five-safes-crate:RootDataEntityShouldMentionCheckValueObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + sh:severity sh:Warning ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "RootDataEntity SHOULD mention a check value object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action schema:additionalType shp:CheckValue . + } + } + """ ; + sh:message "RootDataEntity SHOULD mention a check value object." ; + ] . + + +five-safes-crate:CheckValueObjectShouldPointToRootDataEntity + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "object" ; + sh:path schema:object ; + sh:minCount 1 ; + sh:class ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `object` SHOULD point to the root of the RO-Crate" ; + ] . 
+ + +five-safes-crate:CheckValueInstrumentShouldPointToEntityTypedDefinedTerm + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "instrument" ; + sh:path schema:instrument ; + sh:minCount 1 ; + sh:class schema:DefinedTerm ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `instrument` SHOULD point to an entity typed `DefinedTerm`" ; + ] . + + +five-safes-crate:CheckValueAgentShouldIdentifyTheAgentWhoPerformnedTheCheck + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "agent" ; + sh:path schema:agent ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `agent` SHOULD reference the agent who initiated the check" ; + ] . + + +five-safes-crate:CheckValueShouldHaveEndTime + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:message "`CheckValue` SHOULD have the `endTime` property." ; + ] . 
+ + +five-safes-crate:CheckValueShouldHaveActionStatus + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "CheckValue SHOULD have actionStatus property." ; + ] . + + +#=== MAY shapes ===# + +five-safes-crate:CheckValueMayHaveStartTime + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:message "`CheckValue` MAY have the `startTime` property." ; + ] . diff --git a/rocrate_validator/profiles/five-safes-crate/13_validation_phase.ttl b/rocrate_validator/profiles/five-safes-crate/13_validation_phase.ttl new file mode 100644 index 000000000..e9d25d46b --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/13_validation_phase.ttl @@ -0,0 +1,308 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# + +five-safes-crate:ValidationCheckObjectHasDescriptiveNameAndIsAssessAction + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + sh:path rdf:type ; + sh:minCount 1 ; + sh:hasValue schema:AssessAction; + sh:severity sh:Violation ; + sh:message "ValidationCheck MUST be a `AssessAction`." ; + ] ; + + sh:property [ + sh:a sh:PropertyShape ; + sh:name "name" ; + sh:description "ValidationCheck MUST have a human readable name string." ; + sh:path schema:name ; + sh:datatype xsd:string ; + sh:severity sh:Violation ; + sh:message "ValidationCheck MUST have a human readable name string." ; + ] . + + +five-safes-crate:ValidationCheckActionStatusMustHaveAllowedValue + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?status . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "actionStatus" ; + sh:description "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." 
; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." ; + ] . + + +five-safes-crate:ValidationCheckActionStatusMustHaveAllowedValue + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "actionStatus" ; + sh:description "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ; + ] . + + +#=== SHOULD shapes ===# + +five-safes-crate:RootDataEntityShouldMentionValidationCheckObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + sh:severity sh:Warning ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "RootDataEntity SHOULD mention a ValidationCheck object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action schema:additionalType shp:ValidationCheck . + } + } + """ ; + sh:message "RootDataEntity SHOULD mention a ValidationCheck object." ; + ] . 
+ + +five-safes-crate:ValidationCheckObjectShouldPointToRootDataEntity + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "object" ; + sh:path schema:object ; + sh:minCount 1 ; + sh:class ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" ; + ] . + +five-safes-crate:ValidationCheckInstrumentShouldPointToEntityWithSpecificId + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "instrument" ; + sh:description "" ; + sh:path schema:instrument ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:hasValue ; + sh:severity sh:Warning ; + sh:message "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" ; + ] . + + +five-safes-crate:ValidationCheckShouldHaveActionStatus + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "ValidationCheck SHOULD have actionStatus property." ; + ] . 
+ + +five-safes-crate:DownloadActionShouldHaveEndTimeIfBegun + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + sh:message "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + ] . + + + #=== MAY shapes ===# + + five-safes-crate:DownloadActionMayHaveStartTimeIfBegun + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:description "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + sh:message "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + ] . 
diff --git a/rocrate_validator/profiles/five-safes-crate/14_workflow_retrieval_phase.ttl b/rocrate_validator/profiles/five-safes-crate/14_workflow_retrieval_phase.ttl
new file mode 100644
index 000000000..df7ad33e8
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/14_workflow_retrieval_phase.ttl
@@ -0,0 +1,344 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:DownloadActionObjectMUSTHavesDescriptiveName  # every schema:DownloadAction needs exactly one xsd:string schema:name
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:targetClass schema:DownloadAction ;
+    sh:description "" ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "name" ;
+        sh:description "DownloadAction MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "DownloadAction MUST have a human readable name string." ;
+    ] .
+
+
+
+five-safes-crate:WorkflowSameAsAndRootDataEntityMainEntityMUSTBeTheSame  # targets Datasets that are the schema:result of a DownloadAction
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:severity sh:Violation ;
+
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                   schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # reports $this when none of its schema:sameAs values is the schema:mainEntity of some entity
+        a sh:SPARQLConstraint ;  # NOTE(review): ?s in the query is not restricted to the Root Data Entity - confirm this is intended
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT $this
+            WHERE {
+                FILTER NOT EXISTS {
+                    $this schema:sameAs ?o .
+                    ?s schema:mainEntity ?o .
+                    # ?o rdf:type schema:Dataset .
+                }
+            }
+        """ ;
+
+        sh:description "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+        sh:message "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+    ] .
+
+
+five-safes-crate:DownloadedWorkflowDistributionAndDownloadActionObjectMUSTBeTheSame  # same target query as the shape above
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:severity sh:Violation ;
+
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                   schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # one result row per DownloadAction whose schema:object is not a schema:distribution of $this
+        a sh:SPARQLConstraint ;
+        sh:name "" ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT $this
+            WHERE {
+                ?action rdf:type schema:DownloadAction .
+                FILTER NOT EXISTS {
+                    $this schema:distribution ?url .
+                    ?action schema:object ?url .
+                }
+            }
+        """ ;
+        sh:message "DownloadedWorkflow --> `distribution` MUST reference the same entity as `DownloadAction` --> `object`." ;
+    ] .
+
+
+five-safes-crate:DownloadActionActionStatusMUSTHaveAllowedValues  # only DownloadActions that have an actionStatus are targeted
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "" ;
+
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                      schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:in (  # the allowed values are string literals, not IRIs
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+
+five-safes-crate:DownloadActionEntitySHOULDExist  # warns on the Root Data Entity when the graph has no DownloadAction at all
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:description "" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:severity sh:Warning ;
+
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "" ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT $this
+            WHERE {
+                FILTER NOT EXISTS {
+                    ?s rdf:type schema:DownloadAction .
+                }
+            }
+        """ ;
+        sh:message "An entity typed DownloadAction SHOULD exist." ;
+    ] .
+
+
+
+five-safes-crate:RootDataEntitySHOULDMentionDownloadActionIfPresent  # one warning per DownloadAction the root does not mention
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:description "" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:severity sh:Warning ;
+
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "" ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT $this ?da
+            WHERE {
+                ?da rdf:type schema:DownloadAction .
+                FILTER NOT EXISTS {
+                    $this schema:mentions ?da .
+                }
+            }
+        """ ;
+        sh:message "RootDataEntity SHOULD mention DownloadAction if this exists." ;
+    ] .
+
+
+five-safes-crate:DownloadActionShouldHaveEndTimeIfEnded  # targets DownloadActions whose status is Completed or Failed
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                      schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "EndTime" ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "`DownloadAction` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    ] .
+
+
+five-safes-crate:DownloadActionShouldHaveActionStatus  # warns when a DownloadAction has no actionStatus
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:targetClass schema:DownloadAction ;
+    sh:description "" ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "`DownloadAction` SHOULD have `actionStatus` property." ;
+    ] .
+
+
+#=== MAY shapes ===#
+
+five-safes-crate:DownloadedWorkflowSHOULDExistAndBeReferencedByDownloadActionResult  # Info severity, despite "SHOULD" in the identifier
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "Validates that DownloadAction result references an existing entity" ;
+    sh:targetClass schema:DownloadAction ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Result" ;
+        sh:description "The result property must reference an existing entity in the RO-Crate" ;
+        sh:path schema:result ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:sparql [  # the query only tests for rdf:type schema:Dataset; the in-query remark about a name is not enforced
+            a sh:SPARQLConstraint ;
+            sh:select """
+                PREFIX rdf: 
+                PREFIX schema: 
+
+                SELECT $this $value
+                WHERE {
+                    $this schema:result $value .
+
+                    # Entity must have BOTH type AND name (proper definition)
+                    FILTER NOT EXISTS {
+                        $value rdf:type schema:Dataset .
+                    }
+                }
+            """ ;
+        ] ;
+        sh:severity sh:Info ;
+        sh:message "The entity representing the downloaded workflow is not defined, OR is not referenced by `DownloadAction` --> `result`, OR is not of type `Dataset`." ;
+    ] .
+
+
+five-safes-crate:DownloadActionMayHaveStartTimeIfBegun  # targets DownloadActions whose status is Active, Completed or Failed
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description
+        # One literal: in Turtle, ( "a" "b" ) is an RDF list of two strings, not a concatenation.
+        "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus."
+    ;
+
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: 
+            PREFIX rdf: 
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                      schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one schema:startTime, reported at Info level
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:description
+            # One literal, for the same reason as the node-level description.
+            "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus."
+        ;
+        sh:severity sh:Info ;
+        sh:message "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+    ] .
diff --git a/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py
new file mode 100644
index 000000000..b589d332f
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2024-2025 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import rocrate_validator.utils.log as logging
+from rocrate_validator.models import Severity, ValidationContext
+from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate context version")
+class FileDescriptorContextVersion(PyFunctionCheck):
+    """The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+    (or later minor version) in `@context`"""
+
+    @check(name="RO-Crate context version", severity=Severity.REQUIRED)
+    def test_existence(self, context: ValidationContext) -> bool:
+        """
+        The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+        (or later minor version) in `@context`
+        """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            context_value = json_dict.get("@context")
+        except Exception as e:
+            # Best effort: a metadata file that cannot be read does not fail this check.
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return True
+
+        pattern = re.compile(
+            r"https://w3id\.org/ro/crate/1\.[2-9](-DRAFT)?/context"
+        )
+        # `@context` may be a string, an object or a list of those; only string
+        # entries are matched, so a missing or object-only context fails the check.
+        if not isinstance(context_value, list):
+            context_value = [context_value]
+        passed = any(
+            pattern.match(item)
+            for item in context_value
+            if isinstance(item, str)
+        )
+
+        if not passed:
+            context.result.add_issue(
+                "The RO-Crate metadata file MUST include the RO-Crate context "
+                "version 1.2 (or later minor version) in `@context`",
+                self,
+            )
+        return passed
diff --git a/rocrate_validator/profiles/five-safes-crate/15_metadata_file.ttl b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.ttl
new file mode 100644
index 000000000..72f5e020e
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.ttl
@@ -0,0 +1,53 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+@prefix dct: .
+
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:MetadataFileDescriptorProperties a sh:NodeShape ;
+    sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+    sh:description """The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version""";
+    sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+        sh:description "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:path dct:conformsTo ;
+        sh:pattern "https://w3id\\.org/ro/crate/(1\\.[2-9](-DRAFT)?)" ;
+        sh:severity sh:Violation;
+        sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+    ] .
+
+ro-crate:conformsToROCrateSpec sh:deactivated true .
+
+
+#=== SHOULD shapes ===#
+# (none)
+
+
+#=== MAY shapes ===#
+# (none)
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/16_publishing_phase.ttl b/rocrate_validator/profiles/five-safes-crate/16_publishing_phase.ttl
new file mode 100644
index 000000000..c56e67b72
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/16_publishing_phase.ttl
@@ -0,0 +1,51 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:AllAssessActionsMentioned  # applies to every schema:AssessAction in the crate
+    a sh:NodeShape ;
+    sh:name "All AssessActions are mentioned from Root Data Entity" ;
+    sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+    sh:targetClass schema:AssessAction;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "AssessAction mentions from RDE" ;
+        sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+        sh:path [ sh:inversePath schema:mentions ] ;  # value nodes are the entities that mention this AssessAction
+        sh:node ro-crate:RootDataEntity ;  # NOTE(review): sh:node expects a shape; confirm ro-crate:RootDataEntity is declared as one, not only as a class
+        sh:minCount 1 ;
+        sh:severity sh:Violation ;
+        sh:message "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+# (none)
+
+
+#=== MAY shapes ===#
+# (none)
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/1_requesting_agent.ttl b/rocrate_validator/profiles/five-safes-crate/1_requesting_agent.ttl
new file mode 100644
index 000000000..812691666
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/1_requesting_agent.ttl
@@ -0,0 +1,114 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:AgentIsMemberOf  # targets every schema:agent of a schema:CreateAction
+    a sh:NodeShape ;
+    sh:name "Requesting Agent" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;  # the query's prefixes come from this declaration, which is defined outside this file
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                        schema:agent ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # no sh:minCount here: only existing memberOf values are checked for class schema:Project
+        a sh:PropertyShape ;
+        sh:name "memberOf" ;
+        sh:path schema:memberOf;
+        sh:class schema:Project ;
+        sh:severity sh:Violation ;
+        sh:message """The 'memberOf' property of an agent MUST be of type Project.""" ;
+    ] .
+
+
+five-safes-crate:AgentProjectIntersection  # targets every schema:agent of a schema:CreateAction
+    a sh:NodeShape ;
+    sh:name "Agent Project Intersection" ;
+    sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                        schema:agent ?this .
+            }
+        """
+    ] ;
+    sh:sparql [  # fires only for agents that have a memberOf, none of which is a sourceOrganization of an entity that is schema:about of something
+        a sh:SPARQLConstraint ;
+        sh:name "Agent Project Intersection" ;
+        sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT $this WHERE {
+                FILTER EXISTS {
+                    $this schema:memberOf ?anyProject .
+                }
+                FILTER NOT EXISTS {
+                    $this schema:memberOf ?commonProject .
+                    ?metadata schema:about ?root .
+                    ?root schema:sourceOrganization ?commonProject .
+                }
+            }
+        """ ;
+    ] ;
+    sh:severity sh:Violation ;
+    sh:message """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" .
+
+
+#=== SHOULD shapes ===#
+
+five-safes-crate:RequestingAgentShouldHaveMemberOf  # own IRI: reusing five-safes-crate:AgentIsMemberOf merged this shape into the MUST shape
+    a sh:NodeShape ;
+    sh:name "Requesting Agent" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;  # NOTE(review): the query uses `rocrate:`; sibling files use `ro-crate:` - confirm which prefix is declared
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a rocrate:WorkflowRunAction ;
+                        schema:agent ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # warns when the agent has no schema:memberOf at all
+        a sh:PropertyShape ;
+        sh:name "memberOf" ;
+        sh:path schema:memberOf;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message """The Requesting Agent SHOULD have a `memberOf` property.""" ;
+    ] .
+
+
+    #=== MAY shapes ===#
+    # (none)
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/1_responsible_project.ttl b/rocrate_validator/profiles/five-safes-crate/1_responsible_project.ttl
new file mode 100644
index 000000000..7be332d23
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/1_responsible_project.ttl
@@ -0,0 +1,139 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:ResponsibleProject  # targets every memberOf value of an agent of a schema:CreateAction
+    a sh:NodeShape ;
+    sh:name "Responsible Project" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                        schema:agent ?agent .
+                ?agent schema:memberOf ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # existing funding values must be schema:Grant; presence is not required here
+        a sh:PropertyShape ;
+        sh:name "funding" ;
+        sh:path schema:funding;
+        sh:class schema:Grant ;
+        sh:severity sh:Violation ;
+        sh:message """The property 'funding' of the Responsible Project MUST be of type Grant.""" ;
+    ] ;
+
+    sh:property [  # existing member values must be an Organization or a Person
+        a sh:PropertyShape ;
+        sh:name "member" ;
+        sh:path schema:member;
+        sh:or (
+            [ sh:class schema:Organization ]
+            [ sh:class schema:Person ]
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message """The property 'member' of the Responsible Project MUST be of type schema:Organization or Person.""" ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+
+five-safes-crate:ResponsibleProjectMemberAndSourceOrganizationIntersection
+    a sh:NodeShape ;
+    sh:name "Organizations (members of Responsible Project)" ;
+    sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+    sh:severity sh:Warning ;
+
+    sh:target [  # Person agents of a WorkflowRunAction that have an affiliation and whose project has at least one member
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a ro-crate:WorkflowRunAction ;
+                        schema:agent ?this .
+                ?this a schema:Person ;
+                      schema:memberOf ?project ;
+                      schema:affiliation ?someAffiliation .
+                ?project schema:member ?org2 .
+            }
+        """
+    ] ;
+
+    sh:sparql [  # fires when no affiliation of the agent is also a member of one of the agent's projects
+        a sh:SPARQLConstraint ;
+        sh:name "Intersection with agent affiliations" ;
+        sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT $this WHERE {
+                FILTER NOT EXISTS {
+                    $this schema:affiliation ?org .
+                    $this schema:memberOf ?project .
+                    ?project schema:member ?org .
+                }
+            }
+        """ ;
+        sh:message """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+    ] .
+
+
+#=== MAY shapes ===#
+
+five-safes-crate:ResponsibleProjectMayHaveFundingAndMember  # own IRI: reusing five-safes-crate:ResponsibleProject merged this shape into the MUST shape
+    a sh:NodeShape ;
+    sh:name "Responsible Project" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a ro-crate:WorkflowRunAction ;
+                        schema:agent ?agent .
+                ?agent schema:memberOf ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # Info-level note when the project has no funding
+        a sh:PropertyShape ;
+        sh:name "funding" ;
+        sh:path schema:funding;
+        sh:minCount 1 ;
+        sh:severity sh:Info ;
+        sh:message """The Responsible Project does not have the property `funding`.""" ;
+    ] ;
+
+    sh:property [  # Info-level note when the project has no member
+        a sh:PropertyShape ;
+        sh:name "member" ;
+        sh:path schema:member;
+        sh:minCount 1 ;
+        sh:severity sh:Info ;
+        sh:message """The Responsible Project does not have the property `member`.""" ;
+    ] .
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/1_root_data_entity.ttl b/rocrate_validator/profiles/five-safes-crate/1_root_data_entity.ttl
new file mode 100644
index 000000000..b5481fd12
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/1_root_data_entity.ttl
@@ -0,0 +1,56 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:RootDataEntityRequiredProperties  # two separate property shapes, so presence and type are reported independently
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+
+    sh:property [  # presence: at least one sourceOrganization
+        a sh:PropertyShape ;
+        sh:name "sourceOrganization" ;
+        sh:path schema:sourceOrganization;
+        sh:minCount 1 ;
+        sh:severity sh:Violation ;
+        sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ;
+    ] ;
+
+    sh:property [  # type: every sourceOrganization value must be a schema:Project
+        a sh:PropertyShape ;
+        sh:name "sourceOrganization" ;
+        sh:path schema:sourceOrganization ;
+        sh:class schema:Project ;
+        sh:severity sh:Violation ;
+        sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+# (none)
+
+
+#=== MAY shapes ===#
+# (none)
diff --git a/rocrate_validator/profiles/five-safes-crate/2_requesting_agent.ttl b/rocrate_validator/profiles/five-safes-crate/2_requesting_agent.ttl
new file mode 100644
index 000000000..49e5ba3f1
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/2_requesting_agent.ttl
@@ -0,0 +1,97 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+five-safes-crate:CreateActionHasAgent  # three property shapes: agent presence, agent type, affiliation type
+    a sh:NodeShape ;
+    sh:name "CreateAction" ;
+    sh:targetClass schema:CreateAction ;
+    sh:description "Checks that a CreateAction has an agent and that each agent is a Person." ;
+
+    # CreateAction entity MUST have an agent (IRI)
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Has Agent" ;
+        sh:path schema:agent ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:severity sh:Violation ;
+        sh:message "CreateAction MUST have at least one agent that is a contextual entity." ;
+    ] ;
+
+    # The agent of a CreateAction entity MUST be a Person
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Agent is a Person" ;
+        sh:path schema:agent ;
+        sh:nodeKind sh:IRI ;
+        sh:class schema:Person ;
+        sh:severity sh:Violation ;
+        sh:message "Each CreateAction agent MUST be typed as Person." ;
+    ] ;
+
+    # If any agent affiliation exists, it MUST be an Organization (IRI)
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Affiliation is an Organization" ;
+        sh:path ( schema:agent schema:affiliation ) ;  # sequence path: the affiliations of the action's agents
+        sh:class schema:Organization ;
+        sh:nodeKind sh:IRI ;
+        sh:severity sh:Violation ;
+        sh:message "The affiliation of a CreateAction's agent MUST be a contextual entity with type Organization." ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+
+# Person who is the agent of a WorkflowRunAction SHOULD have an affiliation
+five-safes-crate:PersonAgentHasAffiliation
+    a sh:NodeShape ;
+    sh:name "Agent of WorkflowRunAction" ;
+    sh:description "The agent of a WorkflowRunAction entity" ;
+    sh:target [  # the query does not check that the agent is a Person
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a ro-crate:WorkflowRunAction ;
+                        schema:agent ?this .
+            }
+        """
+    ] ;
+
+    # The agent of the `CreateAction` corresponding to the WorkflowRunAction SHOULD have an affiliation
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Presence of affiliations" ;
+        sh:path schema:affiliation ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "The agent of the `CreateAction` corresponding to the workflow run SHOULD have an affiliation" ;
+    ] .
+
+
+# === MAY shapes ===#
+# (none)
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/3_timestamp_format.ttl b/rocrate_validator/profiles/five-safes-crate/3_timestamp_format.ttl
new file mode 100644
index 000000000..64055a033
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/3_timestamp_format.ttl
@@ -0,0 +1,64 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+#=== MUST shapes ===#
+
+# To ensure the entity id is included in any error message,
+# target all entities which have startTime and/or endTime properties using sh:targetSubjectsOf,
+# then use sh:property to validate the values of those properties.
+# the properties are listed individually so that the property id appears in any error message too
+five-safes-crate:TimeStampFormat
+    a sh:NodeShape ;
+    sh:name "Timestamp Format" ;
+    sh:description "Timestamps MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+    sh:targetSubjectsOf schema:startTime, schema:endTime;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "End TimeStamp" ;
+        sh:description "End timestamps MUST follow the RFC 3339 standard." ;
+        sh:path schema:endTime ;
+        sh:minCount 0 ;  # the fraction separator class is [.,]; the former [.|,] also matched a literal "|"
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;
+        sh:severity sh:Violation ;
+        sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Start TimeStamp" ;
+        sh:description "Start timestamps MUST follow the RFC 3339 standard." ;
+        sh:path schema:startTime ;
+        sh:minCount 0 ;  # same pattern as for endTime
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;
+        sh:severity sh:Violation ;
+        sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+    ] .
+
+
+#=== SHOULD shapes ===#
+# (none)
+
+
+#=== MAY shapes ===#
+# (none)
\ No newline at end of file
diff --git a/rocrate_validator/profiles/five-safes-crate/4_sign_off.ttl b/rocrate_validator/profiles/five-safes-crate/4_sign_off.ttl
new file mode 100644
index 000000000..0c91901e4
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/4_sign_off.ttl
@@ -0,0 +1,295 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# + +five-safes-crate:SignOffObjectActionAndName + a sh:NodeShape ; + sh:name "SignOff" ; + sh:description "Sign Off phase" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff . + } + """ ; + ] ; + + sh:property [ + sh:path schema:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "Sign Off phase MUST have a human-readable name string." ; + ] ; + + sh:property [ + sh:path rdf:type ; + sh:minCount 1 ; + sh:hasValue schema:AssessAction; + sh:severity sh:Violation ; + sh:message "Sign Off phase MUST be a `AssessAction`." ; + ] . + +five-safes-crate:SignOffObjectHasActionStatus + a sh:NodeShape ; + sh:name "SignOffStatus" ; + sh:description "Sign Off Phase Action Status" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff ; + schema:actionStatus ?status . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "actionStatus" ; + sh:description "The value of actionStatus MUST be one of the allowed values." 
; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ; + ] . + + +#=== SHOULD shapes ===# + +# There SHOULD be a Sign-Off Phase +five-safes-crate:SignOffPhase + a sh:NodeShape ; + sh:name "SignOffPhase" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "Check the Sign-Off Phase" ; + sh:severity sh:Warning ; + + sh:sparql [ + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS { + ?action schema:additionalType shp:SignOff . + } + } + """ ; + sh:message "There SHOULD be a Sign-Off Phase in the Final RO-Crate" ; + ] ; + + sh:sparql [ + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + ?action schema:additionalType shp:SignOff . + FILTER NOT EXISTS { + $this schema:mentions ?action . + } + } + """ ; + sh:message "The Root Data Entity SHOULD mention a Sign-Off Phase Object" ; + ] . + + +five-safes-crate:SignOffPhaseProperties + a sh:NodeShape ; + sh:name "SignOffPhaseProperties" ; + sh:description "Check Sign-Off Phase Properties" ; + sh:severity sh:Warning ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff . 
+ } + """ + ] ; + + sh:property [ + sh:description "Check if the Sign Off phase has an actionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an actionStatus" ; + ] ; + + sh:property [ + sh:description "Check if the Sign Off phase has an agent" ; + sh:path schema:agent ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an agent" ; + ] ; + + sh:property [ + sh:description "Check if the Sign Off phase has an instrument (TRE Policy)" ; + sh:path schema:instrument ; + sh:class schema:CreativeWork ; + sh:nodeKind sh:IRI; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" ; + ] ; + + sh:property [ + sh:description "Check if the Sign Off phase's instrument (TRE Policy) has a human-readable name" ; + sh:path ( schema:instrument schema:name ) ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with a human-readable name" ; + ] ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:description "Check if the Sign Off phase lists the workflow as an object" ; + sh:select """ + PREFIX schema: + PREFIX rocrate: + SELECT $this + WHERE { + ?root a schema:Dataset ; + schema:mainEntity ?mainEntity ; + rdf:type rocrate:RootDataEntity . + FILTER NOT EXISTS { + $this schema:object ?mainEntity . + } + } + """ ; + sh:message "The Sign-Off Phase SHOULD list the workflow (mainEntity) as an object" ; + ]; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:description "Check if the Sign Off phase lists the Responsible Project as an object" ; + sh:select """ + PREFIX schema: + PREFIX rocrate: + SELECT $this + WHERE { + ?root a schema:Dataset ; + rdf:type rocrate:RootDataEntity ; + schema:sourceOrganization ?sourceOrg . + FILTER NOT EXISTS { + $this schema:object ?sourceOrg . 
+ } + } + """ ; + sh:message "The Sign-Off Phase SHOULD list the Responsible Project (sourceOrganization) as an object" ; + ]. + + +five-safes-crate:SignOffPhaseEndTime + a sh:NodeShape ; + sh:name "SignOffPhaseEndTime" ; + sh:description "Sign Off end time check" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "Sign Off object SHOULD have endTime property if action completed or failed." ; + sh:message "Sign Off object SHOULD have endTime property if action completed or failed." ; + ] . + + +#=== MAY shapes ===# + +five-safes-crate:SignOffPhaseStartTime + a sh:NodeShape ; + sh:name "SignOffPhaseStartTime" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/ActiveActionStatus", + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.|,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ; + sh:severity sh:Info ; + sh:description "Sign Off object MAY have a startTime property if action is active, completed or failed." ; + sh:message "Sign Off object MAY have a startTime property if action is active, completed or failed." ; + ] . 
diff --git a/rocrate_validator/profiles/five-safes-crate/5_profile_conformance.ttl b/rocrate_validator/profiles/five-safes-crate/5_profile_conformance.ttl new file mode 100644 index 000000000..069d390ed --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/5_profile_conformance.ttl @@ -0,0 +1,89 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix dct: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . +@prefix shp: . + + +#=== MUST shapes ===# +# (none) + + +#=== SHOULD shapes ===# + +# Root Dataset SHOULD declare conformsTo Five Safes profile +five-safes-crate:RootDatasetConformsToFiveSafes + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + + sh:property [ + a sh:PropertyShape ; + sh:name "conformsTo Five Safes profile" ; + sh:path dct:conformsTo ; + sh:hasValue ; + sh:severity sh:Warning ; + sh:message "Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4" ; + ] . + +five-safes-crate:RootDatasetDatePublishedWhenPublished + a sh:NodeShape ; + sh:name "datePublished present on published crates" ; + sh:description "If the root dataset is published (has schema:publisher), it SHOULD have schema:datePublished." 
; + sh:targetClass ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "A crate SHOULD have a publishedDate if and only if it has a publisher." ; + sh:xone ( + # datePublished SHOULD be present if and only if the publisher is specified: + [ sh:not [ sh:property [ + sh:path schema:publisher ; + sh:minCount 1 ; + ]]] + [ sh:property [ + sh:path schema:datePublished ; + sh:minCount 1 ; + ]] + ) . + +five-safes-crate:RootDatasetLicenseWhenPublished + a sh:NodeShape ; + sh:name "License present on published crates" ; + sh:description "If the root dataset is published (has schema:publisher), it SHOULD declare a license." ; + sh:targetClass ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "Profile Conformance: Published crates SHOULD include a license." ; + sh:or ( + # license not required if no publisher: + [ sh:not [ sh:property [ + sh:path schema:publisher ; + sh:minCount 1 ; + ]]] + # license required if publisher present: + [ sh:property [ + sh:path schema:license ; + sh:minCount 1 ; + ]] + ) . + + +#=== MAY shapes ===# +# (none) \ No newline at end of file diff --git a/rocrate_validator/profiles/five-safes-crate/6_workflow_reference.ttl b/rocrate_validator/profiles/five-safes-crate/6_workflow_reference.ttl new file mode 100644 index 000000000..222f2bc9b --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/6_workflow_reference.ttl @@ -0,0 +1,109 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# + +five-safes-crate:ReferenceToWorkflowCrate + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + + # RootDataEntity MUST have a mainEntity property + sh:property [ + a sh:PropertyShape ; + sh:name "mainEntity" ; + sh:path schema:mainEntity ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Violation ; + sh:message "The RootDataEntity MUST have exactly one mainEntity property that is an IRI." ; + ] ; + + # The mainEntity of a RootDataEntity MUST be a Dataset + sh:property [ + a sh:PropertyShape ; + sh:name "mainEntity" ; + sh:path schema:mainEntity ; + sh:class schema:Dataset ; + sh:severity sh:Violation ; + sh:message "The mainEntity pointed to by the RootDataEntity MUST be of type Dataset" ; + ] . + +five-safes-crate:mainEntityHasProperConformsTo + a sh:NodeShape ; + sh:name "mainEntity" ; + sh:description "The mainEntity of the RootDataEntity MUST have a conformsTo property with an IRI starting with https://w3id.org/workflowhub/workflow-ro-crate" ; + sh:targetObjectsOf schema:mainEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "conformsTo" ; + sh:path purl:conformsTo ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Violation ; + sh:message "mainEntity MUST have one and only one `conformsTo` property." ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "conformsTo" ; + sh:path purl:conformsTo ; + sh:nodeKind sh:IRI ; + sh:pattern "^https://w3id\\.org/workflowhub/workflow-ro-crate" ; + sh:severity sh:Violation ; + sh:message "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" ; + ] . 
+ + +#=== SHOULD shapes ===# + +five-safes-crate:DatasetMustHaveDistributionIfURI + a sh:NodeShape ; + sh:name "mainEntity" ; + sh:targetObjectsOf schema:mainEntity ; + sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ; + sh:severity sh:Warning ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "distribution" ; + sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this + WHERE { + FILTER (STRSTARTS(STR($this), "http://") || STRSTARTS(STR($this), "https://")) . + FILTER NOT EXISTS { + $this schema:distribution ?dist . + FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) . + } + } + """ ; + sh:message "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." ; + ] . + + +#=== MAY shapes ===# +# (none) \ No newline at end of file diff --git a/rocrate_validator/profiles/five-safes-crate/7_requested_workflow_run.ttl b/rocrate_validator/profiles/five-safes-crate/7_requested_workflow_run.ttl new file mode 100644 index 000000000..9b66f1769 --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/7_requested_workflow_run.ttl @@ -0,0 +1,108 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . 
+@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# + +five-safes-crate:RootDataEntityMentionsWorkflowRunAction + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + + sh:property [ + a sh:PropertyShape ; + sh:name "mentions" ; + sh:path schema:mentions; + sh:qualifiedValueShape [ + sh:class ro-crate:WorkflowRunAction ; + ] ; + sh:qualifiedMinCount 1 ; + sh:severity sh:Violation ; + sh:message "`RootDataEntity` MUST reference at least one `CreateAction` (corresponding to the workflow run) through `mentions`" ; + ] . + + +five-safes-crate:WorkflowRunActionExistence + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + sh:severity sh:Violation ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "WorkflowRunAction" ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this + WHERE { + FILTER NOT EXISTS { + ?workflowRunAction a ro-crate:WorkflowRunAction . + } + } + """ ; + sh:message "The CreateAction entity corresponding to the workflow MUST reference, as an instrument, the entity that is referenced as mainEntity by the RO-Crate" ; + ] . + + +five-safes-crate:WorkflowRunObject + a sh:NodeShape ; + sh:name "WorkflowRunAction" ; + sh:targetClass ro-crate:WorkflowRunAction ; + sh:description "" ; + sh:severity sh:Violation ; # Apply to all property shapes / constraints below + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "object" ; + sh:select """ + SELECT $this ?object + WHERE { + $this schema:object ?object . + FILTER NOT EXISTS { ?object a ?type . } + } + """ ; + sh:message "In the `CreateAction` entity corresponding to the workflow run, each `object` MUST reference an existing entity." ; + ] . 
+ + +#=== SHOULD shapes ===# + +# WorkflowRunAction SHOULD have object property with minimum cardinality 1 +five-safes-crate:WorkflowRunActionShouldHaveObjectProperty + a sh:NodeShape ; + sh:targetClass ro-crate:WorkflowRunAction ; + sh:name "WorkflowRunAction" ; + sh:property [ + sh:path schema:object ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Warning ; + sh:message "`CreateAction` (corresponding to the workflow run) SHOULD have the property `object` with IRI values." ; + ] . + + +#=== MAY shapes ===# +# (none) diff --git a/rocrate_validator/profiles/five-safes-crate/8_disclosure_phase.ttl b/rocrate_validator/profiles/five-safes-crate/8_disclosure_phase.ttl new file mode 100644 index 000000000..61b785a43 --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/8_disclosure_phase.ttl @@ -0,0 +1,233 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# + +five-safes-crate:DisclosureObjectHasDescriptiveNameAndIsAssessAction + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "DisclosureCheck" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck . 
+ } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "AssessAction" ; + sh:description "`DisclosureCheck` MUST be a `AssessAction`." ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:hasValue schema:AssessAction; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST be a `AssessAction`." ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "name" ; + sh:description "`DisclosureCheck` MUST have a name string of at least 10 characters." ; + sh:minCount 1 ; + sh:path schema:name ; + sh:datatype xsd:string ; + sh:minLength 10 ; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST have a name string of at least 10 characters." ; + ] . + + +five-safes-crate:DisclosureObjectHasActionStatusWithAcceptedValue + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?status . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "actionStatus" ; + sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ; + ] . + + +#=== SHOULD shapes ===# + +five-safes-crate:RootDataEntityShouldMentionDisclosureObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "RootDataEntity SHOULD mention a disclosure object." 
; + sh:severity sh:Warning ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "`RootDataEntity` SHOULD mention a disclosure object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action a schema:AssessAction ; + schema:additionalType shp:DisclosureCheck . + } + } + """ ; + sh:message "`RootDataEntity` SHOULD mention a disclosure object." ; + ] . + + +five-safes-crate:DisclosureObjectHasActionStatus + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + ] . + + +five-safes-crate:DisclosureObjectHasEndTimeIfcompletedOrFailed + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "DisclosureCheck" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." 
; + sh:message "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + ] . + + +#=== MAY shapes ===# + +five-safes-crate:DisclosureObjectHasStartTimeIfBegun + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "DisclosureCheck" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:description "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + sh:message "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + ] . \ No newline at end of file diff --git a/rocrate_validator/profiles/five-safes-crate/9_inputs.ttl b/rocrate_validator/profiles/five-safes-crate/9_inputs.ttl new file mode 100644 index 000000000..e0e83c940 --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/9_inputs.ttl @@ -0,0 +1,67 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix bioschemas: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +#=== MUST shapes ===# +# (none) + + +#=== SHOULD shapes ===# + +five-safes-crate:InputEntityReferencesFormalParameterViaExampleOfWork + a sh:NodeShape ; + sh:name "Input" ; + sh:description "" ; + sh:severity sh:Warning ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this WHERE { + ?action a ro-crate:WorkflowRunAction ; + schema:object ?this . + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "exampleOfWork" ; + sh:description "Input SHOULD reference a FormalParameter using exampleOfWork" ; + + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this WHERE { + FILTER NOT EXISTS { + $this schema:exampleOfWork ?par . + ?par a bioschemas:FormalParameter . + } + } + """ ; + sh:message "Input SHOULD reference a FormalParameter using exampleOfWork" ; + ] . + + +#=== MAY shapes ===# +# (none) diff --git a/rocrate_validator/profiles/five-safes-crate/ontology.ttl b/rocrate_validator/profiles/five-safes-crate/ontology.ttl new file mode 100644 index 000000000..d37dfd45e --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/ontology.ttl @@ -0,0 +1,37 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix owl: . +@prefix rdf: . +@prefix xml: . +@prefix xsd: . +@prefix rdfs: . +@prefix schema: . +@prefix rocrate: . +@prefix bioschemas: . +@prefix ro-crate: . +@prefix isa-ro-crate: . + + rdf:type owl:Ontology ; + owl:versionIRI . + +# # ################################################################# +# # # Classes +# # ################################################################# + +# Declare a WorkflowRunAction class +ro-crate:WorkflowRunAction rdf:type owl:Class ; + rdfs:subClassOf schema:CreateAction ; + rdfs:label "WorkflowRunAction"@en . diff --git a/rocrate_validator/profiles/five-safes-crate/profile.ttl b/rocrate_validator/profiles/five-safes-crate/profile.ttl new file mode 100644 index 000000000..28abe62c0 --- /dev/null +++ b/rocrate_validator/profiles/five-safes-crate/profile.ttl @@ -0,0 +1,83 @@ +# Copyright (c) 2024-2025 CRS4, University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . 
+ + + a prof:Profile ; + + # the Profile's label + rdfs:label "Five Safes RO-Crate 0.4" ; + + # regular metadata, a basic description of the Profile + rdfs:comment """Five Safes RO-Crate Metadata Specification 0.4"""@en ; + + # URI of the publisher of the Metadata Specification + dct:publisher ; + + # TODO: resolve failures when these profiles are applied + # This profile is an extension of Workflow Run Crate for use in Trusted Research Environments (TRE) + # prof:isProfileOf ; + + # This profile is a transitive profile of the RO-Crate Metadata Specification + prof:isTransitiveProfileOf ; + # TODO: resolve failures when these profiles are applied + # , + # ; + + # this profile has a JSON-LD context resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in JSON-LD format + dct:format ; + + # it conforms to JSON-LD, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Vocabulary" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Vocabulary ; + + # this profile resource's actual file + prof:hasArtifact ; + ] ; + + # this profile has a human-readable documentation resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in HTML format + dct:format ; + + # it conforms to HTML, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Specification" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Specification ; + + # this profile resource's actual file + prof:hasArtifact ; + + # this profile is inherited from Workflow Run profile + prof:isInheritedFrom ; + ] ; + + # a short code to refer to the Profile with when a URI can't be used + prof:hasToken "five-safes-crate" ; +. 
diff --git a/tests/data/crates/invalid/five_safes_crate/context_multiple_wrong_version/ro-crate-metadata.json b/tests/data/crates/invalid/five_safes_crate/context_multiple_wrong_version/ro-crate-metadata.json new file mode 100644 index 000000000..79733fa7e --- /dev/null +++ b/tests/data/crates/invalid/five_safes_crate/context_multiple_wrong_version/ro-crate-metadata.json @@ -0,0 +1,169 @@ +{ + "@context": ["https://w3id.org/ro/crate/1.1/context", "http://schema.org", {"test": "http://schema.org/test"}], + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Request", + "description": "example 5-Safe RO-Crate request metadata for testing", + "license": "Apache-2.0", + "datePublished": "2025-09-20T14:38:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": 
"#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": "http://schema.org/PotentialActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ] + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://ror.org/01ee9ar58", + "@type": "Organization", + "name": "University of Nottingham" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + } + ] +} diff --git 
a/tests/data/crates/invalid/five_safes_crate/context_single_wrong_version/ro-crate-metadata.json b/tests/data/crates/invalid/five_safes_crate/context_single_wrong_version/ro-crate-metadata.json new file mode 100644 index 000000000..beba68399 --- /dev/null +++ b/tests/data/crates/invalid/five_safes_crate/context_single_wrong_version/ro-crate-metadata.json @@ -0,0 +1,169 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Request", + "description": "example 5-Safe RO-Crate request metadata for testing", + "license": "Apache-2.0", + "datePublished": "2025-09-20T14:38:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + 
"actionStatus": "http://schema.org/PotentialActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ] + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://ror.org/01ee9ar58", + "@type": "Organization", + "name": "University of Nottingham" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + } + ] +} diff --git a/tests/data/crates/valid/five-safes-crate-multiple-context/ro-crate-metadata.json 
b/tests/data/crates/valid/five-safes-crate-multiple-context/ro-crate-metadata.json new file mode 100644 index 000000000..89813ee62 --- /dev/null +++ b/tests/data/crates/valid/five-safes-crate-multiple-context/ro-crate-metadata.json @@ -0,0 +1,169 @@ +{ + "@context": ["https://w3id.org/ro/crate/1.2/context", "https://w3id.org/ro/terms/workflow-run/context", {"test": "http://schema.org/test"}], + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Request", + "description": "example 5-Safe RO-Crate request metadata for testing", + "license": "Apache-2.0", + "datePublished": "2025-09-20T14:38:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": 
"http://schema.org/PotentialActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ] + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://ror.org/01ee9ar58", + "@type": "Organization", + "name": "University of Nottingham" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + } + ] +} diff --git a/tests/data/crates/valid/five-safes-crate-request/ro-crate-metadata.json 
b/tests/data/crates/valid/five-safes-crate-request/ro-crate-metadata.json new file mode 100644 index 000000000..a53902d45 --- /dev/null +++ b/tests/data/crates/valid/five-safes-crate-request/ro-crate-metadata.json @@ -0,0 +1,169 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Request", + "description": "example 5-Safe RO-Crate request metadata for testing", + "license": "Apache-2.0", + "datePublished": "2025-09-20T14:38:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": "http://schema.org/PotentialActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { 
+ "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ] + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://ror.org/01ee9ar58", + "@type": "Organization", + "name": "University of Nottingham" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + } + ] +} diff --git a/tests/data/crates/valid/five-safes-crate-result/ro-crate-metadata.json b/tests/data/crates/valid/five-safes-crate-result/ro-crate-metadata.json new file mode 100644 index 000000000..2fa6838d2 --- /dev/null +++ 
b/tests/data/crates/valid/five-safes-crate-result/ro-crate-metadata.json @@ -0,0 +1,413 @@ +{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.2" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Result", + "description": "example 5-Safe RO-Crate result metadata for testing", + "datePublished": "2025-09-20T14:45:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + }, + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "workflow/289/" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": [ + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f" + } + ], + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + }, + "publisher": { + "@id": "https://tre72.example.com/" + }, + "license": { + "@id": "http://spdx.org/licenses/CC-BY-4.0" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://spdx.org/licenses/CC-BY-4.0", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "identifier": "CC-BY-4.0" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup 
tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "startTime": "2023-04-18T12:12:00+01:00", + "endTime": "2023-04-19T16:59:59+01:00", + "actionStatus": "http://schema.org/CompletedActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ], + "result": [ + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397" + } + ] + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": 
"FormalParameter", + "name": "fast-mode" + }, + { + "@id": "outputs/qa.csv", + "@type": "File", + "encodingFormat": "text/csv", + "name": "Tabular listing of quality assessment" + }, + { + "@id": "outputs/diagrams/", + "@type": "Dataset", + "name": "Diagrams of regions of interest" + }, + { + "@id": "workflow/289/", + "sameAs": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#CheckValue" + }, + "name": "BagIt checksum of Crate: OK", + "startTime": "2023-04-18T12:11:00+01:00", + "endTime": "2023-04-18T12:11:45+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#ValidationCheck" + }, + "name": "Validation against Five Safes RO-Crate profile: approved", + "startTime": "2023-04-18T12:11:46+01:00", + "endTime": "2023-04-18T12:11:49+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639", + "@type": "DownloadAction", + "name": "Downloaded workflow RO-Crate via proxy", + "startTime": "2023-04-18T12:11:50+01:00", + "endTime": "2023-04-18T12:11:52+01:00", + 
"object": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + }, + "result": { + "@id": "workflow/289/" + }, + "agent": { + "@id": "http://proxy.example.com/" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#SignOff" + }, + "name": "Sign-off of execution according to Agreement policy: approved", + "endTime": "2023-04-19T17:15:12+01:00", + "startTime": "2023-04-19T10:15:12+01:00", + "object": [ + { + "@id": "./" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ], + "instrument": { + "@id": "https://tre72.example.com/agreement-policy/81" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#DisclosureCheck" + }, + "name": "Disclosure check of workflow results: approved", + "startTime": "2023-04-25T15:00:00+01:00", + "endTime": "2023-04-25T16:00:00+01:00", + "object": { + "@id": "./" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f", + "@type": "UpdateAction", + "startTime": "2023-04-29T12:12:25+01:00", + "additionalType": { + "@id": "https://w3id.org/shp#GenerateCheckValue" + }, + "name": "BagIt manifests of Crate updated", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": 
"urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397", + "@type": "DigitalDocument", + "encodingFormat": "text/csv", + "name": "Patient measurement 07b81e0f-7ac4-5428-9940-878b241e2397", + "hasDigitalDocumentPermission": { + "@id": "#permissions-07b81e0f" + } + }, + { + "@id": "#permissions-07b81e0f", + "@type": "DigitalDocumentPermission", + "permissionType": "http://schema.org/ReadPermission", + "grantee": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://ror.org/01ee9ar58", + "@type": "Organization", + "name": "University of Nottingham" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "https://tre72.example.com/", + "@type": "Organization", + "name": "TRE 72 trusted research environment at The University of Manchester", + "parentOrganization": { + "@id": "https://ror.org/027m9bs27" + } + }, + { + "@id": "https://tre72.example.com/#crate-validator", + "@type": "SoftwareApplication", + "name": "RO-Crate validator at TRE72", + "url": "https://tre72.example.com/#crate-validator", + "version": "1.0", + "provider": { + "@id": "https://tre72.example.com/" + } + }, + { + "@id": "https://tre72.example.com/agreement-policy/81", + "@type": "CreativeWork", + "name": "Agreement policy for TRE72 for project 81" + }, + { + "@id": "https://www.iana.org/assignments/named-information#sha-512", + "@type": "DefinedTerm", + "name": "sha-512 algorithm" + } + ] +} diff --git a/tests/data/crates/valid/five-safes-profile-crate/ro-crate-metadata.json 
b/tests/data/crates/valid/five-safes-profile-crate/ro-crate-metadata.json new file mode 100644 index 000000000..5ca7ea110 --- /dev/null +++ b/tests/data/crates/valid/five-safes-profile-crate/ro-crate-metadata.json @@ -0,0 +1,461 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "licence": { + "@id": "http://spdx.org/licenses/CC0-1.0" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": [ + "Dataset", + "Profile" + ], + "name": "Five Safes RO-Crate profile", + "cite-as": "https://w3id.org/5s-crate/0.4", + "version": "0.4", + "datePublished": "2023-05-15T10:32:00Z", + "licence": { + "@id": "http://spdx.org/licenses/MIT" + }, + "copyrightHolder": { + "@id": "https://ror.org/027m9bs27" + }, + "copyrightYear": 2023, + "hasPart": [ + { + "@id": "https://trefx.uk/5s-crate/0.4/index.html" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request.bagit.zip" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result.bagit.zip" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/" + }, + { + "@id": "http://schema.org/PotentialActionStatus" + }, + { + "@id": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "http://schema.org/ActiveActionStatus" + }, + { + "@id": "http://schema.org/FailedActionStatus" + }, + { + "@id": "http://schema.org/CreateAction" + }, + { + "@id": "http://schema.org/AssessAction" + }, + { + "@id": "http://schema.org/DownloadAction" + }, + { + "@id": "http://schema.org/UpdateAction" + }, + { + "@id": "http://schema.org/grantee" + }, + { + "@id": "http://schema.org/DigitalDocument" + }, + { + "@id": "http://schema.org/DigitalDocumentPermission" + }, + { + "@id": "http://schema.org/hasDigitalDocumentPermission" + }, + { + 
"@id": "http://schema.org/ReadPermission" + }, + { + "@id": "https://bioschemas.org/FormalParameter" + }, + { + "@id": "https://w3id.org/shp#CheckValue" + }, + { + "@id": "https://w3id.org/shp#ValidationCheck" + }, + { + "@id": "https://w3id.org/shp#SignOff" + }, + { + "@id": "https://w3id.org/shp#DisclosureCheck" + }, + { + "@id": "https://w3id.org/shp#GenerateCheckValue" + } + ], + "hasResource": [ + { + "@id": "#hasSpecification" + }, + { + "@id": "#hasExampleRequest" + }, + { + "@id": "#hasExampleResult" + }, + { + "@id": "#usesSHP" + } + ], + "publisher": { + "@id": "https://trefx.uk/" + }, + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=MC_PC_23007" + }, + "author": [ + { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + { + "@id": "https://orcid.org/0009-0003-2419-1964" + } + ] + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/index.html", + "@type": "CreativeWork", + "name": "Five Safes RO-Crate profile (specification)" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/", + "@type": "Dataset", + "name": "Example Crate in Request state", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "subjectOf": [ + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/ro-crate-metadata.json" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/ro-crate-preview.html" + } + ], + "distribution": { + "@id": "https://trefx.uk/5s-crate/0.4/example-request.bagit.zip" + } + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/", + "@type": "Dataset", + "name": "Example Crate in Request state", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "subjectOf": [ + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/ro-crate-metadata.json" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/ro-crate-preview.html" + } + ], + "distribution": { + "@id": "https://trefx.uk/5s-crate/0.4/example-result.bagit.zip" + } + }, + { + "@id": 
"https://trefx.uk/5s-crate/0.4/example-result.bagit.zip", + "@type": "DataDownload", + "name": "example-result.bagit.zip", + "description": "Example Result following the 5s-crate profile, archived as BagIt ZIP", + "encodingFormat": "application/zip", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + } + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request.bagit.zip", + "@type": "DataDownload", + "name": "example-request.bagit.zip", + "description": "Example Request following the 5s-crate profile, archived as BagIt ZIP", + "encodingFormat": "application/zip", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + } + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/ro-crate-metadata.json", + "@type": "CreativeWork", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/ro-crate-preview.html", + "@type": "CreativeWork", + "encodingFormat": "text/html" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/ro-crate-metadata.json", + "@type": "CreativeWork", + "encodingFormat": "application/ld+json" + }, + { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/ro-crate-preview.html", + "@type": "CreativeWork", + "encodingFormat": "text/html" + }, + { + "@id": "https://bioschemas.org/FormalParameter", + "@type": "DefinedTerm" + }, + { + "@id": "https://w3id.org/shp", + "@type": "DefinedTermSet", + "name": "The Safe Haven Provenance (SHP) Ontology", + "version": "0.1" + }, + { + "@id": "https://w3id.org/shp#CheckValue", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#ValidationCheck", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#SignOff", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#DisclosureCheck", + "@type": 
"DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#GenerateCheckValue", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#CheckValue", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#ValidationCheck", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#SignOff", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#DisclosureCheck", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://w3id.org/shp#GenerateCheckValue", + "@type": "DefinedTerm", + "inDefinedTermSet": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "https://www.iana.org/assignments/named-information#sha-512", + "@type": "DefinedTerm", + "name": "sha-512 algorithm" + }, + { + "@id": "#usesSHP", + "name": "uses vocabulary SHP", + "@type": "ResourceDescriptor", + "hasRole": { + "@id": "http://www.w3.org/ns/dx/prof/role/vocabulary" + }, + "hasArtifact": { + "@id": "https://w3id.org/shp" + } + }, + { + "@id": "#hasSpecification", + "@type": "ResourceDescriptor", + "name": "has specification", + "hasRole": { + "@id": "http://www.w3.org/ns/dx/prof/role/specification" + }, + "hasArtifact": { + "@id": "https://trefx.uk/5s-crate/0.4/index.html" + } + }, + { + "@id": "#hasExampleRequest", + "@type": "ResourceDescriptor", + "name": "has example: Request", + "hasRole": { + "@id": "http://www.w3.org/ns/dx/prof/role/example" + }, + "hasArtifact": { + "@id": "https://trefx.uk/5s-crate/0.4/example-request/data/" + } + }, + { + "@id": "#hasExampleResult", + "@type": "ResourceDescriptor", + "name": "has example: Result", + "hasRole": { + "@id": "http://www.w3.org/ns/dx/prof/role/example" + }, + 
"hasArtifact": { + "@id": "https://trefx.uk/5s-crate/0.4/example-result/data/" + } + }, + + { + "@id": "http://www.w3.org/ns/dx/prof/role/example", + "@type": [ + "DefinedTerm", + "ResourceRole" + ], + "name": "Example", + "description": "Sample instance data conforming to the profile" + }, + { + "@id": "http://www.w3.org/ns/dx/prof/role/specification", + "@type": [ + "DefinedTerm", + "ResourceRole" + ], + "name": "Specification", + "description": "Defining the profile in human-readable form" + }, + { + "@id": "http://www.w3.org/ns/dx/prof/role/vocabulary", + "@type": [ + "DefinedTerm", + "ResourceRole" + ], + "name": "Vocabulary", + "description": "Defines terms used in the profile specification" + }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "CreativeWork", + "name": "Creative Commons Zero v1.0 Universal", + "identifier": "CC0-1.0", + "url": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + { + "@id": "http://spdx.org/licenses/MIT", + "@type": "CreativeWork", + "name": "MIT Licence", + "identifier": "MIT" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=MC_PC_23007", + "@type": "Grant", + "identifier": { + "@id": "_:ukri:MC_PC_23007" + }, + "name": "DARE-FX: Delivering a federated network of TREs to enable safe analytics", + "description": "", + "funder": { + "@id": "https://ror.org/001aqnf71" + }, + "sponsor": { + "@id": "https://dareuk.org.uk/" + } + }, + { + "@id": "https://dareuk.org.uk/", + "@type": "FundingScheme", + "name": "DARE UK", + "funder": { + "@id": "https://ror.org/001aqnf71" + } + }, + { + "@id": "_:ukri:MC_PC_23007", + "@type": "PropertyValue", + "name": "UKRI", + "value": "MC_PC_23007" + }, + { + "@id": "https://trefx.uk/", + "@type": "ResearchProject", + "name": "TRE-FX", + "description": "Delivering a federated network of TREs to enable safe analytics", + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=MC_PC_23007" + } + }, + { + "@id": "https://ror.org/001aqnf71", + "@type": "FundingAgency", + "name": 
"UK Research and Innovation", + "alternateName": "UKRI", + "url": "https://www.ukri.org/" + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": { + "@id": "https://trefx.uk/" + } + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "CollegeOrUniversity", + "name": "The University of Manchester", + "url": "https://www.manchester.ac.uk/" + }, + { + "@id": "https://orcid.org/0009-0003-2419-1964", + "@type": "Person", + "name": "Stuart Wheater", + "memberOf": { + "@id": "https://trefx.uk/" + } + } + ] +} \ No newline at end of file diff --git a/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py b/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py new file mode 100644 index 000000000..1cbdbe6d0 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py @@ -0,0 +1,95 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- SHOULD fails tests + + +def test_completed_createaction_does_not_have_result(): + """ + Test a Five Safes Crate where `CreateAction` has `CompletedActionStatus` but + does not have the property `result`. + (We remove `result` from `CreateAction` if this has `CompletedActionStatus`) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action schema:result ?o . + } + WHERE { + ?action a schema:CreateAction ; + schema:actionStatus "http://schema.org/CompletedActionStatus" ; + schema:result ?o + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowRunAction"], + expected_triggered_issues=[ + "The `CreateAction` corresponding to the workflow run,", + "with CompletedActionStatus, SHOULD have the `result` property.", + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_result_output_does_not_have_allowed_type(): + """ + Test a Five Safes Crate where the result output does not have a type that + is among those allowed. + (We remove the output entity and replace it with one that is of a wrong type)""" + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?result a ?oldType . + } + INSERT { + ?result a schema:Person . + } + WHERE { + ?action a schema:CreateAction ; + schema:result ?result . + ?result a ?oldType . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["Output"], + expected_triggered_issues=[ + "Result SHOULD have a `@type` among an allowed set of values." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py b/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py new file mode 100644 index 000000000..a271978a8 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py @@ -0,0 +1,308 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_workflow_object_with_no_name(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + WHERE { + ?this rdf:type schema:CreateAction ; + schema:name ?name . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + "The `CreateAction` corresponding to the workflow run MUST have a name string of at least 10 characters." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_with_name_not_string(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name 123 . + } + WHERE { + ?this rdf:type schema:CreateAction ; + schema:name ?name . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + "The `CreateAction` corresponding to the workflow run MUST have a name string of at least 10 characters." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_with_not_long_enough_name(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name "Short" . + } + WHERE { + ?this rdf:type schema:CreateAction ; + schema:name ?name . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + "The `CreateAction` corresponding to the workflow run MUST have a name string of at least 10 characters." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_has_no_properly_formatted_start_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?time . + } + INSERT { + ?s schema:startTime "1st Dec '25 @ 10:00:00" . + } + WHERE { + ?s rdf:type schema:CreateAction ; + schema:startTime ?time . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_has_no_properly_formatted_end_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:endTime ?time . + } + INSERT { + ?s schema:endTime "1st Dec '25 @ 10:00:00" . + } + WHERE { + ?s rdf:type schema:CreateAction ; + schema:endTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_with_no_action_status(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:actionStatus ?o . + } + WHERE { + ?this schema:actionStatus ?o ; + rdf:type schema:CreateAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + ( + "`CreateAction` MUST have an actionStatus " + "with an allowed value (see https://schema.org/ActionStatusType)." 
+ ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_workflow_object_with_no_properly_valued_action_status(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:actionStatus ?o . + } + INSERT { + ?this schema:actionStatus "Not a proper actionStatus value" . + } + WHERE { + ?this schema:actionStatus ?o ; + rdf:type schema:CreateAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + ( + "`CreateAction` MUST have an actionStatus " + "with an allowed value (see https://schema.org/ActionStatusType)." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_workflow_object_has_no_end_time_if_ended(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:endTime ?time . + } + WHERE { + ?s rdf:type schema:CreateAction ; + schema:endTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + "The workflow execution object SHOULD have an endTime property if it has ended." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ------- MAY fail tests + + +def test_5src_workflow_object_has_no_start_time_if_begun(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?time . + } + WHERE { + ?s rdf:type schema:CreateAction; + schema:startTime ?time . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowExecution"], + expected_triggered_issues=[ + ( + "The workflow execution object MAY have a startTime if actionStatus is " + "either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_12_check_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_12_check_phase.py new file mode 100644 index 000000000..53171a4c0 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_12_check_phase.py @@ -0,0 +1,385 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +# TO BE CHECKED AGAIN +def test_5src_check_value_not_of_type_assess_action(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?this rdf:type ?type . + } + INSERT { + ?this rdf:type . 
+ } + WHERE { + ?this a schema:AssessAction ; + schema:additionalType shp:CheckValue ; + rdf:type ?type . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=["CheckValue MUST be a `AssessAction`."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_name_not_a_string(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name "123"^^xsd:integer . + } + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=[ + "CheckValue MUST have a human readable name string." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_start_time_not_iso_standard(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?c schema:startTime ?t . + } + INSERT { + ?c schema:startTime "1st of Jan 2021" . + } + WHERE { + ?c schema:additionalType shp:CheckValue ; + schema:startTime ?t . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." 
+ ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_end_time_not_iso_standard(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?c schema:endTime ?t . + } + INSERT { + ?c schema:endTime "1st of Jan 2021" . + } + WHERE { + ?c schema:additionalType shp:CheckValue ; + schema:endTime ?t . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_has_action_status_with_not_allowed_value(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?s schema:actionStatus ?o . + } + INSERT { + ?s schema:actionStatus "Not a good action status" . + } + WHERE { + ?s schema:additionalType ; + schema:actionStatus ?o . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=[ + "`CheckValue` --> `actionStatus` MUST have one of the allowed values." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_root_data_entity_does_not_mention_check_value_entity(): + sparql = """ + PREFIX schema: + PREFIX shp: + + DELETE { + <./> schema:mentions ?o . 
+ } + WHERE { + ?o schema:additionalType shp:CheckValue ; + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "RootDataEntity SHOULD mention a check value object." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_object_does_not_point_to_root_data_entity(): + sparql = """ + PREFIX schema: + PREFIX shp: + + DELETE { + ?s schema:object <./> . + } + INSERT { + ?s schema:object "not the RootDataEntity" . + } + WHERE { + ?s schema:additionalType shp:CheckValue ; + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=[ + "`CheckValue` --> `object` SHOULD point to the root of the RO-Crate" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_instrument_does_not_point_to_entity_with_type_defined_term(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?s rdf:type schema:DefinedTerm . + } + INSERT { + ?s rdf:type schema:Persona . + } + WHERE { + ?cv schema:additionalType shp:CheckValue ; + schema:instrument ?s . + ?s rdf:type schema:DefinedTerm . 
+ } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=[ + "`CheckValue` --> `instrument` SHOULD point to an entity typed `DefinedTerm`" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_does_not_have_end_time(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?c schema:endTime ?t . + } + WHERE { + ?c schema:additionalType shp:CheckValue ; + schema:endTime ?t . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=["`CheckValue` SHOULD have the `endTime` property."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_does_not_have_action_status_property(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?s schema:actionStatus ?o . + } + WHERE { + ?s schema:additionalType shp:CheckValue ; + schema:actionStatus ?o . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=["CheckValue SHOULD have actionStatus property."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_does_not_point_to_an_agent(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?s schema:agent ?o . + } + WHERE { + ?s schema:additionalType shp:CheckValue ; + schema:agent ?o . 
+ } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=[ + "`CheckValue` --> `agent` SHOULD reference the agent who initiated the check" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- MAY fails tests + + +def test_5src_check_value_does_not_have_start_time(): + sparql = """ + PREFIX schema: + PREFIX shp: + PREFIX rdf: + + DELETE { + ?c schema:startTime ?t . + } + WHERE { + ?c schema:additionalType shp:CheckValue ; + schema:startTime ?t . + } + """ + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["CheckValue"], + expected_triggered_issues=["`CheckValue` MAY have the `startTime` property."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py new file mode 100644 index 000000000..be6f44906 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py @@ -0,0 +1,354 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_validation_check_not_of_type_assess_action(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this rdf:type schema:AssessAction . + } + INSERT { + ?this rdf:type . + } + WHERE { + ?this rdf:type schema:AssessAction ; + schema:additionalType shp:ValidationCheck . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=["ValidationCheck MUST be a `AssessAction`."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_name_not_a_string(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name 123 . + } + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + "ValidationCheck MUST have a human readable name string." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_has_action_status_with_not_allowed_value(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?o . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + ( + "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, " + "CompletedActionStatus, or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_start_time_not_iso_standard(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?c schema:startTime ?t . + } + INSERT { + ?c schema:startTime "1st of Jan 2021" . + } + WHERE { + ?c schema:additionalType shp:ValidationCheck ; + schema:startTime ?t . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_end_time_not_iso_standard(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?c schema:endTime ?t . + } + INSERT { + ?c schema:endTime "1st of Jan 2021" . + } + WHERE { + ?c schema:additionalType shp:ValidationCheck ; + schema:endTime ?t . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." 
+ ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_root_data_entity_does_not_mention_validation_check_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?o . + } + WHERE { + ?o schema:additionalType shp:ValidationCheck ; + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "RootDataEntity SHOULD mention a ValidationCheck object." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_object_does_not_point_to_root_data_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:object <./> . + } + INSERT { + ?s schema:object "not the RootDataEntity" . + } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_validation_check_instrument_does_not_point_to_5scrate_0p4(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:instrument . + } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + schema:instrument . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_Validation_check_does_not_have_action_status_property(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + "ValidationCheck SHOULD have actionStatus property." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_does_not_have_end_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:endTime ?time . + } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + schema:endTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + ( + "ValidationCheck SHOULD have the `endTime` property if `actionStatus` " + "is either CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- MAY fails tests + + +def test_5src_download_action_does_not_have_start_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?time . 
+ } + WHERE { + ?s schema:additionalType shp:ValidationCheck ; + schema:startTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["ValidationCheck"], + expected_triggered_issues=[ + ( + "ValidationCheck MAY have the `startTime` property if `actionStatus` " + "is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py new file mode 100644 index 000000000..52228408e --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py @@ -0,0 +1,425 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_download_action_does_not_have_name(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . 
+ } + WHERE { + ?this schema:name ?name ; + rdf:type schema:DownloadAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + "DownloadAction MUST have a human readable name string." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_name_not_a_string(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name 123 . + } + WHERE { + ?this rdf:type schema:DownloadAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + "DownloadAction MUST have a human readable name string." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_start_time_not_iso_standard(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?c schema:startTime ?t . + } + INSERT { + ?c schema:startTime "1st of Jan 2021" . + } + WHERE { + ?c rdf:type schema:DownloadAction ; + schema:startTime ?t . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_check_value_end_time_not_iso_standard(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?c schema:endTime ?t . 
+ } + INSERT { + ?c schema:endTime "1st of Jan 2021" . + } + WHERE { + ?c rdf:type schema:DownloadAction ; + schema:endTime ?t . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_downloaded_workflow_same_as_is_not_the_same_as_root_data_entity_main_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?wf schema:sameAs ?me . + } + INSERT { + ?wf schema:sameAs "This is not the same as the main entity" . + } + WHERE { + ?wf schema:sameAs ?me . + <./> schema:mainEntity ?me . + ?da schema:result ?wf ; + rdf:type schema:DownloadAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Downloaded Workflow"], + expected_triggered_issues=[ + ( + "The property `sameAs` of the entity representing the downloaded workflow " + "MUST point to the same entity as `RootDataEntity` --> `mainEntity`." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_downloaded_workflow_distribution_is_not_the_same_as_download_action_object(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:object ?o . + } + INSERT { + ?s schema:result "This is not the downloaded workflow entity" . + } + WHERE { + ?s schema:object ?o ; + rdf:type schema:DownloadAction . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Downloaded Workflow"], + expected_triggered_issues=[ + ( + "DownloadedWorkflow --> `distribution` MUST reference " + "the same entity as `DownloadAction` --> `object`." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_has_action_status_with_not_allowed_value(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + INSERT { + ?s schema:actionStatus "Not a good action status" . + } + WHERE { + ?s rdf:type schema:DownloadAction ; + schema:actionStatus ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + ( + "The value of actionStatus MUST be one of the allowed values: " + "PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_download_action_is_not_present(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?da ?p ?o . + } + WHERE { + ?da rdf:type schema:DownloadAction ; + ?p ?o . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=["An entity typed DownloadAction SHOULD exist."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_does_not_mention_download_action_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?o . + } + WHERE { + ?o rdf:type schema:DownloadAction ; + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "RootDataEntity SHOULD mention DownloadAction if this exists." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_does_not_have_end_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?c schema:endTime ?t . + } + WHERE { + ?c rdf:type schema:DownloadAction ; + schema:endTime ?t . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + ( + "`DownloadAction` SHOULD have the `endTime` property " + "if `actionStatus` is either CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_does_not_have_action_status_property(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + WHERE { + ?s rdf:type schema:DownloadAction ; + schema:actionStatus ?o . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + "`DownloadAction` SHOULD have `actionStatus` property." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- MAY fails tests + + +def test_5src_downloaded_workflow_is_not_represented_by_its_own_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?wf ?p ?o . + } + WHERE { + ?wf ?p ?o . + ?da schema:result ?wf ; + rdf:type schema:DownloadAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + ( + "The entity representing the downloaded workflow is not defined, " + "OR is not referenced by `DownloadAction` --> `result`, " + "OR is not of type `Dataset`." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_download_action_does_not_have_start_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?time . + } + WHERE { + ?s rdf:type schema:DownloadAction ; + schema:startTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["DownloadAction"], + expected_triggered_issues=[ + ( + "`DownloadAction` MAY have the `startTime` property if `actionStatus` " + "is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." 
+ ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py b/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py new file mode 100644 index 000000000..e94a76fa9 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py @@ -0,0 +1,87 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC, Invalid5sROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_conforms_to_old_version(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this dct:conformsTo ?version . + } + INSERT { + ?this dct:conformsTo . + } + WHERE { + ?this dct:conformsTo ?version ; + schema:about <./> . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=[ + "RO-Crate conforms to 1.2 or later minor version" + ], + expected_triggered_issues=[ + "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with " + "RO-Crate specification version 1.2 or later minor version" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_context_single_wrong_version(): + do_entity_test( + rocrate_path=Invalid5sROC().context_single_wrong_version, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RO-Crate context version"], + expected_triggered_issues=[ + "The RO-Crate metadata file MUST include the RO-Crate context version 1.2 " + "(or later minor version) in `@context`" + ], + profile_identifier="five-safes-crate", + ) + + +def test_5src_context_multiple_wrong_version(): + do_entity_test( + rocrate_path=Invalid5sROC().context_multiple_wrong_version, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RO-Crate context version"], + expected_triggered_issues=[ + "The RO-Crate metadata file MUST include the RO-Crate context version 1.2 " + "(or later minor version) in `@context`" + ], + profile_identifier="five-safes-crate", + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py new file mode 100644 index 000000000..8149d1b24 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py @@ -0,0 +1,56 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_assess_action_not_referenced_from_rde(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?this . + } + WHERE { + ?this a schema:AssessAction . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=[ + "All AssessActions are mentioned from Root Data Entity" + ], + expected_triggered_issues=[ + "All AssessAction entities in the crate MUST be referenced from " + "the Root Dataset via `mentions`." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py b/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py new file mode 100644 index 000000000..1ab9b4b37 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_agent_memberOf_not_project(): + """ + Test a Five Safes Crate where an agent's `memberOf` does NOT reference a schema:Project. + (We replace the referenced Project with a plain literal.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:memberOf ?org . + } + INSERT { + ?agent schema:memberOf "Not a project (literal replacement)" + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?org . + ?org a schema:Project . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Requesting Agent"], + expected_triggered_issues=[ + "The 'memberOf' property of an agent MUST be of type Project." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_agent_memberOf_project_not_in_root(): + """ + Test a Five Safes Crate where NONE of the Projects referenced by Agent->memberOf are included + in the set of Projects referenced by RootDataEntity->sourceOrganization. + (We replace an agent's memberOf with a new Project that the root does not reference.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:memberOf ?org . 
+ } + INSERT { + # assign the agent to a new Project that is not referenced by the Root Data Entity + ?agent schema:memberOf <#missing-project> . + <#missing-project> a schema:Project . + } + WHERE { + # locate a CreateAction -> agent -> memberOf that currently points to a Project + ?action a schema:CreateAction ; + schema:agent ?agent . + + ?agent schema:memberOf ?org . + ?org a schema:Project . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Agent Project Intersection"], + expected_triggered_issues=[ + ( + "At least one Project referenced by Agent -> memberOf MUST be included " + "in the set of Projects referenced by RootDataEntity -> sourceOrganization." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD warns tests +def test_5src_agent_memberOf_missing_warning(): + """ + Test a Five Safes Crate where the Requesting Agent does NOT have the 'memberOf' property. + This should trigger the SHACL warning: the Requesting Agent SHOULD have a `memberOf` property. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:memberOf ?org . + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?org . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, # or True if warnings are not treated as failures + expected_triggered_requirements=["Requesting Agent"], + expected_triggered_issues=[ + "The Requesting Agent SHOULD have a `memberOf` property." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py b/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py new file mode 100644 index 000000000..1f5c9664b --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py @@ -0,0 +1,219 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ---- MUST fails tests + + +def test_5src_responsible_project_funding_not_grant(): + """ + Test a Five Safes Crate where a Responsible Project's `funding` property + is NOT of type schema:Grant. + (We replace the funding reference with a literal.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?project schema:funding ?grant . + } + INSERT { + ?project schema:funding "Not a grant (literal replacement)" . + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?project . + ?project schema:funding ?grant . + ?grant a schema:Grant . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Responsible Project"], + expected_triggered_issues=[ + "The property 'funding' of the Responsible Project MUST be of type Grant." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_responsible_project_member_not_organization(): + """ + Test a Five Safes Crate where a Responsible Project's `member` property + is NOT of type schema:Organization. + (We replace the member reference with a literal.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?project schema:member ?org . + } + INSERT { + ?project schema:member "Not organization or person (literal replacement)" . + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?project . + ?project schema:member ?org . + ?org a schema:Organization . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Responsible Project"], + expected_triggered_issues=[ + "The property 'member' of the Responsible Project MUST be of type schema:Organization or Person." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ---- SHOULD warns tests + + +def test_5src_responsible_project_member_and_agent_affiliation_no_intersection(): + """ + Test a Five Safes Crate where none of the organisations that are members of the + Responsible Project appear in the Requesting Agent's affiliations (violates the + 'Intersection with agent affiliations' SHACL warning). + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:affiliation ?oldAff . + } + INSERT { + ?agent schema:affiliation <#missing-affiliation> . + <#missing-affiliation> a schema:Organization . 
+ } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent a schema:Person ; + schema:memberOf ?project ; + schema:affiliation ?oldAff . + ?project schema:member ?oldAff . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=[ + "Organizations (members of Responsible Project)" + ], + expected_triggered_issues=[ + ( + "At least one of the organisations that are members of the responsible project SHOULD " + "be included in the Requesting Agent's affiliations, if such properties exist." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ---- MAY warns tests + + +def test_5src_responsible_project_missing_funding_property(): + """ + Test a Five Safes Crate where a Responsible Project does NOT have the `funding` property. + This should trigger the SHACL info: 'The Responsible Project does not have the property `funding`.' + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?project schema:funding ?f . + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?project . + ?project schema:funding ?f . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["Responsible Project"], + expected_triggered_issues=[ + "The Responsible Project does not have the property `funding`." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_responsible_project_missing_member_property(): + """ + Test a Five Safes Crate where a Responsible Project does NOT have the `member` property. + This should trigger the SHACL info: 'The Responsible Project does not have the property `member`.' 
+ """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?project schema:member ?m . + } + WHERE { + ?action a schema:CreateAction ; + schema:agent ?agent . + ?agent schema:memberOf ?project . + ?project schema:member ?m . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["Responsible Project"], + expected_triggered_issues=[ + "The Responsible Project does not have the property `member`." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py b/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py new file mode 100644 index 000000000..23c51832d --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py @@ -0,0 +1,90 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_root_data_entity_without_source_organization(): + """ + Test a Five Safes Crate where the Root Data Entity does NOT have the 'sourceOrganization' property. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:sourceOrganization ?o . + } + WHERE { + <./> schema:sourceOrganization ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + """The Root Data Entity MUST have a `sourceOrganization` property.""" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_source_organization_not_organization(): + """ + Test a Five Safes Crate where the Root Data Entity's `sourceOrganization` property + does NOT reference a `schema:Project` entity. + (We replace any existing sourceOrganization with a literal to violate the class constraint.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:sourceOrganization ?o . + } + INSERT { + # insert a literal instead of an IRI + <./> schema:sourceOrganization "Investigation of cancer (TRE72 project 81)" . + } + WHERE { + <./> schema:sourceOrganization ?o . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py b/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py new file mode 100644 index 000000000..41a4279d1 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py @@ -0,0 +1,165 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_createaction_does_not_have_agent(): + """ + Test a Five Safes Crate where CreateAction does not have the property agent. + (We remove the property agent from the CreateAction entity) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action schema:agent ?agent . 
+ } + WHERE { + ?action schema:agent ?agent ; + a schema:CreateAction . + ?agent a schema:Person . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CreateAction"], + expected_triggered_issues=[ + "CreateAction MUST have at least one agent that is a contextual entity." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_createaction_agent_is_not_person(): + """ + Test a Five Safes Crate where CreateAction has an agent that is not of type schema:Person. + (We replace the CreateAction's agent with an entity that has no type). + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action schema:agent ?agent . + } + INSERT { + ?action schema:agent <#not-a-person> . + } + WHERE { + ?action schema:agent ?agent ; + a schema:CreateAction . + ?agent a schema:Person . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CreateAction"], + expected_triggered_issues=["Each CreateAction agent MUST be typed as Person."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_agent_affiliation_not_organization(): + """ + Test a Five Safes Crate where the agent of CreateAction has an affiliation + that is not of type schema:Organization. + (We replace the agent's affiliation with an entity that has no type) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:affiliation ?aff . + } + INSERT { + ?agent schema:affiliation <#not-an-organization> . + } + WHERE { + ?agent schema:affiliation ?aff ; + a schema:Person . + ?aff a schema:Organization . + ?action a schema:CreateAction ; + schema:agent ?agent . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["CreateAction"], + expected_triggered_issues=[ + "The affiliation of a CreateAction's agent MUST be a contextual entity with type Organization." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD warning tests + + +def test_5src_agent_does_not_have_affiliation(): + """ + Test a Five Safes Crate where the agent of CreateAction does not have an affiliation. + (We remove the triple ?agent schema:affiliation ?org from the RO-Crate graph) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?agent schema:affiliation ?org . + } + WHERE { + ?agent schema:affiliation ?org ; + a schema:Person . + ?action a schema:CreateAction ; + schema:agent ?agent . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["Agent of WorkflowRunAction"], + expected_triggered_issues=[ + "The agent of the `CreateAction` corresponding to the workflow run SHOULD have an affiliation" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py new file mode 100644 index 000000000..a98c850bd --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py @@ -0,0 +1,576 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ---- SHOULD fails tests + + +def test_5src_no_signoff_phase(): + """ + Test a Five Safes Crate where no Sign-Off phase is listed. + """ + + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0> ?p ?o . + } + WHERE { + <#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0> ?p ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhase"], + expected_triggered_issues=[ + "There SHOULD be a Sign-Off Phase in the Final RO-Crate" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_name(): + """ + Test a Five Safes Crate where the Sign-Off phase has no name. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:name ?name . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:name ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["SignOff"], + expected_triggered_issues=[ + "Sign Off phase MUST have a human-readable name string." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_wrong_type(): + """ + Test a Five Safes Crate where the Sign-Off phase has the wrong type. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff rdf:type ?type . + } + INSERT { + ?signoff rdf:type . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + rdf:type ?type . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["SignOff"], + expected_triggered_issues=["Sign Off phase MUST be a `AssessAction`."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_wrong_action_status(): + """ + Test a Five Safes Crate where the Sign-Off phase has the wrong action status. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:actionStatus ?status . + } + INSERT { + ?signoff schema:actionStatus . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:actionStatus ?status . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffStatus"], + expected_triggered_issues=[ + "The value of actionStatus MUST be one of the allowed values:" + + " PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_not_mentioned(): + """ + Test a Five Safes Crate where the Sign-Off phase is not mentioned by the Root Data Entity. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions <#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0> . 
+ } + WHERE { + <./> schema:mentions <#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0> . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhase"], + expected_triggered_issues=[ + "The Root Data Entity SHOULD mention a Sign-Off Phase Object" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_endtime(): + """ + Test a Five Safes Crate where the Sign-Off phase has no endTime. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:endTime ?endTime . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:endTime ?endTime . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseEndTime"], + expected_triggered_issues=[ + "Sign Off object SHOULD have endTime property if action completed or failed." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_malformed_endtime(): + """ + Test a Five Safes Crate where the Sign-Off phase has an endTime + in the wrong format. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:endTime ?endTime . + } + INSERT { + ?signoff schema:endTime <2025-10-20> . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:endTime ?endTime . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_starttime(): + """ + Test a Five Safes Crate where the Sign-Off phase has no startTime. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:startTime ?startTime . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:startTime ?startTime . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseStartTime"], + expected_triggered_issues=[ + "Sign Off object MAY have a startTime property if action is active, completed or failed." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_malformed_starttime(): + """ + Test a Five Safes Crate where the Sign-Off phase has a startTime + in the wrong format. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:startTime ?startTime . + } + INSERT { + ?signoff schema:startTime <2025-10-20> . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:startTime ?startTime . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_actionstatus(): + """ + Test a Five Safes Crate where the Sign-Off phase has no actionStatus. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:actionStatus ?actionStatus . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:actionStatus ?actionStatus . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=["The Sign-Off Phase SHOULD have an actionStatus"], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_agent(): + """ + Test a Five Safes Crate where the Sign-Off phase has no agent. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:agent ?agent . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:agent ?agent . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=["The Sign-Off Phase SHOULD have an agent"], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_no_instrument(): + """ + Test a Five Safes Crate where the Sign-Off phase has no TRE policy (instrument). + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:instrument ?instrument . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:instrument ?instrument . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_instrument_not_iri(): + """ + Test a Five Safes Crate where the Sign-Off phase TRE policy (instrument) is not an IRI. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:instrument ?instrument . + } + INSERT { + ?signoff schema:instrument "Not a cross-reference" . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:instrument ?instrument . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_instrument_no_type(): + """ + Test a Five Safes Crate where the Sign-Off phase instrument has no type. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?instrument rdf:type ?type . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:instrument ?instrument . + ?instrument rdf:type ?type . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_instrument_no_name(): + """ + Test a Five Safes Crate where the Sign-Off phase instrument has no name. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?instrument schema:name ?name . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:instrument ?instrument . + ?instrument schema:name ?name .
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD have an TRE policy (instrument) with a human-readable name" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_object_notworkflow(): + """ + Test a Five Safes Crate where there is no workflow in the Sign-Off objects. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:object . + } + INSERT { + ?signoff schema:object . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:object . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD list the workflow (mainEntity) as an object" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_signoff_phase_object_not_responsible_project(): + """ + Test a Five Safes Crate where there is no Responsible Project in the Sign-Off objects. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?signoff schema:object <#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70> . + } + INSERT { + ?signoff schema:object . + } + WHERE { + ?signoff a schema:AssessAction ; + schema:additionalType ; + schema:object <#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70> . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["SignOffPhaseProperties"], + expected_triggered_issues=[ + "The Sign-Off Phase SHOULD list the Responsible Project (sourceOrganization) as an object" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py b/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py new file mode 100644 index 000000000..313e794e4 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py @@ -0,0 +1,193 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +# ----- SHOULD fails tests + + +def test_5src_root_data_entity_missing_conformsto_property(): + """ + Test a Five Safes Crate where the RootDataEntity does not have the conformsTo property. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?rootdataentity dct:conformsTo ?profile . 
+ } + WHERE { + ?metadatafile a schema:CreativeWork ; + schema:about ?rootdataentity . + ?rootdataentity dct:conformsTo ?profile . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_conforms_to_wrong_profile(): + """ + Test a Five Safes Crate where the RootDataEntity does not conform to the expected profile. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?rootdataentity dct:conformsTo ?profile . + } + INSERT { + ?rootdataentity dct:conformsTo "This is not the IRI to the 5sc profile" + } + WHERE { + ?metadatafile a schema:CreativeWork ; + schema:about ?rootdataentity . + ?rootdataentity dct:conformsTo ?profile . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_has_publisher_but_not_date_published(): + """ + Test a Five Safes Crate where the RootDataEntity has a publisher but no datePublished. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?rootdataentity schema:datePublished ?datePublished . + } + WHERE { + ?metadatafile a schema:CreativeWork ; + schema:about ?rootdataentity . + ?rootdataentity schema:publisher ?publisher ; + schema:datePublished ?datePublished .
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["datePublished present on published crates"], + expected_triggered_issues=[ + "A crate SHOULD have a publishedDate if and only if it has a publisher." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_has_date_published_but_not_publisher(): + """ + Test a Five Safes Crate where the RootDataEntity has datePublished but no publisher. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?rootdataentity schema:publisher ?publisher . + } + WHERE { + ?metadatafile a schema:CreativeWork ; + schema:about ?rootdataentity . + ?rootdataentity schema:publisher ?publisher ; + schema:datePublished ?datePublished . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["datePublished present on published crates"], + expected_triggered_issues=[ + "A crate SHOULD have a publishedDate if and only if it has a publisher." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_has_publisher_but_not_license(): + """ + Test a Five Safes Crate where the RootDataEntity has publisher but not license. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?rootdataentity schema:license ?license . + } + WHERE { + ?metadatafile a schema:CreativeWork ; + schema:about ?rootdataentity . + ?rootdataentity schema:publisher ?publisher ; + schema:license ?license .
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["License present on published crates"], + expected_triggered_issues=[ + "Profile Conformance: Published crates SHOULD include a license." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- MAY fails tests diff --git a/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py new file mode 100644 index 000000000..1271adffd --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py @@ -0,0 +1,258 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_root_data_entity_two_main_entities(): + """ + Add a 2nd RootDataEntity's mainEntity so maxCount=1 is violated. + """ + sparql = ( + SPARQL_PREFIXES + + """ + INSERT { + # add an IRI that is NOT typed as schema:Dataset (e.g. a schema:SoftwareSourceCode) + <./> schema:mainEntity . 
+ a schema:Dataset . + } + WHERE { + <./> schema:mainEntity ?m . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "The RootDataEntity MUST have exactly one mainEntity property that is an IRI." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_no_main_entity(): + """ + Remove the RootDataEntity's mainEntity so minCount=1 is violated. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mainEntity ?m . + } + WHERE { + <./> schema:mainEntity ?m . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "The RootDataEntity MUST have exactly one mainEntity property that is an IRI." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_main_entity_not_dataset_iri(): + """ + Test a Five Safes Crate where the RootDataEntity's mainEntity is an IRI but not typed as schema:Dataset. + (We point mainEntity to a new crate-local entity typed as something else.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mainEntity ?m . + } + INSERT { + # add an IRI that is NOT typed as schema:Dataset (e.g. a schema:SoftwareSourceCode) + <./> schema:mainEntity <./not-a-dataset> . + <./not-a-dataset> a schema:SoftwareSourceCode . + } + WHERE { + <./> schema:mainEntity ?m . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "The mainEntity pointed to by the RootDataEntity MUST be of type Dataset" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_main_entity_conformsTo_absent(): + """ + Test a Five Safes Crate where the mainEntity does not have the purl:conformsTo property + (we remove from the graph the triplet 'mainEntity conformsTo ?o'). + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX purl: + DELETE { + ?dataset purl:conformsTo ?o . + } + WHERE { + <./> schema:mainEntity ?dataset . + ?dataset purl:conformsTo ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "mainEntity MUST have one and only one `conformsTo` property." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_main_entity_has_two_conformsto(): + """ + Test a Five Safes Crate where the mainEntity -> purl:conformsTo has two + objects. + (we add second object to mainEntity -> purl:conformsTo to violate maxCount 1). + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX purl: + INSERT { + ?main purl:conformsTo . + } + WHERE { + <./> schema:mainEntity ?main . + ?main purl:conformsTo ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "mainEntity MUST have one and only one `conformsTo` property." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_main_entity_conformsTo_invalid(): + """ + Test a Five Safes Crate where the mainEntity's conformsTo IRI does NOT start with + "https://w3id.org/workflowhub/workflow-ro-crate" (violates the SHACL SPARQL constraint). + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX purl: + DELETE { + ?dataset purl:conformsTo ?iri . + } + INSERT { + ?dataset purl:conformsTo . + } + WHERE { + <./> schema:mainEntity ?dataset . + ?dataset purl:conformsTo ?iri . + FILTER(STRSTARTS(STR(?iri), "https://w3id.org/workflowhub/workflow-ro-crate")) + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_main_entity_missing_distribution_warning(): + """ + Test a Five Safes Crate where a mainEntity has an HTTP(S) IRI but no distribution with an HTTP(S) URL. + This should trigger the SHACL warning about missing or non-HTTP(S) distributions. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?dataset schema:distribution ?dist . + } + WHERE { + <./> schema:mainEntity ?dataset . + ?dataset schema:distribution ?dist . + FILTER (STRSTARTS(STR(?dataset), "http://") || STRSTARTS(STR(?dataset), "https://")) . + FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py b/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py new file mode 100644 index 000000000..b58147651 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py @@ -0,0 +1,200 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_rocrate_does_not_have_createaction(): + """ + Test a Five Safes Crate where no `CreateAction` entity exists. + (We remove the entire CreateAction entity from the RO-Crate) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action ?p ?o . + ?s ?p2 ?action . 
+ } + WHERE { + ?action a schema:CreateAction . + ?action ?p ?o . + OPTIONAL { ?s ?p2 ?action . } + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "`RootDataEntity` MUST reference at least one `CreateAction`", + " (corresponding to the workflow run) through `mentions`", + ( + "The CreateAction entity corresponding to the workflow MUST " + "reference, as an instrument, the entity that is referenced " + "as mainEntity by the RO-Crate" + ), + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_rootdataentity_does_not_have_mentions_property(): + """ + Test a Five Safes Crate where RootDataEntity does not have the property mentions. + (We remove the property mentions from the RootDataEntity entity) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?o . + } + WHERE { + <./> schema:mentions ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "`RootDataEntity` MUST reference at least one `CreateAction`", + " (corresponding to the workflow run) through `mentions`", + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_rootdataentity_does_not_mention_create_action(): + """ + Test a Five Safes Crate where `RootDataEntity` does not `mention` a `CreateAction` entity. + (We replace the object of RootDataEntity --> mentions with a string literal). + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?o . + } + INSERT { + <./> schema:mentions "This is not a CreateAction entity" . + } + WHERE { + <./> schema:mentions ?o .
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "`RootDataEntity` MUST reference at least one `CreateAction`", + " (corresponding to the workflow run) through `mentions`", + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_createaction_object_does_not_reference_existing_entities(): + """ + Test a Five Safes Crate where `CreateAction` --> `object` does not + reference an existing entity in the RO-Crate. + (We replace the objects of `CreateAction` --> `object` with a literal.`) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action schema:object ?o . + } + INSERT { + ?action schema:object "This is not an entity in the RO-Crate" . + } + WHERE { + ?action schema:object ?o ; + a schema:CreateAction . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowRunAction"], + expected_triggered_issues=[ + "In the `CreateAction` entity corresponding to the workflow run," + + " each `object` MUST reference an existing entity." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_createaction_does_not_have_object_property(): + """ + Test a Five Safes Crate where `CreateAction` does not have the property `object`. + (We remove the property `object` from `CreateAction`) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?action schema:object ?o . + } + WHERE { + ?action schema:object ?o ; + a schema:CreateAction . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["WorkflowRunAction"], + expected_triggered_issues=[ + "`CreateAction` (corresponding to the workflow run) SHOULD have the property `object` with IRI values." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py new file mode 100644 index 000000000..b50710cc8 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py @@ -0,0 +1,373 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_disclosure_object_with_no_name(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:name ?name . + <./> schema:mentions ?this . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + "`DisclosureCheck` MUST have a name string of at least 10 characters." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_with_name_not_string(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name 123 . + } + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:name ?name . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + "`DisclosureCheck` MUST have a name string of at least 10 characters." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_with_not_long_enough_name(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this schema:name ?name . + } + INSERT { + ?this schema:name "Short" . + } + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:name ?name . + <./> schema:mentions ?this . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + "`DisclosureCheck` MUST have a name string of at least 10 characters." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_not_an_assess_action(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?this rdf:type schema:AssessAction . 
+ } + INSERT { + ?this rdf:type "Not an AssessAction type" . + } + WHERE { + ?this rdf:type schema:AssessAction ; + schema:additionalType shp:DisclosureCheck . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=["`DisclosureCheck` MUST be a `AssessAction`."], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_with_no_proper_action_status(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + INSERT { + ?s schema:actionStatus "This is not a proper actionStatus" . + } + WHERE { + ?s schema:actionStatus ?o ; + schema:additionalType shp:DisclosureCheck . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + ( + "`DisclosureCheck` MUST have an actionStatus with an allowed value " + "(see https://schema.org/ActionStatusType)." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_has_no_properly_formatted_start_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?time . + } + INSERT { + ?s schema:startTime "1st Dec '25 @ 10:00:00" . + } + WHERE { + ?s schema:additionalType shp:DisclosureCheck ; + schema:startTime ?time . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_has_no_properly_formatted_end_time(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:endTime ?time . + } + INSERT { + ?s schema:endTime "1st Dec '25 @ 10:00:00" . + } + WHERE { + ?s schema:additionalType shp:DisclosureCheck ; + schema:endTime ?time . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["Timestamp Format"], + expected_triggered_issues=[ + ( + "All `startTime` and `endTime` values MUST follow the RFC 3339 standard " + "(YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_disclosure_object_not_mentioned_by_root_data_entity(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mentions ?o . + } + WHERE { + ?o schema:additionalType shp:DisclosureCheck . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "`RootDataEntity` SHOULD mention a disclosure object." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_with_no_action_status(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:actionStatus ?o . + } + WHERE { + ?s schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?o . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + "The `DisclosureCheck` SHOULD have `actionStatus` property." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_disclosure_object_has_no_end_time_if_ended(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:endTime ?o . + } + WHERE { + ?s schema:additionalType shp:DisclosureCheck ; + schema:endTime ?o ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + ( + "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` " + "is either CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- MAY fails tests + + +def test_5src_disclosure_object_has_no_start_time_if_begun(): + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?s schema:startTime ?o . + } + WHERE { + ?s schema:additionalType shp:DisclosureCheck ; + schema:startTime ?o ; + schema:actionStatus ?status . 
+ FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_result, + requirement_severity=Severity.OPTIONAL, + expected_validation_result=False, + expected_triggered_requirements=["DisclosureCheck"], + expected_triggered_issues=[ + ( + "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` " + "is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ) + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py b/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py new file mode 100644 index 000000000..e6517098d --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py @@ -0,0 +1,64 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- SHOULD fails tests + + +def test_input_does_not_reference_formalparameter(): + """ + Test a Five Safes Crate where an input entity does not reference a + `bioschemas:FormalParameter using `schema:exampleOfWork`. + (We replace the ?object of input --> exampleOfWork with a literal) + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + DELETE { + ?input schema:exampleOfWork ?formalParameter . + } + INSERT { + ?input schema:exampleOfWork "not-a-formal-parameter" . + } + WHERE { + ?input schema:exampleOfWork ?formalParameter . + ?formalParameter a bioschemas:FormalParameter . + ?action a schema:CreateAction ; + schema:object ?input . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["Input"], + expected_triggered_issues=[ + "Input SHOULD reference a FormalParameter using exampleOfWork" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) diff --git a/tests/integration/profiles/five-safes-crate/test_valid_5src.py b/tests/integration/profiles/five-safes-crate/test_valid_5src.py new file mode 100644 index 000000000..04bb321b1 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_valid_5src.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024-2025 CRS4 +# Copyright (c) 2025-2026 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import pytest + +from rocrate_validator import services +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +# Dynamically fetch the SKIP_WEB_RESOURCE_AVAILABILITY_IDENTIFIER +# required as disable_inherited_profiles_reporting does not disable Python checks from +# inherited profiles (https://github.com/crs4/rocrate-validator/issues/135) +rocrate_profile = services.get_profile("ro-crate-1.2") +if not rocrate_profile: + raise RuntimeError("Unable to load the RO-Crate 1.2 profile") +check_local_data_entity_existence = rocrate_profile.get_requirement_check( + "Data Entity: REQUIRED resource availability" +) +assert check_local_data_entity_existence, "Unable to find the requirement 'Data Entity: REQUIRED resource availability'" +SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier +check_local_data_entity_existence = rocrate_profile.get_requirement_check( + "Web-based Data Entity: REQUIRED resource availability" +) +assert ( + check_local_data_entity_existence +), "Unable to find the requirement 'Web-based Data Entity: REQUIRED resource availability'" +SKIP_WEB_RESOURCE_AVAILABILITY_IDENTIFIER = check_local_data_entity_existence.identifier + + +def test_valid_five_safes_crate_request_required(): + """Test a valid Five Safes Crate representing a request.""" + do_entity_test( + ValidROC().five_safes_crate_request, + Severity.REQUIRED, + True, + 
profile_identifier="five-safes-crate", + skip_checks=[ + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + + +@pytest.mark.xfail( + reason=""" + Checks that ensure certain Five Safes actions are present currently fail for this crate, + as this crate represents an early stage of a process before those actions have happened. + """ +) +def test_valid_five_safes_crate_request_recommended(): + """Test a valid Five Safes Crate representing a request.""" + do_entity_test( + ValidROC().five_safes_crate_request, + Severity.RECOMMENDED, + True, + profile_identifier="five-safes-crate", + skip_checks=[ + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + SKIP_WEB_RESOURCE_AVAILABILITY_IDENTIFIER, + ], + disable_inherited_profiles_issue_reporting=True, + ) + + +def test_valid_five_safes_crate_result_required(): + """Test a valid Five Safes Crate representing a result.""" + do_entity_test( + ValidROC().five_safes_crate_result, + Severity.REQUIRED, + True, + profile_identifier="five-safes-crate", + skip_checks=[ + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + + +def test_valid_five_safes_crate_result_recommended(): + """Test a valid Five Safes Crate representing a result.""" + do_entity_test( + ValidROC().five_safes_crate_result, + Severity.RECOMMENDED, + True, + profile_identifier="five-safes-crate", + skip_checks=[ + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + SKIP_WEB_RESOURCE_AVAILABILITY_IDENTIFIER, + ], + disable_inherited_profiles_issue_reporting=True, + ) + + +def test_valid_five_safes_crate_multiple_context(): + """Test a valid Five Safes Crate representing a result.""" + do_entity_test( + ValidROC().five_safes_crate_multiple_context, + Severity.REQUIRED, + True, + profile_identifier="five-safes-crate", + skip_checks=[ + SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) diff --git a/tests/ro_crates.py b/tests/ro_crates.py index facb1457d..4a450c9d0 100644 --- a/tests/ro_crates.py +++ b/tests/ro_crates.py @@ -121,6 +121,18 
@@ def isa_ro_crate(self) -> Path: def multi_profile_crate(self) -> Path: return VALID_CRATES_DATA_PATH / "multi-profile-crate" + @property + def five_safes_crate_request(self) -> Path: + return VALID_CRATES_DATA_PATH / "five-safes-crate-request" + + @property + def five_safes_crate_result(self) -> Path: + return VALID_CRATES_DATA_PATH / "five-safes-crate-result" + + @property + def five_safes_crate_multiple_context(self) -> Path: + return VALID_CRATES_DATA_PATH / "five-safes-crate-multiple-context" + class InvalidFileDescriptor: base_path = INVALID_CRATES_DATA_PATH / "0_file_descriptor_format" @@ -991,6 +1003,18 @@ def propertyvalue_no_unitcode(self) -> Path: return self.base_path / "propertyvalue_no_unitcode" +class Invalid5sROC: + base_path = INVALID_CRATES_DATA_PATH / "five_safes_crate" + + @property + def context_multiple_wrong_version(self) -> Path: + return self.base_path / "context_multiple_wrong_version" + + @property + def context_single_wrong_version(self) -> Path: + return self.base_path / "context_single_wrong_version" + + class InvalidMultiProfileROC: @property def invalid_multi_profile_crate(self) -> Path: diff --git a/tests/shared.py b/tests/shared.py index 9fbe3bce2..30a98ec65 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -35,7 +35,13 @@ T = TypeVar("T") -SPARQL_PREFIXES = "PREFIX schema: " +SPARQL_PREFIXES = """ +PREFIX schema: +PREFIX shp: +PREFIX rdf: +PREFIX rocrate: +PREFIX dct: +""" def first(c: Collection[T]) -> T: From 9b12bc79fe5692d8cb1892b5682cb44d27f75c48 Mon Sep 17 00:00:00 2001 From: EttoreM Date: Thu, 2 Jul 2026 15:40:49 +0100 Subject: [PATCH 351/352] Amended formatting --- .../five-safes-crate/test_5src_10_outputs.py | 6 ++-- .../test_5src_11_workflow_execution.py | 6 ++-- .../test_5src_13_validation_phase.py | 18 +++------- .../test_5src_14_workflow_retrieval_phase.py | 23 ++++-------- .../test_5src_15_metadata_file.py | 8 ++--- .../test_5src_16_publishing_phase.py | 9 ++--- .../test_5src_1_requesting_agent.py | 10 ++---- 
.../test_5src_1_responsible_project.py | 18 +++------- .../test_5src_1_root_data_entity_metadata.py | 6 ++-- .../test_5src_2_requesting_agent.py | 6 ++-- .../test_5src_4_signoff_phase.py | 36 ++++++------------- .../test_5src_5_profile_conformance.py | 22 ++++-------- .../test_5src_6_workflow_reference.py | 26 ++++---------- .../test_5src_7_requesting_workflow_run.py | 4 +-- .../test_5src_8_disclosure_phase.py | 22 ++++-------- .../five-safes-crate/test_5src_9_inputs.py | 6 ++-- .../five-safes-crate/test_valid_5src.py | 11 +++--- 17 files changed, 71 insertions(+), 166 deletions(-) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py b/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py index 1cbdbe6d0..15dcc2449 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_10_outputs.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -87,9 +87,7 @@ def test_result_output_does_not_have_allowed_type(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["Output"], - expected_triggered_issues=[ - "Result SHOULD have a `@type` among an allowed set of values." 
- ], + expected_triggered_issues=["Result SHOULD have a `@type` among an allowed set of values."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py b/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py index a271978a8..9f54354d9 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_11_workflow_execution.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -267,9 +267,7 @@ def test_5src_workflow_object_has_no_end_time_if_ended(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["WorkflowExecution"], - expected_triggered_issues=[ - "The workflow execution object SHOULD have an endTime property if it has ended." 
- ], + expected_triggered_issues=["The workflow execution object SHOULD have an endTime property if it has ended."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py index be6f44906..c19e54663 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_13_validation_phase.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -75,9 +75,7 @@ def test_5src_validation_check_name_not_a_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["ValidationCheck"], - expected_triggered_issues=[ - "ValidationCheck MUST have a human readable name string." - ], + expected_triggered_issues=["ValidationCheck MUST have a human readable name string."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -200,9 +198,7 @@ def test_5src_root_data_entity_does_not_mention_validation_check_entity(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "RootDataEntity SHOULD mention a ValidationCheck object." 
- ], + expected_triggered_issues=["RootDataEntity SHOULD mention a ValidationCheck object."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -229,9 +225,7 @@ def test_5src_validation_check_object_does_not_point_to_root_data_entity(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["ValidationCheck"], - expected_triggered_issues=[ - "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" - ], + expected_triggered_issues=["`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -283,9 +277,7 @@ def test_5src_Validation_check_does_not_have_action_status_property(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["ValidationCheck"], - expected_triggered_issues=[ - "ValidationCheck SHOULD have actionStatus property." - ], + expected_triggered_issues=["ValidationCheck SHOULD have actionStatus property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py index 52228408e..c7d92d0cd 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_14_workflow_retrieval_phase.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -45,9 +45,7 @@ def test_5src_download_action_does_not_have_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, 
expected_triggered_requirements=["DownloadAction"], - expected_triggered_issues=[ - "DownloadAction MUST have a human readable name string." - ], + expected_triggered_issues=["DownloadAction MUST have a human readable name string."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -74,9 +72,7 @@ def test_5src_download_action_name_not_a_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["DownloadAction"], - expected_triggered_issues=[ - "DownloadAction MUST have a human readable name string." - ], + expected_triggered_issues=["DownloadAction MUST have a human readable name string."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -206,10 +202,7 @@ def test_5src_downloaded_workflow_distribution_is_not_the_same_as_download_actio expected_validation_result=False, expected_triggered_requirements=["Downloaded Workflow"], expected_triggered_issues=[ - ( - "DownloadedWorkflow --> `distribution` MUST reference " - "the same entity as `DownloadAction` --> `object`." - ) + ("DownloadedWorkflow --> `distribution` MUST reference the same entity as `DownloadAction` --> `object`.") ], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, @@ -295,9 +288,7 @@ def test_5src_root_data_entity_does_not_mention_download_action_entity(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "RootDataEntity SHOULD mention DownloadAction if this exists." 
- ], + expected_triggered_issues=["RootDataEntity SHOULD mention DownloadAction if this exists."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -352,9 +343,7 @@ def test_5src_download_action_does_not_have_action_status_property(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["DownloadAction"], - expected_triggered_issues=[ - "`DownloadAction` SHOULD have `actionStatus` property." - ], + expected_triggered_issues=["`DownloadAction` SHOULD have `actionStatus` property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py b/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py index e94a76fa9..a166ed4aa 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_15_metadata_file.py @@ -16,8 +16,8 @@ import logging from rocrate_validator.models import Severity -from tests.ro_crates import ValidROC, Invalid5sROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.ro_crates import Invalid5sROC, ValidROC +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -47,9 +47,7 @@ def test_5src_conforms_to_old_version(): rocrate_path=ValidROC().five_safes_crate_request, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - expected_triggered_requirements=[ - "RO-Crate conforms to 1.2 or later minor version" - ], + expected_triggered_requirements=["RO-Crate conforms to 1.2 or later minor version"], expected_triggered_issues=[ "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with " "RO-Crate specification version 1.2 or later minor version" diff --git a/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py 
b/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py index 8149d1b24..485aeb026 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_16_publishing_phase.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -44,12 +44,9 @@ def test_5src_assess_action_not_referenced_from_rde(): rocrate_path=ValidROC().five_safes_crate_result, requirement_severity=Severity.REQUIRED, expected_validation_result=False, - expected_triggered_requirements=[ - "All AssessActions are mentioned from Root Data Entity" - ], + expected_triggered_requirements=["All AssessActions are mentioned from Root Data Entity"], expected_triggered_issues=[ - "All AssessAction entities in the crate MUST be referenced from " - "the Root Dataset via `mentions`." + "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." 
], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py b/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py index 1ab9b4b37..84c1372a7 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_requesting_agent.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -54,9 +54,7 @@ def test_5src_agent_memberOf_not_project(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Requesting Agent"], - expected_triggered_issues=[ - "The 'memberOf' property of an agent MUST be of type Project." - ], + expected_triggered_issues=["The 'memberOf' property of an agent MUST be of type Project."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -131,9 +129,7 @@ def test_5src_agent_memberOf_missing_warning(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, # or True if warnings are not treated as failures expected_triggered_requirements=["Requesting Agent"], - expected_triggered_issues=[ - "The Requesting Agent SHOULD have a `memberOf` property." 
- ], + expected_triggered_issues=["The Requesting Agent SHOULD have a `memberOf` property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py b/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py index 1f5c9664b..96bda9e82 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_responsible_project.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -56,9 +56,7 @@ def test_5src_responsible_project_funding_not_grant(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["Responsible Project"], - expected_triggered_issues=[ - "The property 'funding' of the Responsible Project MUST be of type Grant." 
- ], + expected_triggered_issues=["The property 'funding' of the Responsible Project MUST be of type Grant."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -136,9 +134,7 @@ def test_5src_responsible_project_member_and_agent_affiliation_no_intersection() rocrate_path=ValidROC().five_safes_crate_request, requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, - expected_triggered_requirements=[ - "Organizations (members of Responsible Project)" - ], + expected_triggered_requirements=["Organizations (members of Responsible Project)"], expected_triggered_issues=[ ( "At least one of the organisations that are members of the responsible project SHOULD " @@ -178,9 +174,7 @@ def test_5src_responsible_project_missing_funding_property(): requirement_severity=Severity.OPTIONAL, expected_validation_result=False, expected_triggered_requirements=["Responsible Project"], - expected_triggered_issues=[ - "The Responsible Project does not have the property `funding`." - ], + expected_triggered_issues=["The Responsible Project does not have the property `funding`."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -211,9 +205,7 @@ def test_5src_responsible_project_missing_member_property(): requirement_severity=Severity.OPTIONAL, expected_validation_result=False, expected_triggered_requirements=["Responsible Project"], - expected_triggered_issues=[ - "The Responsible Project does not have the property `member`." 
- ], + expected_triggered_issues=["The Responsible Project does not have the property `member`."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py b/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py index 23c51832d..19e3f21fe 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_1_root_data_entity_metadata.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -47,9 +47,7 @@ def test_5src_root_data_entity_without_source_organization(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - """The Root Data Entity MUST have a `sourceOrganization` property.""" - ], + expected_triggered_issues=["""The Root Data Entity MUST have a `sourceOrganization` property."""], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py b/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py index 41a4279d1..7f496cdac 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_2_requesting_agent.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -50,9 +50,7 @@ def 
test_createaction_does_not_have_agent(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["CreateAction"], - expected_triggered_issues=[ - "CreateAction MUST have at least one agent that is a contextual entity." - ], + expected_triggered_issues=["CreateAction MUST have at least one agent that is a contextual entity."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py index a98c850bd..79d08229f 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_4_signoff_phase.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -48,9 +48,7 @@ def test_5src_no_signoff_phase(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhase"], - expected_triggered_issues=[ - "There SHOULD be a Sign-Off Phase in the Final RO-Crate" - ], + expected_triggered_issues=["There SHOULD be a Sign-Off Phase in the Final RO-Crate"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -79,9 +77,7 @@ def test_5src_signoff_phase_no_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["SignOff"], - expected_triggered_issues=[ - "Sign Off phase MUST have a human-readable name string." 
- ], + expected_triggered_issues=["Sign Off phase MUST have a human-readable name string."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -147,7 +143,7 @@ def test_5src_signoff_phase_wrong_action_status(): expected_triggered_requirements=["SignOffStatus"], expected_triggered_issues=[ "The value of actionStatus MUST be one of the allowed values:" - + " PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." + " PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, @@ -175,9 +171,7 @@ def test_5src_signoff_phase_not_mentioned(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhase"], - expected_triggered_issues=[ - "The Root Data Entity SHOULD mention a Sign-Off Phase Object" - ], + expected_triggered_issues=["The Root Data Entity SHOULD mention a Sign-Off Phase Object"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -206,9 +200,7 @@ def test_5src_signoff_phase_no_endtime(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhaseEndTime"], - expected_triggered_issues=[ - "Sign Off object SHOULD have endTime property if action completed or failed." 
- ], + expected_triggered_issues=["Sign Off object SHOULD have endTime property if action completed or failed."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -402,9 +394,7 @@ def test_5src_signoff_phase_no_instrument(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhaseProperties"], - expected_triggered_issues=[ - "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" - ], + expected_triggered_issues=["The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -436,9 +426,7 @@ def test_5src_signoff_phase_instrument_not_iri(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhaseProperties"], - expected_triggered_issues=[ - "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" - ], + expected_triggered_issues=["The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -468,9 +456,7 @@ def test_5src_signoff_phase_instrument_no_type(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhaseProperties"], - expected_triggered_issues=[ - "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" - ], + expected_triggered_issues=["The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -534,9 +520,7 @@ def test_5src_signoff_phase_object_notworkflow(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["SignOffPhaseProperties"], - expected_triggered_issues=[ - "The Sign-Off Phase SHOULD 
list the workflow (mainEntity) as an object" - ], + expected_triggered_issues=["The Sign-Off Phase SHOULD list the workflow (mainEntity) as an object"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py b/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py index 313e794e4..8012a37b4 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_5_profile_conformance.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -52,9 +52,7 @@ def test_5src_root_data_entity_missing_conformsto_property(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4" - ], + expected_triggered_issues=["Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -86,9 +84,7 @@ def test_5src_root_data_entity_conforms_to_wrong_profile(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4" - ], + expected_triggered_issues=["Root Dataset SHOULD include `conformsTo` https://w3id.org/5s-crate/0.4"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -118,9 +114,7 @@ def test_5src_root_data_entity_has_publisher_but_not_date_published(): 
requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["datePublished present on published crates"], - expected_triggered_issues=[ - "A crate SHOULD have a publishedDate if and only if it has a publisher." - ], + expected_triggered_issues=["A crate SHOULD have a publishedDate if and only if it has a publisher."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -150,9 +144,7 @@ def test_5src_root_data_entity_has_date_published_but_not_publisher(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["datePublished present on published crates"], - expected_triggered_issues=[ - "A crate SHOULD have a publishedDate if and only if it has a publisher." - ], + expected_triggered_issues=["A crate SHOULD have a publishedDate if and only if it has a publisher."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -182,9 +174,7 @@ def test_5src_root_data_entity_has_publisher_but_not_license(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["License present on published crates"], - expected_triggered_issues=[ - "Profile Conformance: Published crates SHOULD include a license." 
- ], + expected_triggered_issues=["Profile Conformance: Published crates SHOULD include a license."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py index 1271adffd..2aacf26e3 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -49,9 +49,7 @@ def test_5src_root_data_entity_two_main_entities(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "The RootDataEntity MUST have exactly one mainEntity property that is an IRI." - ], + expected_triggered_issues=["The RootDataEntity MUST have exactly one mainEntity property that is an IRI."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -78,9 +76,7 @@ def test_5src_root_data_entity_no_main_entity(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "The RootDataEntity MUST have exactly one mainEntity property that is an IRI." 
- ], + expected_triggered_issues=["The RootDataEntity MUST have exactly one mainEntity property that is an IRI."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -113,9 +109,7 @@ def test_5src_root_data_entity_main_entity_not_dataset_iri(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "The mainEntity pointed to by the RootDataEntity MUST be of type Dataset" - ], + expected_triggered_issues=["The mainEntity pointed to by the RootDataEntity MUST be of type Dataset"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -145,9 +139,7 @@ def test_5src_main_entity_conformsTo_absent(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["mainEntity"], - expected_triggered_issues=[ - "mainEntity MUST have one and only one `conformsTo` property." - ], + expected_triggered_issues=["mainEntity MUST have one and only one `conformsTo` property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -178,9 +170,7 @@ def test_5src_main_entity_has_two_conformsto(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["mainEntity"], - expected_triggered_issues=[ - "mainEntity MUST have one and only one `conformsTo` property." 
- ], + expected_triggered_issues=["mainEntity MUST have one and only one `conformsTo` property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -214,9 +204,7 @@ def test_5src_main_entity_conformsTo_invalid(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["mainEntity"], - expected_triggered_issues=[ - "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" - ], + expected_triggered_issues=["conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py b/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py index b58147651..c2a82ba3e 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_7_requesting_workflow_run.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -159,7 +159,7 @@ def test_createaction_object_does_not_reference_existing_entities(): expected_triggered_requirements=["WorkflowRunAction"], expected_triggered_issues=[ "In the `CreateAction` entity corresponding to the workflow run," - + " each `object` MUST reference an existing entity." + " each `object` MUST reference an existing entity." 
], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, diff --git a/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py b/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py index b50710cc8..8113a5111 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_8_disclosure_phase.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -46,9 +46,7 @@ def test_5src_disclosure_object_with_no_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["DisclosureCheck"], - expected_triggered_issues=[ - "`DisclosureCheck` MUST have a name string of at least 10 characters." - ], + expected_triggered_issues=["`DisclosureCheck` MUST have a name string of at least 10 characters."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -77,9 +75,7 @@ def test_5src_disclosure_object_with_name_not_string(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["DisclosureCheck"], - expected_triggered_issues=[ - "`DisclosureCheck` MUST have a name string of at least 10 characters." - ], + expected_triggered_issues=["`DisclosureCheck` MUST have a name string of at least 10 characters."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -108,9 +104,7 @@ def test_5src_disclosure_object_with_not_long_enough_name(): requirement_severity=Severity.REQUIRED, expected_validation_result=False, expected_triggered_requirements=["DisclosureCheck"], - expected_triggered_issues=[ - "`DisclosureCheck` MUST have a name string of at least 10 characters." 
- ], + expected_triggered_issues=["`DisclosureCheck` MUST have a name string of at least 10 characters."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -264,9 +258,7 @@ def test_5src_disclosure_object_not_mentioned_by_root_data_entity(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["RootDataEntity"], - expected_triggered_issues=[ - "`RootDataEntity` SHOULD mention a disclosure object." - ], + expected_triggered_issues=["`RootDataEntity` SHOULD mention a disclosure object."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) @@ -291,9 +283,7 @@ def test_5src_disclosure_object_with_no_action_status(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["DisclosureCheck"], - expected_triggered_issues=[ - "The `DisclosureCheck` SHOULD have `actionStatus` property." - ], + expected_triggered_issues=["The `DisclosureCheck` SHOULD have `actionStatus` property."], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py b/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py index e6517098d..eb8efa7ef 100644 --- a/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py +++ b/tests/integration/profiles/five-safes-crate/test_5src_9_inputs.py @@ -17,7 +17,7 @@ from rocrate_validator.models import Severity from tests.ro_crates import ValidROC -from tests.shared import do_entity_test, SPARQL_PREFIXES +from tests.shared import SPARQL_PREFIXES, do_entity_test # set up logging logger = logging.getLogger(__name__) @@ -56,9 +56,7 @@ def test_input_does_not_reference_formalparameter(): requirement_severity=Severity.RECOMMENDED, expected_validation_result=False, expected_triggered_requirements=["Input"], - expected_triggered_issues=[ - "Input SHOULD reference a FormalParameter using 
exampleOfWork" - ], + expected_triggered_issues=["Input SHOULD reference a FormalParameter using exampleOfWork"], profile_identifier="five-safes-crate", rocrate_entity_mod_sparql=sparql, ) diff --git a/tests/integration/profiles/five-safes-crate/test_valid_5src.py b/tests/integration/profiles/five-safes-crate/test_valid_5src.py index 04bb321b1..fdc4c143a 100644 --- a/tests/integration/profiles/five-safes-crate/test_valid_5src.py +++ b/tests/integration/profiles/five-safes-crate/test_valid_5src.py @@ -14,6 +14,7 @@ # limitations under the License. import logging + import pytest from rocrate_validator import services @@ -30,17 +31,15 @@ rocrate_profile = services.get_profile("ro-crate-1.2") if not rocrate_profile: raise RuntimeError("Unable to load the RO-Crate 1.2 profile") -check_local_data_entity_existence = rocrate_profile.get_requirement_check( - "Data Entity: REQUIRED resource availability" -) +check_local_data_entity_existence = rocrate_profile.get_requirement_check("Data Entity: REQUIRED resource availability") assert check_local_data_entity_existence, "Unable to find the requirement 'Data Entity: REQUIRED resource availability'" SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier check_local_data_entity_existence = rocrate_profile.get_requirement_check( "Web-based Data Entity: REQUIRED resource availability" ) -assert ( - check_local_data_entity_existence -), "Unable to find the requirement 'Web-based Data Entity: REQUIRED resource availability'" +assert check_local_data_entity_existence, ( + "Unable to find the requirement 'Web-based Data Entity: REQUIRED resource availability'" +) SKIP_WEB_RESOURCE_AVAILABILITY_IDENTIFIER = check_local_data_entity_existence.identifier From 69db1e812140a9dd832b4ec0629abfb393abecae Mon Sep 17 00:00:00 2001 From: EttoreM Date: Thu, 2 Jul 2026 15:49:08 +0100 Subject: [PATCH 352/352] More formatting fixes --- .../five-safes-crate/15_metadata_file.py | 19 ++++++------------- 1 file 
changed, 6 insertions(+), 13 deletions(-) diff --git a/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py index b589d332f..f284bfe8a 100644 --- a/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py +++ b/rocrate_validator/profiles/five-safes-crate/15_metadata_file.py @@ -36,20 +36,13 @@ def test_existence(self, context: ValidationContext) -> bool: try: json_dict = context.ro_crate.metadata.as_dict() context_value = json_dict["@context"] - pattern = re.compile( - r"https://w3id\.org/ro/crate/1\.[2-9](-DRAFT)?/context" - ) + pattern = re.compile(r"https://w3id\.org/ro/crate/1\.[2-9](-DRAFT)?/context") passed = True if isinstance(context_value, list): - if not any( - pattern.match(item) - for item in context_value - if isinstance(item, str) - ): - passed = False - else: - if not pattern.match(context_value): + if not any(pattern.match(item) for item in context_value if isinstance(item, str)): passed = False + elif not pattern.match(context_value): + passed = False if not passed: context.result.add_issue( "The RO-Crate metadata file MUST include the RO-Crate context " @@ -58,7 +51,7 @@ def test_existence(self, context: ValidationContext) -> bool: ) return passed - except Exception as e: + except Exception: if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + logger.exception("Unexpected error during RO-Crate context version check") return True