From a6309f6f254bcd08b8f8cdafe0c4922c4b1ab7da Mon Sep 17 00:00:00 2001 From: EttoreM Date: Fri, 27 Feb 2026 09:45:07 +0000 Subject: [PATCH 1/7] Metadata validation service now validates against five-safes-crate profile. --- app/tasks/validation_tasks.py | 80 ++++++++++++++++++++++------------ tests/test_validation_tasks.py | 27 +++++++++--- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index e6096d6..d0d6925 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -20,7 +20,7 @@ get_validation_status_from_minio, get_minio_client, find_rocrate_object_on_minio, - find_validation_object_on_minio + find_validation_object_on_minio, ) from app.utils.webhook_utils import send_webhook_notification @@ -29,8 +29,12 @@ @celery.task def process_validation_task_by_id( - minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, - webhook_url: str | None, profiles_path: str | None + minio_config: dict, + crate_id: str, + root_path: str, + profile_name: str | None, + webhook_url: str | None, + profiles_path: str | None, ) -> None: """ Background task to process the RO-Crate validation by ID. @@ -52,12 +56,16 @@ def process_validation_task_by_id( try: # Fetch the RO-Crate from MinIO using the provided ID: - file_path = fetch_ro_crate_from_minio(minio_client, minio_config["bucket"], crate_id, root_path) + file_path = fetch_ro_crate_from_minio( + minio_client, minio_config["bucket"], crate_id, root_path + ) logging.info(f"Processing validation task for {file_path}") # Perform validation: - validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path) + validation_result = perform_ro_crate_validation( + file_path, profile_name, profiles_path=profiles_path + ) if isinstance(validation_result, str): logging.error(f"Validation failed: {validation_result}") @@ -70,7 +78,13 @@ def process_validation_task_by_id( logging.info(f"RO Crate {crate_id} is invalid.") # Update the validation status in MinIO: - update_validation_status_in_minio(minio_client, minio_config["bucket"], crate_id, root_path, validation_result.to_json()) + update_validation_status_in_minio( + minio_client, + minio_config["bucket"], + crate_id, + root_path, + validation_result.to_json(), + ) # TODO: Prepare the data to send to the webhook, and send the webhook notification. @@ -98,7 +112,10 @@ def process_validation_task_by_id( @celery.task def process_validation_task_by_metadata( - crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None + crate_json: str, + profile_name: str | None, + webhook_url: str | None, + profiles_path: Optional[str] = None, ) -> ValidationResult | str: """ Background task to process the RO-Crate validation for a given json metadata string. @@ -116,10 +133,9 @@ def process_validation_task_by_metadata( logging.info("Processing validation task for provided metadata string") # Perform validation: - validation_result = perform_metadata_validation(crate_json, - profile_name, - profiles_path - ) + validation_result = perform_metadata_validation( + crate_json, profile_name, profiles_path=profiles_path + ) if isinstance(validation_result, str): logging.error(f"Validation failed: {validation_result}") @@ -150,7 +166,10 @@ def process_validation_task_by_metadata( def perform_ro_crate_validation( - file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None + file_path: str, + profile_name: str | None, + skip_checks_list: Optional[list] = None, + profiles_path: Optional[str] = None, ) -> ValidationResult | str: """ Validates an RO-Crate using the provided file path and profile name. @@ -177,7 +196,7 @@ def perform_ro_crate_validation( rocrate_uri=full_file_path, **({"profile_identifier": profile_name} if profile_name else {}), **({"skip_checks": skip_checks_list} if skip_checks_list else {}), - **({"profiles_path": profiles_path} if profiles_path else {}) + **({"profiles_path": profiles_path} if profiles_path else {}), ) return services.validate(settings) @@ -188,7 +207,10 @@ def perform_ro_crate_validation( def perform_metadata_validation( - crate_json: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None + crate_json: str, + profile_name: str | None, + skip_checks_list: Optional[list] = None, + profiles_path: Optional[str] = None, ) -> ValidationResult | str: """ Validates only RO-Crate metadata provided as a json string. @@ -210,7 +232,7 @@ def perform_metadata_validation( **({"metadata_dict": json.loads(crate_json)}), **({"profile_identifier": profile_name} if profile_name else {}), **({"skip_checks": skip_checks_list} if skip_checks_list else {}), - **({"profiles_path": profiles_path} if profiles_path else {}) + **({"profiles_path": profiles_path} if profiles_path else {}), ) return services.validate(settings) @@ -221,10 +243,10 @@ def perform_metadata_validation( def check_ro_crate_exists( - minio_client: object, - bucket_name: str, - crate_id: str, - root_path: str, + minio_client: object, + bucket_name: str, + crate_id: str, + root_path: str, ) -> bool: """ Checks for the existence of an RO-Crate using the provided Crate ID. @@ -245,10 +267,10 @@ def check_ro_crate_exists( def check_validation_exists( - minio_client: object, - bucket_name: str, - crate_id: str, - root_path: str, + minio_client: object, + bucket_name: str, + crate_id: str, + root_path: str, ) -> bool: """ Checks for the existence of a validation result using the provided Crate ID. @@ -269,10 +291,10 @@ def check_validation_exists( def return_ro_crate_validation( - minio_client: object, - bucket_name: str, - crate_id: str, - root_path: str, + minio_client: object, + bucket_name: str, + crate_id: str, + root_path: str, ) -> dict | str: """ Retrieves the validation result for an RO-Crate using the provided Crate ID. @@ -284,4 +306,6 @@ def return_ro_crate_validation( logging.info(f"Fetching validation result for RO-Crate {crate_id}") - return get_validation_status_from_minio(minio_client, bucket_name, crate_id, root_path) + return get_validation_status_from_minio( + minio_client, bucket_name, crate_id, root_path + ) diff --git a/tests/test_validation_tasks.py b/tests/test_validation_tasks.py index ade15b9..5d36e1d 100644 --- a/tests/test_validation_tasks.py +++ b/tests/test_validation_tasks.py @@ -229,16 +229,18 @@ def test_process_validation_failure( # Test function: process_validation_task_by_metadata @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, validation_json, validation_value", + "crate_json, profile_name, webhook_url, profiles_path, validation_json, validation_value", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', "test-profile", "https://example.com/webhook", + "/app/profiles", '{"status": "valid"}', False ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', "test-profile", "https://example.com/webhook", + None, '{"status": "invalid"}', True ) ], @@ -248,7 +250,7 @@ def test_process_validation_failure( @mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_metadata_validation( mock_validate, mock_webhook, - crate_json: str, profile_name: str, webhook_url: str, + crate_json: str, profile_name: str, webhook_url: str, profiles_path: str | None, validation_json: str, validation_value: bool, ): mock_result = mock.Mock() @@ -256,24 +258,30 @@ def test_metadata_validation( mock_result.to_json.return_value = validation_json mock_validate.return_value = mock_result - result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url) + result = process_validation_task_by_metadata( + crate_json, profile_name, webhook_url, profiles_path + ) assert result == validation_json - mock_validate.assert_called_once() + mock_validate.assert_called_once_with( + crate_json, profile_name, profiles_path=profiles_path + ) mock_webhook.assert_called_once_with(webhook_url, validation_json) @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, validation_message", + "crate_json, profile_name, webhook_url, profiles_path, validation_message", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', "test-profile", "https://example.com/webhook", + "/app/profiles", "Validation error" ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', "test-profile", None, + None, "Validation error" ) ], @@ -283,16 +291,21 @@ def test_metadata_validation( @mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_validation_fails_and_sends_error_notification_to_webhook( mock_validate, mock_webhook, - crate_json: str, profile_name: str, webhook_url: str, + crate_json: str, profile_name: str, webhook_url: str, profiles_path: str | None, validation_message: str ): mock_validate.return_value = validation_message - result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url) + result = process_validation_task_by_metadata( + crate_json, profile_name, webhook_url, profiles_path + ) assert isinstance(result, str) assert validation_message in result + mock_validate.assert_called_once_with( + crate_json, profile_name, profiles_path=profiles_path + ) if webhook_url is not None: # Error webhook should be sent From ebdd12bf72d06fc132ef0127912d6759f27e9ccd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Mar 2026 12:24:24 +0000 Subject: [PATCH 2/7] Bump python-dotenv from 1.2.1 to 1.2.2 Bumps [python-dotenv](https://github.com/theskumar/python-dotenv) from 1.2.1 to 1.2.2. - [Release notes](https://github.com/theskumar/python-dotenv/releases) - [Changelog](https://github.com/theskumar/python-dotenv/blob/main/CHANGELOG.md) - [Commits](https://github.com/theskumar/python-dotenv/compare/v1.2.1...v1.2.2) --- updated-dependencies: - dependency-name: python-dotenv dependency-version: 1.2.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 1dcad46..748c6bd 100644 --- a/requirements.in +++ b/requirements.in @@ -4,6 +4,6 @@ requests==2.32.5 Flask==3.1.3 Werkzeug==3.1.6 redis==7.2.0 -python-dotenv==1.2.1 +python-dotenv==1.2.2 apiflask==3.0.2 roc-validator==0.8.1 diff --git a/requirements.txt b/requirements.txt index 2dd5129..c304049 100644 --- a/requirements.txt +++ b/requirements.txt @@ -138,7 +138,7 @@ pyshacl==0.30.1 # via roc-validator python-dateutil==2.9.0.post0 # via celery -python-dotenv==1.2.1 +python-dotenv==1.2.2 # via -r requirements.in rdflib[html]==7.1.4 # via From 43a10fc4379eefb437fd030bcde8f3254234d852 Mon Sep 17 00:00:00 2001 From: EttoreM Date: Sun, 8 Mar 2026 11:44:28 +0000 Subject: [PATCH 3/7] Added dockerfile and workflow to build Docker image with baked-in profiles when releasing new version of Cratey-Validator --- .github/workflows/build-with-profiles.yml | 52 +++++++++++++++++++++++ Dockerfile.profiles | 27 ++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 .github/workflows/build-with-profiles.yml create mode 100644 Dockerfile.profiles diff --git a/.github/workflows/build-with-profiles.yml b/.github/workflows/build-with-profiles.yml new file mode 100644 index 0000000..16af3e7 --- /dev/null +++ b/.github/workflows/build-with-profiles.yml @@ -0,0 +1,52 @@ +name: Create and publish a Docker image (with profiles) + +on: + release: + types: [published] + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}-profiles + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + id: push + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile.profiles + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v2 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true diff --git a/Dockerfile.profiles b/Dockerfile.profiles new file mode 100644 index 0000000..6a8a505 --- /dev/null +++ b/Dockerfile.profiles @@ -0,0 +1,27 @@ +FROM python:3.11-slim + +# Install required system packages, including git +RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +COPY cratey.py LICENSE /app/ +COPY app /app/app +COPY tests/data/rocrate_validator_profiles /app/profiles + +RUN useradd -ms /bin/bash flaskuser +RUN chown -R flaskuser:flaskuser /app + +ENV PROFILES_PATH=/app/profiles + +USER flaskuser + +EXPOSE 5000 + +CMD ["flask", "run", "--host=0.0.0.0"] + +LABEL org.opencontainers.image.source="https://github.com/eScienceLab/Cratey-Validator" From 7658707b3092a491614c6ceefaf21804174dfcb3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 12:07:40 +0000 Subject: [PATCH 4/7] Bump redis from 7.2.0 to 7.3.0 Bumps [redis](https://github.com/redis/redis-py) from 7.2.0 to 7.3.0. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v7.2.0...v7.3.0) --- updated-dependencies: - dependency-name: redis dependency-version: 7.3.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 1dcad46..c2592da 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ minio==7.2.20 requests==2.32.5 Flask==3.1.3 Werkzeug==3.1.6 -redis==7.2.0 +redis==7.3.0 python-dotenv==1.2.1 apiflask==3.0.2 roc-validator==0.8.1 diff --git a/requirements.txt b/requirements.txt index 2dd5129..867849c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -145,7 +145,7 @@ rdflib[html]==7.1.4 # owlrl # pyshacl # roc-validator -redis==7.2.0 +redis==7.3.0 # via -r requirements.in requests==2.32.5 # via From 20a830fc87542d3f8a6eb74d6f282f92d564e85d Mon Sep 17 00:00:00 2001 From: EttoreM Date: Thu, 12 Mar 2026 08:23:44 +0000 Subject: [PATCH 5/7] This fix adds the curated five-safes profile to the Docker image. Profile version is accessible through a label () or as an environment variable from inside the Docker image (). --- .github/workflows/build-with-profiles.yml | 4 +-- Dockerfile.fivesafes-profile | 40 +++++++++++++++++++++++ Dockerfile.profiles | 27 --------------- 3 files changed, 42 insertions(+), 29 deletions(-) create mode 100644 Dockerfile.fivesafes-profile delete mode 100644 Dockerfile.profiles diff --git a/.github/workflows/build-with-profiles.yml b/.github/workflows/build-with-profiles.yml index 16af3e7..ba77faf 100644 --- a/.github/workflows/build-with-profiles.yml +++ b/.github/workflows/build-with-profiles.yml @@ -7,7 +7,7 @@ on: env: REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }}-profiles + IMAGE_NAME: ${{ github.repository }}-fivesafes-profile jobs: build-and-push-image: @@ -39,7 +39,7 @@ jobs: uses: docker/build-push-action@v6 with: context: . - file: ./Dockerfile.profiles + file: ./Dockerfile.fivesafes-profile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile.fivesafes-profile b/Dockerfile.fivesafes-profile new file mode 100644 index 0000000..82b9597 --- /dev/null +++ b/Dockerfile.fivesafes-profile @@ -0,0 +1,40 @@ +FROM python:3.11-slim + +ARG FIVE_SAFES_PROFILE_VERSION=five-safes-0.7.3-beta +ARG PROFILES_ARCHIVE_URL=https://github.com/eScienceLab/rocrate-validator/archive/refs/tags/${FIVE_SAFES_PROFILE_VERSION}.tar.gz + +# Install required system packages, including git +RUN apt-get update && apt-get install -y git wget && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +COPY cratey.py LICENSE /app/ +COPY app /app/app +RUN < Date: Fri, 13 Mar 2026 07:59:10 +0000 Subject: [PATCH 6/7] The five-safes profile is now baked into the default directory. Also, now the five-safes profile imported contains the fix of the log import. --- Dockerfile.fivesafes-profile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile.fivesafes-profile b/Dockerfile.fivesafes-profile index 82b9597..5946e21 100644 --- a/Dockerfile.fivesafes-profile +++ b/Dockerfile.fivesafes-profile @@ -1,7 +1,8 @@ FROM python:3.11-slim -ARG FIVE_SAFES_PROFILE_VERSION=five-safes-0.7.3-beta +ARG FIVE_SAFES_PROFILE_VERSION=five-safes-0.7.4-beta ARG PROFILES_ARCHIVE_URL=https://github.com/eScienceLab/rocrate-validator/archive/refs/tags/${FIVE_SAFES_PROFILE_VERSION}.tar.gz +ARG PY_VER=3.11 # Install required system packages, including git RUN apt-get update && apt-get install -y git wget && rm -rf /var/lib/apt/lists/* @@ -18,7 +19,7 @@ RUN < Date: Fri, 13 Mar 2026 08:41:05 +0000 Subject: [PATCH 7/7] Removed stale lines from Dockerfile.fivesafes-profile. --- Dockerfile.fivesafes-profile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile.fivesafes-profile b/Dockerfile.fivesafes-profile index 5946e21..df234b9 100644 --- a/Dockerfile.fivesafes-profile +++ b/Dockerfile.fivesafes-profile @@ -16,7 +16,6 @@ RUN pip install --no-cache-dir -r requirements.txt COPY cratey.py LICENSE /app/ COPY app /app/app RUN <