Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/build-with-profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Builds the Five Safes profile variant of the image from
# Dockerfile.fivesafes-profile, pushes it to the GitHub Container Registry and
# publishes a build-provenance attestation for the pushed digest.
name: Create and publish a Docker image (with profiles)

# Runs when a release is published, or on demand from the Actions tab.
on:
  release:
    types: [published]
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  # github.repository keeps the owner/repo casing exactly as it appears on
  # GitHub, so this value may contain uppercase characters.
  IMAGE_NAME: ${{ github.repository }}-fivesafes-profile

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Needed to push the image to ghcr.io with GITHUB_TOKEN.
      packages: write
      # Needed by the attestation step (signing identity + attestation store).
      attestations: write
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # OCI repository names must be lowercase. Compute the lowercase form
      # once so the attestation subject refers to a valid image reference
      # even when the owner or repository name contains capitals.
      - name: Normalise image name to lowercase
        id: image
        shell: bash
        run: echo "name=${IMAGE_NAME,,}" >> "$GITHUB_OUTPUT"

      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Derives the tag and label sets that the build step below consumes.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile.fivesafes-profile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      # Attests the exact digest produced by the push step. The subject name
      # carries no tag and uses the lowercase image name computed above.
      - name: Generate artifact attestation
        uses: actions/attest-build-provenance@v2
        with:
          subject-name: ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}
          subject-digest: ${{ steps.push.outputs.digest }}
          push-to-registry: true
39 changes: 39 additions & 0 deletions Dockerfile.fivesafes-profile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Image for the validator service with the Five Safes RO-Crate profile
# installed into the rocrate_validator package's profiles directory.
FROM python:3.11-slim

# Tag of the eScienceLab/rocrate-validator repository that provides the profile.
ARG FIVE_SAFES_PROFILE_VERSION=five-safes-0.7.4-beta
ARG PROFILES_ARCHIVE_URL=https://github.com/eScienceLab/rocrate-validator/archive/refs/tags/${FIVE_SAFES_PROFILE_VERSION}.tar.gz
# Must match the Python minor version of the base image above: it is used to
# build the site-packages path the profile is extracted into.
ARG PY_VER=3.11

# Install required system packages, including git
RUN apt-get update && apt-get install -y git wget && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the application code so this
# layer is reused when only the code changes.
COPY requirements.txt .
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

COPY cratey.py LICENSE /app/
COPY app /app/app

# Download the tagged source archive and extract only the five-safes-crate
# profile directory. --strip-components=3 drops the
# "<archive-root>/rocrate_validator/profiles" prefix so the profile lands
# directly inside the installed package's profiles directory.
#
# "set -eu" is required: a heredoc script is run as one shell invocation, so
# without it only the exit status of the final "rm" would decide whether the
# layer succeeds, and a failed download or extraction would go unnoticed.
RUN <<EOF_WRF
set -eu
wget -O /tmp/rocrate-validator-profiles.tar.gz "$PROFILES_ARCHIVE_URL"
tar -xzf /tmp/rocrate-validator-profiles.tar.gz \
    -C /usr/local/lib/python${PY_VER}/site-packages/rocrate_validator/profiles/ \
    --strip-components=3 \
    "rocrate-validator-${FIVE_SAFES_PROFILE_VERSION}/rocrate_validator/profiles/five-safes-crate"
rm /tmp/rocrate-validator-profiles.tar.gz
EOF_WRF

# Run the application as an unprivileged user that owns /app.
RUN useradd -ms /bin/bash flaskuser
RUN chown -R flaskuser:flaskuser /app

# Expose the build-time profile version to the running container.
ENV FIVE_SAFES_PROFILE_VERSION=${FIVE_SAFES_PROFILE_VERSION}

USER flaskuser

EXPOSE 5000

CMD ["flask", "run", "--host=0.0.0.0"]

LABEL org.opencontainers.image.source="https://github.com/eScienceLab/Cratey-Validator"
LABEL org.cratey.five-safes-profile-version="${FIVE_SAFES_PROFILE_VERSION}"
80 changes: 52 additions & 28 deletions app/tasks/validation_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
get_validation_status_from_minio,
get_minio_client,
find_rocrate_object_on_minio,
find_validation_object_on_minio
find_validation_object_on_minio,
)
from app.utils.webhook_utils import send_webhook_notification

Expand All @@ -29,8 +29,12 @@

@celery.task
def process_validation_task_by_id(
minio_config: dict, crate_id: str, root_path: str, profile_name: str | None,
webhook_url: str | None, profiles_path: str | None
minio_config: dict,
crate_id: str,
root_path: str,
profile_name: str | None,
webhook_url: str | None,
profiles_path: str | None,
) -> None:
"""
Background task to process the RO-Crate validation by ID.
Expand All @@ -52,12 +56,16 @@ def process_validation_task_by_id(

try:
# Fetch the RO-Crate from MinIO using the provided ID:
file_path = fetch_ro_crate_from_minio(minio_client, minio_config["bucket"], crate_id, root_path)
file_path = fetch_ro_crate_from_minio(
minio_client, minio_config["bucket"], crate_id, root_path
)

logging.info(f"Processing validation task for {file_path}")

# Perform validation:
validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path)
validation_result = perform_ro_crate_validation(
file_path, profile_name, profiles_path=profiles_path
)

if isinstance(validation_result, str):
logging.error(f"Validation failed: {validation_result}")
Expand All @@ -70,7 +78,13 @@ def process_validation_task_by_id(
logging.info(f"RO Crate {crate_id} is invalid.")

# Update the validation status in MinIO:
update_validation_status_in_minio(minio_client, minio_config["bucket"], crate_id, root_path, validation_result.to_json())
update_validation_status_in_minio(
minio_client,
minio_config["bucket"],
crate_id,
root_path,
validation_result.to_json(),
)

# TODO: Prepare the data to send to the webhook, and send the webhook notification.

Expand Down Expand Up @@ -98,7 +112,10 @@ def process_validation_task_by_id(

@celery.task
def process_validation_task_by_metadata(
crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None
crate_json: str,
profile_name: str | None,
webhook_url: str | None,
profiles_path: Optional[str] = None,
) -> ValidationResult | str:
"""
Background task to process the RO-Crate validation for a given json metadata string.
Expand All @@ -116,10 +133,9 @@ def process_validation_task_by_metadata(
logging.info("Processing validation task for provided metadata string")

# Perform validation:
validation_result = perform_metadata_validation(crate_json,
profile_name,
profiles_path
)
validation_result = perform_metadata_validation(
crate_json, profile_name, profiles_path=profiles_path
)

if isinstance(validation_result, str):
logging.error(f"Validation failed: {validation_result}")
Expand Down Expand Up @@ -150,7 +166,10 @@ def process_validation_task_by_metadata(


def perform_ro_crate_validation(
file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
file_path: str,
profile_name: str | None,
skip_checks_list: Optional[list] = None,
profiles_path: Optional[str] = None,
) -> ValidationResult | str:
"""
Validates an RO-Crate using the provided file path and profile name.
Expand All @@ -177,7 +196,7 @@ def perform_ro_crate_validation(
rocrate_uri=full_file_path,
**({"profile_identifier": profile_name} if profile_name else {}),
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
**({"profiles_path": profiles_path} if profiles_path else {})
**({"profiles_path": profiles_path} if profiles_path else {}),
)

return services.validate(settings)
Expand All @@ -188,7 +207,10 @@ def perform_ro_crate_validation(


def perform_metadata_validation(
crate_json: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
crate_json: str,
profile_name: str | None,
skip_checks_list: Optional[list] = None,
profiles_path: Optional[str] = None,
) -> ValidationResult | str:
"""
Validates only RO-Crate metadata provided as a json string.
Expand All @@ -210,7 +232,7 @@ def perform_metadata_validation(
**({"metadata_dict": json.loads(crate_json)}),
**({"profile_identifier": profile_name} if profile_name else {}),
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
**({"profiles_path": profiles_path} if profiles_path else {})
**({"profiles_path": profiles_path} if profiles_path else {}),
)

return services.validate(settings)
Expand All @@ -221,10 +243,10 @@ def perform_metadata_validation(


def check_ro_crate_exists(
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
) -> bool:
"""
Checks for the existence of an RO-Crate using the provided Crate ID.
Expand All @@ -245,10 +267,10 @@ def check_ro_crate_exists(


def check_validation_exists(
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
) -> bool:
"""
Checks for the existence of a validation result using the provided Crate ID.
Expand All @@ -269,10 +291,10 @@ def check_validation_exists(


def return_ro_crate_validation(
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
minio_client: object,
bucket_name: str,
crate_id: str,
root_path: str,
) -> dict | str:
"""
Retrieves the validation result for an RO-Crate using the provided Crate ID.
Expand All @@ -284,4 +306,6 @@ def return_ro_crate_validation(

logging.info(f"Fetching validation result for RO-Crate {crate_id}")

return get_validation_status_from_minio(minio_client, bucket_name, crate_id, root_path)
return get_validation_status_from_minio(
minio_client, bucket_name, crate_id, root_path
)
4 changes: 2 additions & 2 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ minio==7.2.20
requests==2.32.5
Flask==3.1.3
Werkzeug==3.1.6
redis==7.2.0
python-dotenv==1.2.1
redis==7.3.0
python-dotenv==1.2.2
apiflask==3.0.2
roc-validator==0.8.1
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,14 @@ pyshacl==0.30.1
# via roc-validator
python-dateutil==2.9.0.post0
# via celery
python-dotenv==1.2.1
python-dotenv==1.2.2
# via -r requirements.in
rdflib[html]==7.1.4
# via
# owlrl
# pyshacl
# roc-validator
redis==7.2.0
redis==7.3.0
# via -r requirements.in
requests==2.32.5
# via
Expand Down
27 changes: 20 additions & 7 deletions tests/test_validation_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,16 +229,18 @@ def test_process_validation_failure(
# Test function: process_validation_task_by_metadata

@pytest.mark.parametrize(
"crate_json, profile_name, webhook_url, validation_json, validation_value",
"crate_json, profile_name, webhook_url, profiles_path, validation_json, validation_value",
[
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook",
"/app/profiles",
'{"status": "valid"}', False
),
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook",
None,
'{"status": "invalid"}', True
)
],
Expand All @@ -248,32 +250,38 @@ def test_process_validation_failure(
@mock.patch("app.tasks.validation_tasks.perform_metadata_validation")
def test_metadata_validation(
mock_validate, mock_webhook,
crate_json: str, profile_name: str, webhook_url: str,
crate_json: str, profile_name: str, webhook_url: str, profiles_path: str | None,
validation_json: str, validation_value: bool,
):
mock_result = mock.Mock()
mock_result.has_issues.return_value = validation_value
mock_result.to_json.return_value = validation_json
mock_validate.return_value = mock_result

result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url)
result = process_validation_task_by_metadata(
crate_json, profile_name, webhook_url, profiles_path
)

assert result == validation_json
mock_validate.assert_called_once()
mock_validate.assert_called_once_with(
crate_json, profile_name, profiles_path=profiles_path
)
mock_webhook.assert_called_once_with(webhook_url, validation_json)


@pytest.mark.parametrize(
"crate_json, profile_name, webhook_url, validation_message",
"crate_json, profile_name, webhook_url, profiles_path, validation_message",
[
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", "https://example.com/webhook",
"/app/profiles",
"Validation error"
),
(
'{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}',
"test-profile", None,
None,
"Validation error"
)
],
Expand All @@ -283,16 +291,21 @@ def test_metadata_validation(
@mock.patch("app.tasks.validation_tasks.perform_metadata_validation")
def test_validation_fails_and_sends_error_notification_to_webhook(
mock_validate, mock_webhook,
crate_json: str, profile_name: str, webhook_url: str,
crate_json: str, profile_name: str, webhook_url: str, profiles_path: str | None,
validation_message: str
):

mock_validate.return_value = validation_message

result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url)
result = process_validation_task_by_metadata(
crate_json, profile_name, webhook_url, profiles_path
)

assert isinstance(result, str)
assert validation_message in result
mock_validate.assert_called_once_with(
crate_json, profile_name, profiles_path=profiles_path
)

if webhook_url is not None:
# Error webhook should be sent
Expand Down
Loading