Skip to content

Commit abe30e8

Browse files
authored
Merge pull request #128 from eScienceLab/develop
version 1.1 API
2 parents 5f2eff0 + b0bd555 commit abe30e8

72 files changed

Lines changed: 5382 additions & 205 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ This project presents a Flask-based API for validating RO-Crates.
2121
|------------|-----------|-------------------------|-----------------------------------------------------------------------|
2222
| root_path | optional | string | Root path which contains the RO-Crate |
2323
| webhook_url | optional | string | Webhook to send validation result to |
24+
| profile_name | optional | string | RO-Crate profile to validate against |
2425
| minio_config | required | dictionary | MinIO Configuration Details |
2526

2627
`minio_config`
@@ -167,12 +168,24 @@ curl -X 'POST' \
167168
168169
2. Create the `.env` file for shared environment information. An example environment file (`example.env`) is included and can be copied for this purpose, but make sure to change any security settings (usernames and passwords).
169170
170-
3. Build and start the services using Docker Compose:
171+
3. Optionally, a directory of RO-Crate profiles may be provided to replace the default profiles used for validation. Note that this directory must contain all profile files, as the default profile data will not be used. An example of this is given in the `docker-compose-develop.yml` file, and described here:
172+
1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles`
173+
2. Add a volume to the celery worker container for these, e.g.:
174+
```
175+
volumes:
176+
- ./local/rocrate_validator_profiles:/app/profiles:ro
177+
```
178+
3. Provide the `PROFILES_PATH` environment variable to the Flask container (not the Celery worker container) to match the internal path, e.g.:
179+
```
180+
- PROFILES_PATH=/app/profiles
181+
```
182+
183+
4. Build and start the services using Docker Compose:
171184
```bash
172185
docker compose up --build
173186
```
174187
175-
4. Set up the MinIO bucket
188+
5. Set up the MinIO bucket
176189
1. Open the MinIO web interface at `http://localhost:9000`.
177190
2. Log in with your MinIO credentials.
178191
3. Create a new bucket named `ro-crates`.

app/ro_crates/routes/post_routes.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from apiflask import APIBlueprint, Schema
88
from apiflask.fields import String, Boolean
99
from marshmallow.fields import Nested
10-
from flask import Response
10+
from flask import Response, current_app
1111

1212
from app.services.validation_service import (
1313
queue_ro_crate_validation_task,
@@ -81,7 +81,10 @@ def validate_ro_crate_via_id(json_data, crate_id) -> tuple[Response, int]:
8181
else:
8282
profile_name = None
8383

84-
return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url)
84+
profiles_path = current_app.config["PROFILES_PATH"]
85+
86+
return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name,
87+
webhook_url, profiles_path)
8588

8689

8790
@post_routes_bp.post("/validate_metadata")
@@ -108,4 +111,6 @@ def validate_ro_crate_metadata(json_data) -> tuple[Response, int]:
108111
else:
109112
profile_name = None
110113

111-
return queue_ro_crate_metadata_validation_task(crate_json, profile_name)
114+
profiles_path = current_app.config["PROFILES_PATH"]
115+
116+
return queue_ro_crate_metadata_validation_task(crate_json, profile_name, profiles_path=profiles_path)

app/services/validation_service.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525

2626

2727
def queue_ro_crate_validation_task(
28-
minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None
28+
minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None,
29+
profiles_path=None
2930
) -> tuple[Response, int]:
3031
"""
3132
Queues an RO-Crate for validation with Celery.
@@ -51,22 +52,24 @@ def queue_ro_crate_validation_task(
5152
raise InvalidAPIUsage(f"No RO-Crate with prefix: {crate_id}", 400)
5253

5354
try:
54-
process_validation_task_by_id.delay(minio_config, crate_id, root_path, profile_name, webhook_url)
55+
process_validation_task_by_id.delay(minio_config, crate_id, root_path,
56+
profile_name, webhook_url, profiles_path)
5557
return jsonify({"message": "Validation in progress"}), 202
5658

5759
except Exception as e:
5860
return jsonify({"error": str(e)}), 500
5961

6062

6163
def queue_ro_crate_metadata_validation_task(
62-
crate_json: str, profile_name=None, webhook_url=None
64+
crate_json: str, profile_name=None, webhook_url=None, profiles_path=None
6365
) -> tuple[Response, int]:
6466
"""
6567
Queues an RO-Crate for validation with Celery.
6668
6769
:param crate_json: A string containing the RO-Crate JSON metadata to validate.
6870
:param profile_name: The profile to validate against.
6971
:param webhook_url: The URL to POST the validation results to.
72+
:param profiles_path: A path to the profile definition directory.
7073
:return: A tuple containing a JSON response and an HTTP status code.
7174
:raises: Exception: If an error occurs whilst queueing the task.
7275
"""
@@ -88,7 +91,8 @@ def queue_ro_crate_metadata_validation_task(
8891
result = process_validation_task_by_metadata.delay(
8992
crate_json,
9093
profile_name,
91-
webhook_url
94+
webhook_url,
95+
profiles_path
9296
)
9397
if webhook_url:
9498
return jsonify({"message": "Validation in progress"}), 202

app/tasks/validation_tasks.py

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import os
99
import shutil
10+
import json
1011
from typing import Optional
1112

1213
from rocrate_validator import services
@@ -22,14 +23,14 @@
2223
find_validation_object_on_minio
2324
)
2425
from app.utils.webhook_utils import send_webhook_notification
25-
from app.utils.file_utils import build_metadata_only_rocrate
2626

2727
logger = logging.getLogger(__name__)
2828

2929

3030
@celery.task
3131
def process_validation_task_by_id(
32-
minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, webhook_url: str | None
32+
minio_config: dict, crate_id: str, root_path: str, profile_name: str | None,
33+
webhook_url: str | None, profiles_path: str | None
3334
) -> None:
3435
"""
3536
Background task to process the RO-Crate validation by ID.
@@ -56,7 +57,7 @@ def process_validation_task_by_id(
5657
logging.info(f"Processing validation task for {file_path}")
5758

5859
# Perform validation:
59-
validation_result = perform_ro_crate_validation(file_path, profile_name)
60+
validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path)
6061

6162
if isinstance(validation_result, str):
6263
logging.error(f"Validation failed: {validation_result}")
@@ -97,32 +98,27 @@ def process_validation_task_by_id(
9798

9899
@celery.task
99100
def process_validation_task_by_metadata(
100-
crate_json: str, profile_name: str | None, webhook_url: str | None
101+
crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None
101102
) -> ValidationResult | str:
102103
"""
103104
Background task to process the RO-Crate validation for a given json metadata string.
104105
105106
:param crate_json: A string containing the RO-Crate JSON metadata to validate.
106107
:param profile_name: The name of the validation profile to use. Defaults to None.
107108
:param webhook_url: The webhook URL to send notifications to. Defaults to None.
109+
:param profiles_path: The path to the profiles definition directory. Defaults to None.
108110
:raises Exception: If an error occurs during the validation process.
109111
110112
:todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI.
111113
"""
112114

113-
skip_checks_list = ['ro-crate-1.1_12.1']
114-
file_path = None
115-
116115
try:
117-
# Fetch the RO-Crate from MinIO using the provided ID:
118-
file_path = build_metadata_only_rocrate(crate_json)
119-
120-
logging.info(f"Processing validation task for {file_path}")
116+
logging.info("Processing validation task for provided metadata string")
121117

122118
# Perform validation:
123-
validation_result = perform_ro_crate_validation(file_path,
119+
validation_result = perform_metadata_validation(crate_json,
124120
profile_name,
125-
skip_checks_list
121+
profiles_path
126122
)
127123

128124
if isinstance(validation_result, str):
@@ -131,9 +127,9 @@ def process_validation_task_by_metadata(
131127
raise Exception(f"Validation failed: {validation_result}")
132128

133129
if not validation_result.has_issues():
134-
logging.info(f"RO Crate {file_path} is valid.")
130+
logging.info("RO Crate metadata is valid.")
135131
else:
136-
logging.info(f"RO Crate {file_path} is invalid.")
132+
logging.info("RO Crate metadata is invalid.")
137133

138134
if webhook_url:
139135
send_webhook_notification(webhook_url, validation_result.to_json())
@@ -147,25 +143,22 @@ def process_validation_task_by_metadata(
147143
send_webhook_notification(webhook_url, error_data)
148144

149145
finally:
150-
# Clean up the temporary file if it was created:
151-
if file_path and os.path.exists(file_path):
152-
shutil.rmtree(file_path)
153-
154146
if isinstance(validation_result, str):
155147
return validation_result
156148
else:
157149
return validation_result.to_json()
158150

159151

160152
def perform_ro_crate_validation(
161-
file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None
153+
file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
162154
) -> ValidationResult | str:
163155
"""
164156
Validates an RO-Crate using the provided file path and profile name.
165157
166158
:param file_path: The path to the RO-Crate file to validate
167159
:param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will
168160
attempt to determine the profile.
161+
:param profiles_path: The path to the profiles definition directory
169162
:param skip_checks_list: A list of checks to skip, if needed
170163
:return: The validation result.
171164
:raises Exception: If an error occurs during the validation process.
@@ -183,7 +176,41 @@ def perform_ro_crate_validation(
183176
settings = services.ValidationSettings(
184177
rocrate_uri=full_file_path,
185178
**({"profile_identifier": profile_name} if profile_name else {}),
186-
**({"skip_checks": skip_checks_list} if skip_checks_list else {})
179+
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
180+
**({"profiles_path": profiles_path} if profiles_path else {})
181+
)
182+
183+
return services.validate(settings)
184+
185+
except Exception as e:
186+
logging.error(f"Unexpected error during validation: {e}")
187+
return str(e)
188+
189+
190+
def perform_metadata_validation(
191+
crate_json: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
192+
) -> ValidationResult | str:
193+
"""
194+
Validates only RO-Crate metadata provided as a json string.
195+
196+
:param crate_json: The JSON string containing the metadata
197+
:param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will
198+
attempt to determine the profile.
199+
:param profiles_path: The path to the profiles definition directory
200+
:param skip_checks_list: A list of checks to skip, if needed
201+
:return: The validation result.
202+
:raises Exception: If an error occurs during the validation process.
203+
"""
204+
205+
try:
206+
logging.info(f"Validating ro-crate metadata with profile {profile_name}")
207+
208+
settings = services.ValidationSettings(
209+
**({"metadata_only": True}),
210+
**({"metadata_dict": json.loads(crate_json)}),
211+
**({"profile_identifier": profile_name} if profile_name else {}),
212+
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
213+
**({"profiles_path": profiles_path} if profiles_path else {})
187214
)
188215

189216
return services.validate(settings)

app/utils/config.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,32 @@
1010
from flask import Flask
1111

1212

13+
def get_env(name: str, default=None, required=False):
14+
value = os.environ.get(name, default)
15+
if required and value is None:
16+
raise RuntimeError(f"Missing required environment variable: {name}")
17+
return value
18+
19+
1320
class Config:
1421
"""Base configuration class for the Flask application."""
1522

16-
SECRET_KEY = os.getenv("SECRET_KEY", "my_precious")
17-
1823
# Celery configuration:
19-
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL")
20-
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND")
24+
CELERY_BROKER_URL = get_env("CELERY_BROKER_URL", required=False)
25+
CELERY_RESULT_BACKEND = get_env("CELERY_RESULT_BACKEND", required=False)
2126

22-
# MinIO configuration:
23-
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT")
24-
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
25-
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
26-
MINIO_BUCKET_NAME = os.getenv("MINIO_BUCKET_NAME", "bucket-name")
27+
# rocrate validator configuration:
28+
PROFILES_PATH = get_env("PROFILES_PATH", required=False)
2729

2830

2931
class DevelopmentConfig(Config):
3032
"""Development configuration class."""
31-
3233
DEBUG = True
33-
ENV = "development"
3434

3535

3636
class ProductionConfig(Config):
3737
"""Production configuration class."""
38-
3938
DEBUG = False
40-
ENV = "production"
4139

4240

4341
class InvalidAPIUsage(Exception):
@@ -63,10 +61,13 @@ def make_celery(app: Flask = None) -> Celery:
6361
:param app: The Flask application to use.
6462
:return: The Celery instance.
6563
"""
64+
env = os.environ.get("FLASK_ENV", "development")
65+
config_cls = ProductionConfig if env == "production" else DevelopmentConfig
66+
6667
celery = Celery(
6768
app.import_name if app else __name__,
68-
broker=os.getenv("CELERY_BROKER_URL"),
69-
backend=os.getenv("CELERY_RESULT_BACKEND"),
69+
broker=config_cls.CELERY_BROKER_URL,
70+
backend=config_cls.CELERY_RESULT_BACKEND,
7071
)
7172

7273
if app:

app/utils/file_utils.py

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)