diff --git a/.woodpecker/.feature.yml b/.woodpecker/.feature.yml index f8d7863..d236200 100644 --- a/.woodpecker/.feature.yml +++ b/.woodpecker/.feature.yml @@ -2,8 +2,9 @@ steps: build-and-push: image: woodpeckerci/plugin-docker-buildx settings: - repo: "${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}" - tags: "feature-${CI_COMMIT_BRANCH##feature/}" + platforms: linux/amd64,linux/arm64 + repo: '${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}' + tags: 'feature-${CI_COMMIT_BRANCH##feature/}' username: from_secret: docker_username password: diff --git a/.woodpecker/.release.yml b/.woodpecker/.release.yml index 262f247..22e77ab 100644 --- a/.woodpecker/.release.yml +++ b/.woodpecker/.release.yml @@ -2,8 +2,9 @@ steps: release: image: woodpeckerci/plugin-docker-buildx settings: - repo: "${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}" - tags: "${CI_COMMIT_TAG##v}" + platforms: linux/amd64,linux/arm64 + repo: '${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}' + tags: '${CI_COMMIT_TAG##v}' username: from_secret: docker_username password: diff --git a/README.md b/README.md index 91ff3ac..4fc3bcd 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ apt update && apt install -y libgeos-dev ### Development mode -By leveraging Dockers' [bind-mount](https://docs.docker.com/storage/bind-mounts/), you can mount your application code into an existing service image. This spares you from building a new image to test each change. Just mount your services' folder to the containers' `/app`. On top of that, you can configure the environment variable `MODE` to `development`. That enables live-reloading of the server, so it immediately updates when you save a file. +By leveraging Dockers' [bind-mount](https://docs.docker.com/storage/bind-mounts/), you can mount your application code into an existing service image. This spares you from building a new image to test each change. Just mount your services' folder to the containers' `/app`. On top of that, you can configure the environment variable `MODE` to `development`. 
That enables live-reloading of the server, so it immediately updates when you save a file. example docker-compose parameters: ```yml @@ -82,11 +82,11 @@ def log(msg, *args, **kwargs) ``` > Write a log message to the log file. -> +> > Works exactly the same as the logging.info (https://docs.python.org/3/library/logging.html#logging.info) method from pythons' logging module. -> Logs are written to the /logs directory in the docker container. -> -> Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) +> Logs are written to the /logs directory in the docker container. +> +> Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) > used by the template. The methods provided by this instance can be used for more fine-grained logging. @@ -134,20 +134,28 @@ def validate_resource_type(expected_type, data) #### `query` ```python -def query(the_query) +def query(the_query, thread_safe = False, sudo = False) ``` > Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default). +> +> Advanced options: +> - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! +> - sudo: perform a sudo query, ignoring the groups of the originating request #### `update` ```python -def update(the_query) +def update(the_query, thread_safe = False, sudo = False) ``` > Execute the given update SPARQL query on the triplestore. If the given query is not an update query, nothing happens. 
+> +> Advanced options: +> - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! +> - sudo: perform a sudo query, ignoring the groups of the originating request @@ -248,12 +256,12 @@ def sparql_escape_uri(obj) def sparql_escape(obj) ``` -> Converts the given object to a SPARQL-safe RDF object string with the right RDF-datatype. -> +> Converts the given object to a SPARQL-safe RDF object string with the right RDF-datatype. +> > These functions should be used especially when inserting user-input to avoid SPARQL-injection. > Separate functions are available for different python datatypes. > The `sparql_escape` function however can automatically select the right method to use, for the following Python datatypes: -> +> > - `str` > - `int` > - `float` @@ -261,9 +269,17 @@ def sparql_escape(obj) > - `datetime.date` > - `datetime.time` > - `boolean` -> +> > The `sparql_escape_uri`-function can be used for escaping URI's. +#### `wait_for_triplestore` + +```python +def wait_for_triplestore() +``` + +> Wait until the triplestore is running. Performs a sudo select query with limit 1 until it gets a proper result from the triplestore + ### Writing SPARQL Queries The template itself is unopinionated when it comes to constructing SPARQL-queries. However, since Python's most common string formatting methods aren't a great fit for SPARQL queries, we hereby want to provide an example on how to construct a query based on [template strings](https://docs.python.org/3.8/library/string.html#template-strings) while keeping things readable. 
@@ -288,6 +304,19 @@ query_string = query_template.substitute(person=sparql_escape_uri(my_person)) query_result = query(query_string) ``` +### Functions on startup +Because of the way FastAPI works, logic that should be run on startup should always be wrapped with an `@app.on_event("startup")` decorator. For instance: + +```py +@app.on_event("startup") +async def startup_event(): + wait_for_triplestore() + # on startup fail existing busy tasks + fail_busy_and_scheduled_tasks() + # on startup also immediately start scheduled tasks + process_open_tasks() +``` + ## Deployment Example snippet for adding a service to a docker-compose stack: @@ -342,7 +371,7 @@ python3 README.py ``` You can customise the output through the API configuration! See [README.py](README.py) && the [pydoc-markdown docs](https://niklasrosenstein.github.io/pydoc-markdown/). -## Migate from Flask based versions +## Migrate from Flask based versions Previous versions of this template were based on Flask. Effort was made to keep as much backward compatible as possible. However, some things were slightly modified or require your attention 
-More information [here](https://fastapi.tiangolo.com/async/#in-a-hurry) \ No newline at end of file +More information [here](https://fastapi.tiangolo.com/async/#in-a-hurry) + +### Startup functions +Be sure to wrap your startup functions with a `@app.on_event("startup")` decorator diff --git a/helpers.py b/helpers.py index 4c9b4f1..a619d83 100644 --- a/helpers.py +++ b/helpers.py @@ -3,11 +3,12 @@ import logging import os import sys -from fastapi import Request +import time from rdflib.namespace import DC from escape_helpers import sparql_escape from SPARQLWrapper import SPARQLWrapper, JSON from deprecated import deprecated +from starlette_context import context """ The template provides the user with several helper methods. They aim to give you a step ahead for: @@ -61,11 +62,11 @@ def generate_uuid(): def log(msg, *args, **kwargs): """ Write a log message to the log file. - + Works exactly the same as the logging.info (https://docs.python.org/3/library/logging.html#logging.info) method from pythons' logging module. - Logs are written to the /logs directory in the docker container. - - Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) + Logs are written to the /logs directory in the docker container. + + Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) used by the template. The methods provided by this instance can be used for more fine-grained logging. """ return logger.info(msg, *args, **kwargs) @@ -105,14 +106,23 @@ def validate_resource_type(expected_type, data): return error("Incorrect type. 
Type must be " + str(expected_type) + ", instead of " + str(data['type']) + ".", 409) +def build_sparql_query(): + sparql_query = SPARQLWrapper(os.environ.get('MU_SPARQL_ENDPOINT'), returnFormat=JSON) + if os.environ.get('MU_SPARQL_TIMEOUT'): + timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) + sparql_query.setTimeout(timeout) + return sparql_query + +def build_sparql_update(): + sparql_update = SPARQLWrapper(os.environ.get('MU_SPARQL_UPDATEPOINT'), returnFormat=JSON) + sparql_update.method = 'POST' + if os.environ.get('MU_SPARQL_TIMEOUT'): + timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) + sparql_update.setTimeout(timeout) + return sparql_update -sparqlQuery = SPARQLWrapper(os.environ.get('MU_SPARQL_ENDPOINT'), returnFormat=JSON) -sparqlUpdate = SPARQLWrapper(os.environ.get('MU_SPARQL_UPDATEPOINT'), returnFormat=JSON) -sparqlUpdate.method = 'POST' -if os.environ.get('MU_SPARQL_TIMEOUT'): - timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) - sparqlQuery.setTimeout(timeout) - sparqlUpdate.setTimeout(timeout) +sparqlQuery = build_sparql_query() +sparqlUpdate = build_sparql_update() MU_HEADERS = [ "MU-SESSION-ID", @@ -121,42 +131,81 @@ def validate_resource_type(expected_type, data): "MU-AUTH-USED-GROUPS" ] -def query(the_query: str, request: Request | None = None): - """Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default).""" +def set_sparql_interface_headers(sparql_interface, sudo): for header in MU_HEADERS: - if request is not None and header in request.headers: - sparqlQuery.customHttpHeaders[header] = request.headers[header] + if context.exists() and header in context["headers"]: + sparql_interface.customHttpHeaders[header] = context["headers"][header] else: # Make sure headers used for a previous query are cleared - if header in sparqlQuery.customHttpHeaders: - del sparqlQuery.customHttpHeaders[header] - sparqlQuery.setQuery(the_query) + if header in 
sparql_interface.customHttpHeaders: + del sparql_interface.customHttpHeaders[header] + if sudo: + sparql_interface.customHttpHeaders["mu-auth-sudo"] = "true" + elif "mu-auth-sudo" in sparql_interface.customHttpHeaders: + del sparql_interface.customHttpHeaders["mu-auth-sudo"] + + + +def query(the_query: str, thread_safe: bool = False, sudo: bool = False): + """Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default).""" + sparql_interface = sparqlQuery + + if thread_safe: + # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance + sparql_interface = build_sparql_query() + + set_sparql_interface_headers(sparql_interface, sudo) + + sparql_interface.setQuery(the_query) if LOG_SPARQL_QUERIES: log("Execute query: \n" + the_query) try: - return sparqlQuery.query().convert() + return sparql_interface.query().convert() except Exception as e: log("Failed Query: \n" + the_query) raise e -def update(the_query: str, request: Request | None = None): +def update(the_query: str, thread_safe: bool = False, sudo: bool = False): """Execute the given update SPARQL query on the triplestore. 
If the given query is not an update query, nothing happens.""" - for header in MU_HEADERS: - if request is not None and header in request.headers: - sparqlUpdate.customHttpHeaders[header] = request.headers[header] - else: # Make sure headers used for a previous query are cleared - if header in sparqlUpdate.customHttpHeaders: - del sparqlUpdate.customHttpHeaders[header] - sparqlUpdate.setQuery(the_query) - if sparqlUpdate.isSparqlUpdateRequest(): + sparql_interface = sparqlUpdate + + if thread_safe: + # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance + sparql_interface = build_sparql_update() + + set_sparql_interface_headers(sparql_interface, sudo) + + sparql_interface.setQuery(the_query) + if sparql_interface.isSparqlUpdateRequest(): if LOG_SPARQL_UPDATES: log("Execute query: \n" + the_query) try: - sparqlUpdate.query() + sparql_interface.query() except Exception as e: log("Failed Query: \n" + the_query) raise e +def wait_for_triplestore(): + triplestore_live = False + log("Waiting for triplestore...") + while not triplestore_live: + try: + result = query( + """ + SELECT ?s WHERE { + ?s ?p ?o. + } LIMIT 1""", + sudo=True + ) + if result["results"]["bindings"][0]["s"]["value"]: + triplestore_live = True + else: + raise Exception("triplestore not ready yet...") + except Exception as _e: + log("Triplestore not live yet, retrying...") + time.sleep(1) + log("Triplestore ready!") + def update_modified(subject, modified=datetime.datetime.now()): """(DEPRECATED) Executes a SPARQL query to update the modification date of the given subject URI (string). 
diff --git a/pyproject.toml b/pyproject.toml index d2e2e97..c218f98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,4 +11,5 @@ dependencies = [ "rdflib>=7.2.1", "sparqlwrapper>=2.0.0", "uvicorn>=0.37.0", + "starlette-context>=0.4.0", ] diff --git a/web.py b/web.py index f3d2a84..3f8afcd 100644 --- a/web.py +++ b/web.py @@ -8,12 +8,26 @@ from jsonapi_pydantic.v1_0 import Error, TopLevel, Meta, Source, ErrorLinks from starlette.exceptions import HTTPException as StarletteHTTPException from rdflib.namespace import Namespace +from starlette.middleware import Middleware +from starlette_context.middleware import RawContextMiddleware +from fastapi import Request +from helpers import MU_HEADERS import helpers from escape_helpers import sparql_escape +class CustomContextMiddleware(RawContextMiddleware): + async def set_context(self, request: Request) -> dict: + context = await super().set_context(request) + headers = {} + context["headers"] = headers + for header in MU_HEADERS: + if header in request.headers: + headers[header] = request.headers[header] + return context + # WSGI variable name used by the server -app = FastAPI() +app = FastAPI(middleware=[Middleware(CustomContextMiddleware)]) class BaseHTTPException(StarletteHTTPException):