From 0379c61cf5a1daef06598a298c5d7886aa105ae8 Mon Sep 17 00:00:00 2001 From: karel kremer Date: Tue, 27 Jan 2026 16:05:58 +0100 Subject: [PATCH 1/5] also build arm in feature and release versions --- .woodpecker/.feature.yml | 5 +++-- .woodpecker/.release.yml | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.woodpecker/.feature.yml b/.woodpecker/.feature.yml index f8d7863..d236200 100644 --- a/.woodpecker/.feature.yml +++ b/.woodpecker/.feature.yml @@ -2,8 +2,9 @@ steps: build-and-push: image: woodpeckerci/plugin-docker-buildx settings: - repo: "${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}" - tags: "feature-${CI_COMMIT_BRANCH##feature/}" + platforms: linux/amd64,linux/arm64 + repo: '${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}' + tags: 'feature-${CI_COMMIT_BRANCH##feature/}' username: from_secret: docker_username password: diff --git a/.woodpecker/.release.yml b/.woodpecker/.release.yml index 262f247..22e77ab 100644 --- a/.woodpecker/.release.yml +++ b/.woodpecker/.release.yml @@ -2,8 +2,9 @@ steps: release: image: woodpeckerci/plugin-docker-buildx settings: - repo: "${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}" - tags: "${CI_COMMIT_TAG##v}" + platforms: linux/amd64,linux/arm64 + repo: '${CI_REPO_OWNER##mu-}/${CI_REPO_NAME}' + tags: '${CI_COMMIT_TAG##v}' username: from_secret: docker_username password: From 6882a658f80b6b0fc30a0c1f0cfc3700455b6daf Mon Sep 17 00:00:00 2001 From: karel kremer Date: Tue, 10 Feb 2026 16:48:42 +0100 Subject: [PATCH 2/5] add sudo and thread_safe options to query functions --- helpers.py | 83 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/helpers.py b/helpers.py index 4c9b4f1..54a646c 100644 --- a/helpers.py +++ b/helpers.py @@ -61,11 +61,11 @@ def generate_uuid(): def log(msg, *args, **kwargs): """ Write a log message to the log file. 
- + Works exactly the same as the logging.info (https://docs.python.org/3/library/logging.html#logging.info) method from pythons' logging module. - Logs are written to the /logs directory in the docker container. - - Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) + Logs are written to the /logs directory in the docker container. + + Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) used by the template. The methods provided by this instance can be used for more fine-grained logging. """ return logger.info(msg, *args, **kwargs) @@ -105,14 +105,23 @@ def validate_resource_type(expected_type, data): return error("Incorrect type. Type must be " + str(expected_type) + ", instead of " + str(data['type']) + ".", 409) +def build_sparql_query(): + sparql_query = SPARQLWrapper(os.environ.get('MU_SPARQL_ENDPOINT'), returnFormat=JSON) + if os.environ.get('MU_SPARQL_TIMEOUT'): + timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) + sparql_query.setTimeout(timeout) + return sparql_query + +def build_sparql_update(): + sparql_update = SPARQLWrapper(os.environ.get('MU_SPARQL_UPDATEPOINT'), returnFormat=JSON) + sparql_update.method = 'POST' + if os.environ.get('MU_SPARQL_TIMEOUT'): + timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) + sparql_update.setTimeout(timeout) + return sparql_update -sparqlQuery = SPARQLWrapper(os.environ.get('MU_SPARQL_ENDPOINT'), returnFormat=JSON) -sparqlUpdate = SPARQLWrapper(os.environ.get('MU_SPARQL_UPDATEPOINT'), returnFormat=JSON) -sparqlUpdate.method = 'POST' -if os.environ.get('MU_SPARQL_TIMEOUT'): - timeout = int(os.environ.get('MU_SPARQL_TIMEOUT')) - sparqlQuery.setTimeout(timeout) - sparqlUpdate.setTimeout(timeout) +sparqlQuery = build_sparql_query() +sparqlUpdate = build_sparql_update() MU_HEADERS = [ "MU-SESSION-ID", @@ -121,38 +130,56 @@ def 
validate_resource_type(expected_type, data): "MU-AUTH-USED-GROUPS" ] -def query(the_query: str, request: Request | None = None): - """Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default).""" +def set_sparql_interface_headers(sparql_interface, request, sudo): for header in MU_HEADERS: if request is not None and header in request.headers: - sparqlQuery.customHttpHeaders[header] = request.headers[header] + sparql_interface.customHttpHeaders[header] = request.headers[header] else: # Make sure headers used for a previous query are cleared - if header in sparqlQuery.customHttpHeaders: - del sparqlQuery.customHttpHeaders[header] - sparqlQuery.setQuery(the_query) + if header in sparql_interface.customHttpHeaders: + del sparql_interface.customHttpHeaders[header] + if sudo: + sparql_interface.customHttpHeaders["mu-auth-sudo"] = "true" + else: + del sparql_interface.customHttpHeaders["mu-auth-sudo"] + + + +def query(the_query: str, request: Request | None = None, thread_safe: bool = False, sudo: bool = False): + """Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default).""" + sparql_interface = sparqlQuery + + if thread_safe: + # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance + sparql_interface = build_sparql_query() + + set_sparql_interface_headers(sparql_interface, request, sudo) + + sparql_interface.setQuery(the_query) if LOG_SPARQL_QUERIES: log("Execute query: \n" + the_query) try: - return sparqlQuery.query().convert() + return sparql_interface.query().convert() except Exception as e: log("Failed Query: \n" + the_query) raise e -def update(the_query: str, request: Request | None = None): +def update(the_query: str, request: Request | None = None, thread_safe: bool = False, sudo: bool = False): 
"""Execute the given update SPARQL query on the triplestore. If the given query is not an update query, nothing happens.""" - for header in MU_HEADERS: - if request is not None and header in request.headers: - sparqlUpdate.customHttpHeaders[header] = request.headers[header] - else: # Make sure headers used for a previous query are cleared - if header in sparqlUpdate.customHttpHeaders: - del sparqlUpdate.customHttpHeaders[header] - sparqlUpdate.setQuery(the_query) - if sparqlUpdate.isSparqlUpdateRequest(): + sparql_interface = sparqlUpdate + + if thread_safe: + # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance + sparql_interface = build_sparql_update() + + set_sparql_interface_headers(sparql_interface, request, sudo) + + sparql_interface.setQuery(the_query) + if sparql_interface.isSparqlUpdateRequest(): if LOG_SPARQL_UPDATES: log("Execute query: \n" + the_query) try: - sparqlUpdate.query() + sparql_interface.query() except Exception as e: log("Failed Query: \n" + the_query) raise e From 1a22ff33794b64ed159e0727608f6874b7ec8058 Mon Sep 17 00:00:00 2001 From: karel kremer Date: Tue, 10 Feb 2026 17:04:07 +0100 Subject: [PATCH 3/5] add wait_for_triplestore --- helpers.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/helpers.py b/helpers.py index 54a646c..aeb3fd3 100644 --- a/helpers.py +++ b/helpers.py @@ -3,6 +3,7 @@ import logging import os import sys +import time from fastapi import Request from rdflib.namespace import DC from escape_helpers import sparql_escape @@ -184,6 +185,27 @@ def update(the_query: str, request: Request | None = None, thread_safe: bool = F log("Failed Query: \n" + the_query) raise e +def wait_for_triplestore(): + triplestore_live = False + log("Waiting for triplestore...") + while not triplestore_live: + try: + result = query( + """ + SELECT ?s WHERE { + ?s ?p ?o. 
+ } LIMIT 1""", + sudo=True + ) + if result["results"]["bindings"][0]["s"]["value"]: + triplestore_live = True + else: + raise Exception("triplestore not ready yet...") + except Exception as _e: + log("Triplestore not live yet, retrying...") + time.sleep(1) + log("Triplestore ready!") + def update_modified(subject, modified=datetime.datetime.now()): """(DEPRECATED) Executes a SPARQL query to update the modification date of the given subject URI (string). From 9cde6d6671e6e6c0aa10167e007fb754265555ce Mon Sep 17 00:00:00 2001 From: karel kremer Date: Wed, 11 Feb 2026 08:08:44 +0100 Subject: [PATCH 4/5] extend readme --- README.md | 58 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 91ff3ac..939408e 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ apt update && apt install -y libgeos-dev ### Development mode -By leveraging Dockers' [bind-mount](https://docs.docker.com/storage/bind-mounts/), you can mount your application code into an existing service image. This spares you from building a new image to test each change. Just mount your services' folder to the containers' `/app`. On top of that, you can configure the environment variable `MODE` to `development`. That enables live-reloading of the server, so it immediately updates when you save a file. +By leveraging Dockers' [bind-mount](https://docs.docker.com/storage/bind-mounts/), you can mount your application code into an existing service image. This spares you from building a new image to test each change. Just mount your services' folder to the containers' `/app`. On top of that, you can configure the environment variable `MODE` to `development`. That enables live-reloading of the server, so it immediately updates when you save a file. example docker-compose parameters: ```yml @@ -82,11 +82,11 @@ def log(msg, *args, **kwargs) ``` > Write a log message to the log file. 
-> +> > Works exactly the same as the logging.info (https://docs.python.org/3/library/logging.html#logging.info) method from pythons' logging module. -> Logs are written to the /logs directory in the docker container. -> -> Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) +> Logs are written to the /logs directory in the docker container. +> +> Note that the `helpers` module also exposes `logger`, which is the logger instance (https://docs.python.org/3/library/logging.html#logger-objects) > used by the template. The methods provided by this instance can be used for more fine-grained logging. @@ -134,10 +134,15 @@ def validate_resource_type(expected_type, data) #### `query` ```python -def query(the_query) +def query(the_query, request = None, thread_safe = False, sudo = False ) ``` > Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default). +> +> Advanced options: +> - request: pass in the original request to add in MU-SESSION-ID, MU-CALL-ID, MU-AUTH-ALLOWED-GROUPS, MU-AUTH-USED-GROUPS headers to the sparql request +> - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! +> - sudo: perform a sudo query, ignoring the groups of the originating @@ -148,6 +153,11 @@ def update(the_query) ``` > Execute the given update SPARQL query on the triplestore. If the given query is not an update query, nothing happens. 
+> +> Advanced options: +> - request: pass in the original request to add in MU-SESSION-ID, MU-CALL-ID, MU-AUTH-ALLOWED-GROUPS, MU-AUTH-USED-GROUPS headers to the sparql request +> - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! +> - sudo: perform a sudo query, ignoring the groups of the originating request @@ -248,12 +258,12 @@ def sparql_escape_uri(obj) def sparql_escape(obj) ``` -> Converts the given object to a SPARQL-safe RDF object string with the right RDF-datatype. -> +> Converts the given object to a SPARQL-safe RDF object string with the right RDF-datatype. +> > These functions should be used especially when inserting user-input to avoid SPARQL-injection. > Separate functions are available for different python datatypes. > The `sparql_escape` function however can automatically select the right method to use, for the following Python datatypes: -> +> > - `str` > - `int` > - `float` @@ -261,9 +271,17 @@ def sparql_escape(obj) > - `datetime.date` > - `datetime.time` > - `boolean` -> +> > The `sparql_escape_uri`-function can be used for escaping URI's. +#### `wait_for_triplestore` + +```python +def wait_for_triplestore() +``` + +> Wait until the triplestore is running. Performs a sudo select query with limit 1 until it gets a proper result from the triplestore + ### Writing SPARQL Queries The template itself is unopinionated when it comes to constructing SPARQL-queries. However, since Python's most common string formatting methods aren't a great fit for SPARQL queries, we hereby want to provide an example on how to construct a query based on [template strings](https://docs.python.org/3.8/library/string.html#template-strings) while keeping things readable. 
@@ -288,6 +306,19 @@ query_string = query_template.substitute(person=sparql_escape_uri(my_person)) query_result = query(query_string) ``` +### Functions on startup +Because of the way FastAPI works, logic that should be run on startup should always be wrapped with an `@app.on_event("startup")` decorator. For instance: + +```py +@app.on_event("startup") +async def startup_event(): + wait_for_triplestore() + # on startup fail existing busy tasks + fail_busy_and_scheduled_tasks() + # on startup also immediately start scheduled tasks + process_open_tasks() +``` + ## Deployment Example snippet for adding a service to a docker-compose stack: @@ -342,7 +373,7 @@ python3 README.py ``` You can customise the output through the API configuration! See [README.py](README.py) && the [pydoc-markdown docs](https://niklasrosenstein.github.io/pydoc-markdown/). -## Migate from Flask based versions +## Migrate from Flask based versions Previous versions of this template were based on Flask. Effort was made to keep as much backward compatible as possible. However, some things were slightly modified or require your attention @@ -387,4 +418,7 @@ Unless you know what you are doing, methods annotated with @app or any router you 
-More information [here](https://fastapi.tiangolo.com/async/#in-a-hurry) \ No newline at end of file +More information [here](https://fastapi.tiangolo.com/async/#in-a-hurry) + +### Startup functions +Be sure to wrap your startup functions with a `@app.on_event("startup")` decorator From 8f3622e252e4cba6bc8b28e47ce41623c3a270f9 Mon Sep 17 00:00:00 2001 From: karel kremer Date: Wed, 11 Feb 2026 09:08:11 +0100 Subject: [PATCH 5/5] don't force users to pass in request, do it automatically if in request context --- README.md | 6 ++---- helpers.py | 18 +++++++++--------- pyproject.toml | 1 + web.py | 16 +++++++++++++++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 939408e..4fc3bcd 100644 --- a/README.md +++ b/README.md @@ -134,13 +134,12 @@ def validate_resource_type(expected_type, data) #### `query` ```python -def query(the_query, request = None, thread_safe = False, sudo = False ) +def query(the_query, thread_safe = False, sudo = False ) ``` > Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default). > > Advanced options: -> - request: pass in the original request to add in MU-SESSION-ID, MU-CALL-ID, MU-AUTH-ALLOWED-GROUPS, MU-AUTH-USED-GROUPS headers to the sparql request > - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! > - sudo: perform a sudo query, ignoring the groups of the originating @@ -149,13 +148,12 @@ def query(the_query, request = None, thread_safe = False, sudo = False ) #### `update` ```python -def update(the_query) +def update(the_query, thread_safe = False, sudo = False) ``` > Execute the given update SPARQL query on the triplestore. 
If the given query is not an update query, nothing happens. > > Advanced options: -> - request: pass in the original request to add in MU-SESSION-ID, MU-CALL-ID, MU-AUTH-ALLOWED-GROUPS, MU-AUTH-USED-GROUPS headers to the sparql request > - thread_safe: you may configure fastapi to use multiple worker threads and still use sudo or request to modify the sparql request's http headers. If so, use thread_safe to create a new sparql client every time to avoid contamination of the sparqlQuery object by other threads. Slight performance loss, but hey, you got threads! > - sudo: perform a sudo query, ignoring the groups of the originating request diff --git a/helpers.py b/helpers.py index aeb3fd3..a619d83 100644 --- a/helpers.py +++ b/helpers.py @@ -4,11 +4,11 @@ import os import sys import time -from fastapi import Request from rdflib.namespace import DC from escape_helpers import sparql_escape from SPARQLWrapper import SPARQLWrapper, JSON from deprecated import deprecated +from starlette_context import context """ The template provides the user with several helper methods. 
They aim to give you a step ahead for: @@ -131,21 +131,21 @@ def build_sparql_update(): "MU-AUTH-USED-GROUPS" ] -def set_sparql_interface_headers(sparql_interface, request, sudo): +def set_sparql_interface_headers(sparql_interface, sudo): for header in MU_HEADERS: - if request is not None and header in request.headers: - sparql_interface.customHttpHeaders[header] = request.headers[header] + if context.exists() and header in context["headers"]: + sparql_interface.customHttpHeaders[header] = context["headers"][header] else: # Make sure headers used for a previous query are cleared if header in sparql_interface.customHttpHeaders: del sparql_interface.customHttpHeaders[header] if sudo: sparql_interface.customHttpHeaders["mu-auth-sudo"] = "true" - else: + elif "mu-auth-sudo" in sparql_interface.customHttpHeaders: del sparql_interface.customHttpHeaders["mu-auth-sudo"] -def query(the_query: str, request: Request | None = None, thread_safe: bool = False, sudo: bool = False): +def query(the_query: str, thread_safe: bool = False, sudo: bool = False): """Execute the given SPARQL query (select/ask/construct) on the triplestore and returns the results in the given return Format (JSON by default).""" sparql_interface = sparqlQuery @@ -153,7 +153,7 @@ def query(the_query: str, request: Request | None = None, thread_safe: bool = Fa # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance sparql_interface = build_sparql_query() - set_sparql_interface_headers(sparql_interface, request, sudo) + set_sparql_interface_headers(sparql_interface, sudo) sparql_interface.setQuery(the_query) if LOG_SPARQL_QUERIES: @@ -165,7 +165,7 @@ def query(the_query: str, request: Request | None = None, thread_safe: bool = Fa raise e -def update(the_query: str, request: Request | None = None, thread_safe: bool = False, sudo: bool = False): +def update(the_query: str, thread_safe: bool = False, sudo: bool = False): 
"""Execute the given update SPARQL query on the triplestore. If the given query is not an update query, nothing happens.""" sparql_interface = sparqlUpdate @@ -173,7 +173,7 @@ def update(the_query: str, request: Request | None = None, thread_safe: bool = F # we're editing properties of sparql_interface, if this is done by multiple worker threads, the behavior is undefined, better create a new instance sparql_interface = build_sparql_update() - set_sparql_interface_headers(sparql_interface, request, sudo) + set_sparql_interface_headers(sparql_interface, sudo) sparql_interface.setQuery(the_query) if sparql_interface.isSparqlUpdateRequest(): diff --git a/pyproject.toml b/pyproject.toml index d2e2e97..c218f98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,4 +11,5 @@ dependencies = [ "rdflib>=7.2.1", "sparqlwrapper>=2.0.0", "uvicorn>=0.37.0", + "starlette-context>=0.4.0", ] diff --git a/web.py b/web.py index f3d2a84..3f8afcd 100644 --- a/web.py +++ b/web.py @@ -8,12 +8,26 @@ from jsonapi_pydantic.v1_0 import Error, TopLevel, Meta, Source, ErrorLinks from starlette.exceptions import HTTPException as StarletteHTTPException from rdflib.namespace import Namespace +from starlette.middleware import Middleware +from starlette_context.middleware import RawContextMiddleware +from fastapi import Request +from helpers import MU_HEADERS import helpers from escape_helpers import sparql_escape +class CustomContextMiddleware(RawContextMiddleware): + async def set_context(self, request: Request) -> dict: + context = await super().set_context(request) + headers = {} + context["headers"] = headers + for header in MU_HEADERS: + if header in request.headers: + headers[header] = request.headers[header] + return context + # WSGI variable name used by the server -app = FastAPI() +app = FastAPI(middleware=[Middleware(CustomContextMiddleware)]) class BaseHTTPException(StarletteHTTPException):