From 27db5952202ab1d2a6445261323f08014388cac4 Mon Sep 17 00:00:00 2001 From: ckouder Date: Tue, 12 May 2026 18:10:41 -0500 Subject: [PATCH] feat: add user guide, license, and Python examples to API docs Mirrors the OpenEnzymeDB-api pattern (moleculemaker/oed-api) for the Swagger UI at /api/v1/docs: - Expand the FastAPI description with User Guide, Automatic Pagination technical note, CC BY 4.0 data license, and support contact sections. - Switch the Swagger UI syntax-highlight theme to "obsidian" so Python code blocks render with a neutral dark palette instead of purple. - Add URL examples and copy-pasteable Python (requests) snippets to the docstrings for /search, /typeahead, /curation-statuses, and /ec_lookup. Refs: moleculemaker/CLEANDB-api#12 --- app/main.py | 51 ++++++++-- app/routers/search.py | 221 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 258 insertions(+), 14 deletions(-) diff --git a/app/main.py b/app/main.py index 54ab19f..cf72447 100644 --- a/app/main.py +++ b/app/main.py @@ -25,14 +25,52 @@ async def lifespan(app: FastAPI): description=settings.DESCRIPTION + f""" -## Automatic Pagination +# User Guide -When a query would return more than {settings.AUTO_PAGINATION_THRESHOLD} records and no explicit limit is -provided, the API will automatically paginate results to return {settings.AUTO_PAGINATION_THRESHOLD} records -at a time. The response will include pagination metadata with links to navigate -to next and previous pages. +The CLEAN Database API provides programmatic access to the same enzyme annotations and +CLEAN-predicted EC numbers available through the +[CLEAN Database user interface](https://cleandb.frontend.staging.mmli2.ncsa.illinois.edu). + +This page serves as the API documentation, detailing available endpoints, request parameters, +response formats, and example usage. There are also interactive features to test endpoints +directly from your browser. 
+ +## Endpoint Documentation, Python Examples, and Interactive Testing + +Below you will find a list of available endpoints. Each endpoint appears as a colored box with a +title and brief description. You can click on any endpoint to expand it and see more details, +including: +- **Description**: A brief overview of the endpoint's purpose. +- **Example Usage**: Sample code snippets in Python demonstrating how to call the endpoint and + handle the response. +- **Parameters**: Required and optional parameters for the endpoint. +- **Response Format**: The structure of the data returned by the endpoint. +- **Try it out**: An interactive feature that allows you to test the endpoint directly from this + page. + +### Technical Note: Automatic Pagination + +When a query would return more than {settings.AUTO_PAGINATION_THRESHOLD} records and no explicit +limit is provided, the API will automatically paginate results to return +{settings.AUTO_PAGINATION_THRESHOLD} records at a time. The response will include pagination +metadata with links to navigate to the next and previous pages. + +This threshold can be configured using the `AUTO_PAGINATION_THRESHOLD` environment variable. + +## Data License + +The CLEAN Database dataset is licensed under the Creative Commons Attribution 4.0 International +License (CC BY 4.0). This means you are free to share and adapt the material for any purpose, +even commercially, as long as you give appropriate credit, provide a link to the license, and +indicate if changes were made. + +Full license text: [https://creativecommons.org/licenses/by/4.0/](https://creativecommons.org/licenses/by/4.0/) + +## Support + +If you have any questions, issues, or feedback regarding the API, please reach out to us via +email at . -This threshold can be configured using the AUTO_PAGINATION_THRESHOLD environment variable.
""", version=settings.VERSION, lifespan=lifespan, @@ -40,6 +78,7 @@ async def lifespan(app: FastAPI): openapi_url="/api/v1/openapi.json", docs_url="/api/v1/docs", redoc_url="/api/v1/redoc", + swagger_ui_parameters={"syntaxHighlight.theme": "obsidian"}, ) # Add CORS middleware diff --git a/app/routers/search.py b/app/routers/search.py index 1426a61..a660662 100644 --- a/app/routers/search.py +++ b/app/routers/search.py @@ -105,8 +105,102 @@ async def get_data( db: Database = Depends(get_db), request: Request = None, ) -> CLEANSearchResponse: - """ - Get enzyme kinetic data with filtering options. + r""" +Get enzyme records and CLEAN-predicted EC numbers with filtering options. + +This endpoint allows querying the CLEAN Database with various filters across UniProt +metadata and CLEAN prediction confidence ranges. + +Filters that accept multiple values on the same parameter (e.g. `organism`) are combined +with OR logic, while filters on different parameters are combined with AND logic. + +The response format can be either JSON (default) or CSV. Results are automatically +paginated when a query exceeds the auto-pagination threshold and no explicit `limit` is provided.
+ +### URL examples + +- /api/v1/search?organism=Homo%20sapiens&organism=Mus%20musculus + +- /api/v1/search?ec_number=1.1.1.1&clean_ec_confidence_min=0.8 + +- /api/v1/search?curation_status=reviewed&format=csv&limit=100 + +### Python example: retrieving JSON data + +```python +import requests + +# Query CLEAN records filtered by organism and CLEAN-prediction confidence +params = { + "organism": ["Escherichia coli", "Homo sapiens"], # OR within the same param + "clean_ec_confidence_min": 0.8, # CLEAN confidence floor + "curation_status": "reviewed", # Swiss-Prot only + "limit": 10, +} + +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/search", + params=params, +) + +if response.status_code == 200: + payload = response.json() + print(f"Total matching records: {payload['total']}") + print(f"Returned in this page: {len(payload['data'])}") + + if payload["data"]: + first = payload["data"][0] + print("\nFirst record:") + print(f" Accession: {first['accession']}") + print(f" Protein: {first['protein']}") + print(f" Organism: {first['organism']}") + # CLEAN-predicted EC numbers are returned as a list of {ec_number, score} + for prediction in first.get("predicted_ec") or []: + print(f" CLEAN EC: {prediction['ec_number']} (score={prediction['score']:.3f})") + + # Follow pagination links if present + if payload.get("next"): + print(f"\nNext page: {payload['next']}") +else: + print(f"Error: {response.status_code} - {response.text}") +``` + +### Python example: downloading filtered results as CSV + +```python +import csv +import requests +from io import StringIO + +params = { + "ec_number": ["1.1.1.1", "2.7.1.1"], + "format": "csv", + "limit": 50, +} + +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/search", + params=params, +) + +if response.status_code == 200: + # Save the CSV response to disk + with open("clean_search_export.csv", "w", newline="") as fh: + fh.write(response.text) + print("Saved 
clean_search_export.csv") + + # Preview the first few rows + reader = csv.DictReader(StringIO(response.text)) + for i, row in enumerate(reader): + if i >= 3: + break + print(f"\nRecord {i + 1}:") + print(f" Accession: {row.get('accession')}") + print(f" Organism: {row.get('organism')}") + print(f" Amino acids: {row.get('amino_acids')}") +else: + print(f"Error: {response.status_code} - {response.text}") +``` """ try: @@ -330,8 +424,67 @@ async def get_typeahead( db: Database = Depends(get_db), request: Request = None, ) -> CLEANTypeaheadResponse: - """ - Get typeahead suggestions for searching the database of predicted EC numbers. + r""" +Get autocomplete suggestions for a chosen field, optionally constrained by a search context. + +Use `field_name` to choose which column to search (`accession`, `organism`, `protein_name`, +`gene_name`, `uniprot_id`, or `predicted_ec`). The `search` term must be at least 3 characters. + +Optionally pass any of the `/search` filter parameters (e.g. `organism`, `curation_status`, +`clean_ec_confidence_min`) to scope the suggestions to records that already match those filters. 
+ +### URL examples + +- /api/v1/typeahead?field_name=organism&search=esch + +- /api/v1/typeahead?field_name=protein_name&search=kin&organism=Homo%20sapiens + +- /api/v1/typeahead?field_name=predicted_ec&search=1.1.1&clean_ec_confidence_min=0.7 + +### Python example: simple autocomplete + +```python +import requests + +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/typeahead", + params={"field_name": "organism", "search": "esch", "limit": 10}, +) + +if response.status_code == 200: + payload = response.json() + print(f"Field: {payload['field_name']}, query: {payload['search']!r}") + print(f"Total matches: {payload['total']}") + for match in payload["matches"]: + print(f" - {match}") +else: + print(f"Error: {response.status_code} - {response.text}") +``` + +### Python example: autocomplete scoped by a search context + +```python +import requests + +# Look up protein names that contain "kin" — but only within Homo sapiens records +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/typeahead", + params={ + "field_name": "protein_name", + "search": "kin", + "organism": "Homo sapiens", + "curation_status": "reviewed", + }, +) + +if response.status_code == 200: + payload = response.json() + print(f"Search context applied: {payload.get('search_context')}") + for match in payload["matches"][:5]: + print(f" - {match}") +else: + print(f"Error: {response.status_code} - {response.text}") +``` """ try: @@ -410,8 +563,32 @@ async def get_typeahead( @router.get("/curation-statuses", summary="Get available curation status options") async def get_curation_statuses() -> CLEANCurationStatusResponse: - """ - Get the list of available curation status options for filtering. + r""" +Get the list of available curation status options that can be used with the `curation_status` +filter on `/search` and `/typeahead`. 
+ +### URL example + +- /api/v1/curation-statuses + +### Python example + +```python +import requests + +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/curation-statuses", +) + +if response.status_code == 200: + for option in response.json()["statuses"]: + print(f"{option['value']:>11} -> {option['label']}") + # Expected output: + # reviewed -> Reviewed (Swiss-Prot) + # unreviewed -> Unreviewed (TrEMBL) +else: + print(f"Error: {response.status_code} - {response.text}") +``` """ return CLEANCurationStatusResponse( statuses=[ @@ -445,8 +622,36 @@ async def get_ec_lookup( db: Database = Depends(get_db), request: Request = None, ) -> CLEANECLookupResponse: - """ - Look up EC numbers or classes based on a search term. + r""" +Look up EC numbers or EC classes by partial number or descriptive name. + +Useful for resolving free-text user input into canonical EC numbers before calling +`/search` with the `ec_number` filter. + +### URL examples + +- /api/v1/ec_lookup?search=1.1.1 + +- /api/v1/ec_lookup?search=oxidoreductase + +### Python example + +```python +import requests + +response = requests.get( + "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1/ec_lookup", + params={"search": "1.1.1"}, +) + +if response.status_code == 200: + payload = response.json() + print(f"Matches for {payload['search']!r}:") + for match in payload["matches"][:5]: + print(f" {match['ec_number']} - {match['ec_name']}") +else: + print(f"Error: {response.status_code} - {response.text}") +``` """ try: