From e28fc79022486e8b8470e7f5f171192bdce6f82d Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 7 Apr 2026 19:07:05 -0400 Subject: [PATCH 1/7] Centralize Llama Stack Vector Store/File Operations in Lightspeed-Core Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 819 ++++++++++++++++++ src/app/routers.py | 2 + src/models/config.py | 5 + src/models/requests.py | 160 ++++ src/models/responses.py | 194 +++++ .../unit/app/endpoints/test_vector_stores.py | 527 +++++++++++ 6 files changed, 1707 insertions(+) create mode 100644 src/app/endpoints/vector_stores.py create mode 100644 tests/unit/app/endpoints/test_vector_stores.py diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py new file mode 100644 index 000000000..d3dc3597d --- /dev/null +++ b/src/app/endpoints/vector_stores.py @@ -0,0 +1,819 @@ +"""Handler for REST API calls to manage vector stores and files.""" + +import asyncio +import os +import traceback +from io import BytesIO +from typing import Annotated, Any + +from fastapi import APIRouter, File, HTTPException, Request, UploadFile +from fastapi.params import Depends +from llama_stack_client import APIConnectionError, BadRequestError + +from authentication import get_auth_dependency +from authentication.interface import AuthTuple +from authorization.middleware import authorize +from client import AsyncLlamaStackClientHolder +from configuration import configuration +from log import get_logger +from models.config import Action +from models.requests import ( + VectorStoreCreateRequest, + VectorStoreFileCreateRequest, + VectorStoreUpdateRequest, +) +from models.responses import ( + FileResponse, + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, + VectorStoreFileResponse, + VectorStoreFilesListResponse, + VectorStoreResponse, + VectorStoresListResponse, +) +from utils.endpoints import check_configuration_loaded + +logger = get_logger(__name__) 
+router = APIRouter(tags=["vector-stores"]) + + +# Response schemas for OpenAPI documentation +vector_stores_list_responses: dict[int | str, dict[str, Any]] = { + 200: VectorStoresListResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + +vector_store_responses: dict[int | str, dict[str, Any]] = { + 200: VectorStoreResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["vector_store"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + +file_responses: dict[int | str, dict[str, Any]] = { + 200: FileResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + +vector_store_file_responses: dict[int | str, dict[str, Any]] = { + 200: VectorStoreFileResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["vector_store_file"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + +vector_store_files_list_responses: dict[int | str, dict[str, Any]] = { + 200: 
VectorStoreFilesListResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + + +@router.post("/vector-stores", responses=vector_store_responses) +@authorize(Action.MANAGE_VECTOR_STORES) +async def create_vector_store( + request: Request, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + body: VectorStoreCreateRequest, +) -> VectorStoreResponse: + """Create a new vector store. + + Parameters: + request: The incoming HTTP request. + auth: Authentication tuple from the auth dependency. + body: Vector store creation parameters. + + Returns: + VectorStoreResponse: The created vector store object. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + + # Extract provider_id for extra_body (not a direct client parameter) + body_dict = body.model_dump(exclude_none=True) + logger.debug("Raw vector store create payload: %s", body_dict) + extra_body = {} + if "provider_id" in body_dict: + extra_body["provider_id"] = body_dict.pop("provider_id") + if "embedding_model" in body_dict: + extra_body["embedding_model"] = body_dict.pop("embedding_model") + if "embedding_dimension" in body_dict: + extra_body["embedding_dimension"] = body_dict.pop("embedding_dimension") + + logger.info( + "Creating vector store - body_dict: %s, extra_body: %s", + body_dict, + extra_body, + ) + + vector_store = await client.vector_stores.create( + **body_dict, + extra_body=extra_body, + ) + + return VectorStoreResponse( + id=vector_store.id, 
name=vector_store.name, + created_at=vector_store.created_at, + last_active_at=vector_store.last_active_at, + expires_at=vector_store.expires_at, + status=vector_store.status or "unknown", + usage_bytes=vector_store.usage_bytes or 0, + metadata=vector_store.metadata, + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to create vector store: %s", e) + response = InternalServerErrorResponse( + response="Unable to create vector store", + cause=f"Error creating vector store: {type(e).__name__}: {str(e)}", + ) + raise HTTPException(**response.model_dump()) from e + + +@router.get("/vector-stores", responses=vector_stores_list_responses) +@authorize(Action.READ_VECTOR_STORES) +async def list_vector_stores( + request: Request, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> VectorStoresListResponse: + """List all vector stores. + + Parameters: + request: The incoming HTTP request. + auth: Authentication tuple from the auth dependency. + + Returns: + VectorStoresListResponse: List of all vector stores. 
+ + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + vector_stores = await client.vector_stores.list() + + data = [ + VectorStoreResponse( + id=vs.id, + name=vs.name, + created_at=vs.created_at, + last_active_at=vs.last_active_at, + expires_at=vs.expires_at or None, + status=vs.status or "unknown", + usage_bytes=vs.usage_bytes or 0, + metadata=vs.metadata, + ) + for vs in vector_stores.data + ] + + return VectorStoresListResponse(data=data) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to list vector stores: %s", e) + response = InternalServerErrorResponse( + response="Unable to list vector stores", + cause=f"Error listing vector stores: {type(e).__name__}: {str(e)}", + ) + raise HTTPException(**response.model_dump()) from e + + +@router.get("/vector-stores/{vector_store_id}", responses=vector_store_responses) +@authorize(Action.READ_VECTOR_STORES) +async def get_vector_store( + request: Request, + vector_store_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> VectorStoreResponse: + """Retrieve a vector store by ID. + + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store to retrieve. + auth: Authentication tuple from the auth dependency. + + Returns: + VectorStoreResponse: The vector store object. 
+ + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: Vector store not found + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + vector_store = await client.vector_stores.retrieve(vector_store_id) + + return VectorStoreResponse( + id=vector_store.id, + name=vector_store.name, + created_at=vector_store.created_at, + last_active_at=vector_store.last_active_at, + expires_at=vector_store.expires_at, + status=vector_store.status or "unknown", + usage_bytes=vector_store.usage_bytes or 0, + metadata=vector_store.metadata, + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store not found: %s", e) + response = NotFoundResponse( + resource="vector_store", resource_id=vector_store_id + ) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to get vector store: %s", e) + response = InternalServerErrorResponse( + response="Unable to retrieve vector store", + cause=( + f"Error retrieving vector store '{vector_store_id}': " + f"{type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e + + +@router.put("/vector-stores/{vector_store_id}", responses=vector_store_responses) +@authorize(Action.MANAGE_VECTOR_STORES) +async def update_vector_store( + request: Request, + vector_store_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + body: VectorStoreUpdateRequest, +) -> VectorStoreResponse: + """Update a vector store. + + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store to update. 
+ auth: Authentication tuple from the auth dependency. + body: Vector store update parameters. + + Returns: + VectorStoreResponse: The updated vector store object. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: Vector store not found + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + vector_store = await client.vector_stores.update( + vector_store_id, **body.model_dump(exclude_none=True) + ) + + return VectorStoreResponse( + id=vector_store.id, + name=vector_store.name, + created_at=vector_store.created_at, + last_active_at=vector_store.last_active_at, + expires_at=vector_store.expires_at, + status=vector_store.status or "unknown", + usage_bytes=vector_store.usage_bytes or 0, + metadata=vector_store.metadata or None, + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store not found: %s", e) + response = NotFoundResponse( + resource="vector_store", resource_id=vector_store_id + ) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to update vector store: %s", e) + response = InternalServerErrorResponse( + response="Unable to update vector store", + cause=f"Error updating vector store '{vector_store_id}': {type(e).__name__}: {str(e)}", + ) + raise HTTPException(**response.model_dump()) from e + + +@router.delete( + "/vector-stores/{vector_store_id}", + responses={"204": {"description": "Vector store deleted"}}, +) +@authorize(Action.MANAGE_VECTOR_STORES) +async def delete_vector_store( + request: Request, + vector_store_id: str, + auth: 
Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> None: + """Delete a vector store. + + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store to delete. + auth: Authentication tuple from the auth dependency. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: Vector store not found + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + await client.vector_stores.delete(vector_store_id) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store not found: %s", e) + response = NotFoundResponse( + resource="vector_store", resource_id=vector_store_id + ) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to delete vector store: %s", e) + response = InternalServerErrorResponse( + response="Unable to delete vector store", + cause=f"Error deleting vector store '{vector_store_id}': {type(e).__name__}: {str(e)}", + ) + raise HTTPException(**response.model_dump()) from e + + +@router.post("/files", responses=file_responses) +@authorize(Action.MANAGE_FILES) +async def create_file( + request: Request, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + file: UploadFile = File(...), +) -> FileResponse: + """Upload a file. + + Parameters: + request: The incoming HTTP request. + auth: Authentication tuple from the auth dependency. + file: The file to upload. + + Returns: + FileResponse: The uploaded file object. 
+ + Raises: + HTTPException: + - 400: Bad request (e.g., file too large) + - 401: Authentication failed + - 403: Authorization failed + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + + # Read file content + content = await file.read() + filename = file.filename or "uploaded_file" + + # Add .txt extension if no extension present + # (since parsed PDFs/URLs are sent as plain text) + if not os.path.splitext(filename)[1]: + filename = f"{filename}.txt" + + logger.info( + "Uploading file - filename: %s, size: %d bytes", + filename, + len(content), + ) + logger.debug("Upload prepared for filename: %s", filename) + + # Convert to BytesIO for Llama Stack client + # The client expects bytes, io.IOBase, PathLike, or a tuple + file_bytes = BytesIO(content) + file_bytes.name = filename # Set the filename attribute + + file_obj = await client.files.create( + file=file_bytes, + purpose="assistants", + ) + + return FileResponse( + id=file_obj.id, + filename=file_obj.filename or filename, + bytes=file_obj.bytes or len(content), + created_at=file_obj.created_at, + purpose=file_obj.purpose or "assistants", + object=file_obj.object or "file", + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Bad request for file upload: %s", e) + # Client-side upload errors are 400s (see docstring), not backend outages. + raise HTTPException(status_code=400, detail=f"File upload rejected: {str(e)}") from e + except Exception as e: + full_trace = traceback.format_exc() + logger.error("Unable to upload file: %s", e) + logger.error("Full traceback:\n%s", full_trace) + response = InternalServerErrorResponse( + response="Unable to upload file", 
+ cause=( + f"Error uploading file '{file.filename or 'unknown'}': " + f"{type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e + + +@router.post( + "/vector-stores/{vector_store_id}/files", responses=vector_store_file_responses +) +@authorize(Action.MANAGE_VECTOR_STORES) +async def add_file_to_vector_store( + request: Request, + vector_store_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + body: VectorStoreFileCreateRequest, +) -> VectorStoreFileResponse: + """Add a file to a vector store. + + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store. + auth: Authentication tuple from the auth dependency. + body: File addition parameters. + + Returns: + VectorStoreFileResponse: The vector store file object. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: Vector store or file not found + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + + # Retry logic for database lock errors + max_retries = 3 + retry_delay = 0.5 # seconds + vs_file = None + + for attempt in range(max_retries): + try: + vs_file = await client.vector_stores.files.create( + vector_store_id=vector_store_id, + **body.model_dump(exclude_none=True), + ) + break # Success, exit retry loop + except Exception as retry_error: # pylint: disable=broad-exception-caught + error_msg = str(retry_error).lower() + is_lock_error = ( + "database is locked" in error_msg or "locked" in error_msg + ) + is_last_attempt = attempt == max_retries - 1 + + if is_lock_error and not is_last_attempt: + logger.warning( + "Database locked while adding file to vector store, " + "retrying in %s seconds (attempt %d/%d)", + retry_delay, + attempt + 1, + max_retries, + ) + await asyncio.sleep(retry_delay) + 
retry_delay *= 2 # Exponential backoff + else: + raise # Re-raise if not a lock error or max retries reached + if not vs_file: + raise HTTPException( + status_code=500, detail="Failed to create vector store file" + ) + logger.info( + "Vector store file created - ID: %s, status: %s, last_error: %s", + vs_file.id, + vs_file.status, + vs_file.last_error if vs_file.last_error else "None", + ) + + return VectorStoreFileResponse( + id=vs_file.id, + vector_store_id=vs_file.vector_store_id or vector_store_id, + status=vs_file.status or "unknown", + attributes=vs_file.attributes, + last_error=( + vs_file.last_error.message + if vs_file.last_error and hasattr(vs_file.last_error, "message") + else None + ), + object=vs_file.object or "vector_store.file", + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store file operation failed: %s", e) + response = NotFoundResponse( + resource="vector_store_file", resource_id=body.file_id + ) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to add file to vector store: %s", e) + response = InternalServerErrorResponse( + response="Unable to add file to vector store", + cause=( + f"Error adding file '{body.file_id}' to vector store " + f"'{vector_store_id}': {type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e + + +@router.get( + "/vector-stores/{vector_store_id}/files", + responses=vector_store_files_list_responses, +) +@authorize(Action.READ_VECTOR_STORES) +async def list_vector_store_files( + request: Request, + vector_store_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> VectorStoreFilesListResponse: + """List files in a vector store. 
+ + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store. + auth: Authentication tuple from the auth dependency. + + Returns: + VectorStoreFilesListResponse: List of files in the vector store. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + files = await client.vector_stores.files.list(vector_store_id=vector_store_id) + + data = [ + VectorStoreFileResponse( + id=f.id, + vector_store_id=f.vector_store_id or vector_store_id, + status=f.status or "unknown", + attributes=f.attributes, + last_error=( + f.last_error.message + if f.last_error and hasattr(f.last_error, "message") + else None + ), + object=f.object or "vector_store.file", + ) + for f in files.data + ] + return VectorStoreFilesListResponse(data=data) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to list vector store files: %s", e) + response = InternalServerErrorResponse( + response="Unable to list vector store files", + cause=( + f"Error listing files in vector store '{vector_store_id}': " + f"{type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e + + +@router.get( + "/vector-stores/{vector_store_id}/files/{file_id}", + responses=vector_store_file_responses, +) +@authorize(Action.READ_VECTOR_STORES) +async def get_vector_store_file( + request: Request, + vector_store_id: str, + file_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> VectorStoreFileResponse: + """Retrieve a file from a vector store. 
+ + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store. + file_id: ID of the file. + auth: Authentication tuple from the auth dependency. + + Returns: + VectorStoreFileResponse: The vector store file object. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: File not found in vector store + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + logger.debug("Retrieving file %s from vector store %s", file_id, vector_store_id) + vs_file = await client.vector_stores.files.retrieve( + vector_store_id=vector_store_id, + file_id=file_id, + ) + logger.debug("Retrieved vector store file: %s", vs_file.id) + + return VectorStoreFileResponse( + id=vs_file.id, + vector_store_id=vs_file.vector_store_id or vector_store_id, + status=vs_file.status or "unknown", + attributes=vs_file.attributes, + last_error=( + vs_file.last_error.message + if vs_file.last_error and hasattr(vs_file.last_error, "message") + else None + ), + object=vs_file.object or "vector_store.file", + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store file not found: %s", e) + response = NotFoundResponse(resource="vector_store_file", resource_id=file_id) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to get vector store file: %s", e) + response = InternalServerErrorResponse( + response="Unable to retrieve vector store file", + cause=( + f"Error retrieving file '{file_id}' from vector store " + f"'{vector_store_id}': {type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e + + +@router.delete( + 
"/vector-stores/{vector_store_id}/files/{file_id}", + responses={"204": {"description": "File deleted from vector store"}}, +) +@authorize(Action.MANAGE_VECTOR_STORES) +async def delete_vector_store_file( + request: Request, + vector_store_id: str, + file_id: str, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> None: + """Delete a file from a vector store. + + Parameters: + request: The incoming HTTP request. + vector_store_id: ID of the vector store. + file_id: ID of the file to delete. + auth: Authentication tuple from the auth dependency. + + Raises: + HTTPException: + - 401: Authentication failed + - 403: Authorization failed + - 404: File not found in vector store + - 500: Lightspeed Stack configuration not loaded + - 503: Unable to connect to Llama Stack + """ + _ = auth + _ = request + + check_configuration_loaded(configuration) + + try: + client = AsyncLlamaStackClientHolder().get_client() + await client.vector_stores.files.delete( + vector_store_id=vector_store_id, + file_id=file_id, + ) + except APIConnectionError as e: + logger.error("Unable to connect to Llama Stack: %s", e) + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + logger.error("Vector store file not found: %s", e) + response = NotFoundResponse(resource="vector_store_file", resource_id=file_id) + raise HTTPException(**response.model_dump()) from e + except Exception as e: + logger.error("Unable to delete vector store file: %s", e) + response = InternalServerErrorResponse( + response="Unable to delete vector store file", + cause=( + f"Error deleting file '{file_id}' from vector store " + f"'{vector_store_id}': {type(e).__name__}: {str(e)}" + ), + ) + raise HTTPException(**response.model_dump()) from e diff --git a/src/app/routers.py b/src/app/routers.py index 1841d1fc4..391864b4f 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -28,6 +28,7 @@ 
stream_interrupt, streaming_query, tools, + vector_stores, ) @@ -53,6 +54,7 @@ def include_routers(app: FastAPI) -> None: app.include_router(shields.router, prefix="/v1") app.include_router(providers.router, prefix="/v1") app.include_router(rags.router, prefix="/v1") + app.include_router(vector_stores.router, prefix="/v1") # Query endpoints app.include_router(query.router, prefix="/v1") app.include_router(streaming_query.router, prefix="/v1") diff --git a/src/models/config.py b/src/models/config.py index 1b86e5437..3d69ecf2c 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -1030,6 +1030,11 @@ class Action(str, Enum): A2A_MESSAGE = "a2a_message" A2A_JSONRPC = "a2a_jsonrpc" + # Vector store management + MANAGE_VECTOR_STORES = "manage_vector_stores" + READ_VECTOR_STORES = "read_vector_stores" + MANAGE_FILES = "manage_files" + class AccessRule(ConfigurationBase): """Rule defining what actions a role can perform.""" diff --git a/src/models/requests.py b/src/models/requests.py index 7a8aba99c..04351e4d3 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -1,5 +1,7 @@ """Models for REST API requests.""" +# pylint: disable=too-many-lines + from enum import Enum from typing import Any, Optional, Self @@ -935,3 +937,161 @@ def validate_authorization_header_values( "File-path based secrets are only supported in static YAML config." ) return value + + +class VectorStoreCreateRequest(BaseModel): + """Model representing a request to create a vector store. + + Attributes: + name: Name of the vector store. + embedding_model: Optional embedding model to use. + embedding_dimension: Optional embedding dimension. + chunking_strategy: Optional chunking strategy configuration. + provider_id: Optional vector store provider identifier. + metadata: Optional metadata dictionary for storing session information. 
+ """ + + name: str = Field( + ..., + description="Name of the vector store", + examples=["my_vector_store"], + min_length=1, + max_length=256, + ) + + embedding_model: Optional[str] = Field( + None, + description="Embedding model to use for the vector store", + examples=["text-embedding-ada-002"], + ) + + embedding_dimension: Optional[int] = Field( + None, + description="Dimension of the embedding vectors", + examples=[1536], + gt=0, + ) + + chunking_strategy: Optional[dict[str, Any]] = Field( + None, + description="Chunking strategy configuration", + examples=[{"type": "fixed", "chunk_size": 512, "chunk_overlap": 50}], + ) + + provider_id: Optional[str] = Field( + None, + description="Vector store provider identifier", + examples=["rhdh-docs"], + ) + + metadata: Optional[dict[str, Any]] = Field( + None, + description="Metadata dictionary for storing session information", + examples=[{"user_id": "user123", "session_id": "sess456"}], + ) + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "name": "my_vector_store", + "embedding_model": "text-embedding-ada-002", + "embedding_dimension": 1536, + "provider_id": "rhdh-docs", + "metadata": {"user_id": "user123"}, + }, + ] + }, + } + + +class VectorStoreUpdateRequest(BaseModel): + """Model representing a request to update a vector store. + + Attributes: + name: New name for the vector store. + expires_at: Optional expiration timestamp. + metadata: Optional metadata dictionary for storing session information. 
+ """ + + name: Optional[str] = Field( + None, + description="New name for the vector store", + examples=["updated_vector_store"], + min_length=1, + max_length=256, + ) + + expires_at: Optional[int] = Field( + None, + description="Unix timestamp when the vector store should expire", + examples=[1735689600], + gt=0, + ) + + metadata: Optional[dict[str, Any]] = Field( + None, + description="Metadata dictionary for storing session information", + examples=[{"user_id": "user123", "session_id": "sess456"}], + ) + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "name": "updated_vector_store", + "expires_at": 1735689600, + "metadata": {"user_id": "user123"}, + }, + ] + }, + } + + +class VectorStoreFileCreateRequest(BaseModel): + """Model representing a request to add a file to a vector store. + + Attributes: + file_id: ID of the file to add to the vector store. + attributes: Optional metadata key-value pairs (max 16 pairs). + chunking_strategy: Optional chunking strategy configuration. + """ + + file_id: str = Field( + ..., + description="ID of the file to add to the vector store", + examples=["file-abc123"], + min_length=1, + ) + + attributes: Optional[dict[str, str | float | bool]] = Field( + None, + description=( + "Set of up to 16 key-value pairs for storing additional information. " + "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers." 
+ ), + examples=[ + {"created_at": "2026-04-04T15:20:00Z", "updated_at": "2026-04-04T15:20:00Z"} + ], + ) + + chunking_strategy: Optional[dict[str, Any]] = Field( + None, + description="Chunking strategy configuration for this file", + examples=[{"type": "fixed", "chunk_size": 512, "chunk_overlap": 50}], + ) + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "file_id": "file-abc123", + "attributes": {"created_at": "2026-04-04T15:20:00Z"}, + "chunking_strategy": {"type": "fixed", "chunk_size": 512}, + }, + ] + }, + } diff --git a/src/models/responses.py b/src/models/responses.py index 5c2e974cc..5cd05a131 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -2604,3 +2604,197 @@ def __init__(self, *, backend_name: str, cause: str): cause=cause, status_code=status.HTTP_503_SERVICE_UNAVAILABLE, ) + + +class VectorStoreResponse(BaseModel): + """Response model containing a single vector store. + + Attributes: + id: Vector store ID. + name: Vector store name. + created_at: Unix timestamp when created. + last_active_at: Unix timestamp of last activity. + expires_at: Optional Unix timestamp when it expires. + status: Vector store status. + usage_bytes: Storage usage in bytes. 
+ """ + + id: str = Field(..., description="Vector store ID") + name: str = Field(..., description="Vector store name") + created_at: int = Field(..., description="Unix timestamp when created") + last_active_at: Optional[int] = Field( + None, description="Unix timestamp of last activity" + ) + expires_at: Optional[int] = Field( + None, description="Unix timestamp when it expires" + ) + status: str = Field(..., description="Vector store status") + usage_bytes: int = Field(default=0, description="Storage usage in bytes") + + model_config = {"extra": "forbid"} + metadata: Optional[dict[str, Any]] = Field( + None, + description="Metadata dictionary for storing session information", + examples=[ + {"conversation_id": "conv_123", "document_ids": ["doc_456", "doc_789"]} + ], + ) + + @classmethod + def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate OpenAPI response schema. + + Parameters: + _examples: Optional list of example identifiers (unused). + + Returns: + OpenAPI response schema dictionary. + """ + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + } + + +class VectorStoresListResponse(BaseModel): + """Response model containing a list of vector stores. + + Attributes: + data: List of vector store objects. + object: Object type (always "list"). + """ + + data: list[VectorStoreResponse] = Field( + default_factory=list, description="List of vector stores" + ) + object: str = Field(default="list", description="Object type") + + model_config = {"extra": "forbid"} + + @classmethod + def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate OpenAPI response schema. + + Parameters: + _examples: Optional list of example identifiers (unused). + + Returns: + OpenAPI response schema dictionary. + """ + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + } + + +class FileResponse(BaseModel): + """Response model containing a file object. 
+ + Attributes: + id: File ID. + filename: File name. + bytes: File size in bytes. + created_at: Unix timestamp when created. + purpose: File purpose. + object: Object type (always "file"). + """ + + id: str = Field(..., description="File ID") + filename: str = Field(..., description="File name") + bytes: int = Field(..., description="File size in bytes") + created_at: int = Field(..., description="Unix timestamp when created") + purpose: str = Field(default="assistants", description="File purpose") + object: str = Field(default="file", description="Object type") + + model_config = {"extra": "forbid"} + + @classmethod + def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate OpenAPI response schema. + + Parameters: + _examples: Optional list of example identifiers (unused). + + Returns: + OpenAPI response schema dictionary. + """ + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + } + + +class VectorStoreFileResponse(BaseModel): + """Response model containing a vector store file object. + + Attributes: + id: Vector store file ID. + vector_store_id: ID of the vector store. + status: File processing status. + attributes: Optional metadata key-value pairs. + last_error: Optional error message if processing failed. + object: Object type (always "vector_store.file"). + """ + + id: str = Field(..., description="Vector store file ID") + vector_store_id: str = Field(..., description="ID of the vector store") + status: str = Field(..., description="File processing status") + attributes: Optional[dict[str, str | float | bool]] = Field( + None, + description=( + "Set of up to 16 key-value pairs for storing additional information. " + "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers." 
+ ), + ) + last_error: Optional[str] = Field( + None, description="Error message if processing failed" + ) + object: str = Field(default="vector_store.file", description="Object type") + + model_config = {"extra": "forbid"} + + @classmethod + def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate OpenAPI response schema. + + Parameters: + _examples: Optional list of example identifiers (unused). + + Returns: + OpenAPI response schema dictionary. + """ + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + } + + +class VectorStoreFilesListResponse(BaseModel): + """Response model containing a list of vector store files. + + Attributes: + data: List of vector store file objects. + object: Object type (always "list"). + """ + + data: list[VectorStoreFileResponse] = Field( + default_factory=list, description="List of vector store files" + ) + object: str = Field(default="list", description="Object type") + + model_config = {"extra": "forbid"} + + @classmethod + def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate OpenAPI response schema. + + Parameters: + _examples: Optional list of example identifiers (unused). + + Returns: + OpenAPI response schema dictionary. 
+ """ + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + } diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py new file mode 100644 index 000000000..c4b3f12aa --- /dev/null +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -0,0 +1,527 @@ +"""Unit tests for the /vector-stores REST API endpoints.""" + +from typing import Any + +import pytest +from fastapi import HTTPException, Request, status +from llama_stack_client import APIConnectionError, BadRequestError +from pytest_mock import MockerFixture + +from app.endpoints.vector_stores import ( + add_file_to_vector_store, + create_file, + create_vector_store, + delete_vector_store, + delete_vector_store_file, + get_vector_store, + get_vector_store_file, + list_vector_store_files, + list_vector_stores, + update_vector_store, +) +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.requests import ( + VectorStoreCreateRequest, + VectorStoreFileCreateRequest, + VectorStoreUpdateRequest, +) +from tests.unit.utils.auth_helpers import mock_authorization_resolvers + + +# pylint: disable=R0903,R0902 +class VectorStore: + """Mock vector store object.""" + + def __init__( + self, + vs_id: str, + name: str, + created_at: int = 1735689600, + vs_status: str = "active", + ) -> None: + """Initialize vector store mock.""" + self.id = vs_id + self.name = name + self.created_at = created_at + self.last_active_at = created_at + self.expires_at = None + self.object = "vector_store" + self.status = vs_status + self.usage_bytes = 0 + + +# pylint: disable=R0903 +class VectorStoresList: + """Mock vector stores list.""" + + def __init__(self, stores: list[VectorStore]) -> None: + """Initialize vector stores list mock.""" + self.data = stores + + +# pylint: disable=R0903 +class File: + """Mock file object.""" + + def __init__(self, file_id: str, filename: str, file_bytes: int = 1024) -> None: + """Initialize file 
mock.""" + self.id = file_id + self.filename = filename + self.bytes = file_bytes + self.created_at = 1735689600 + self.purpose = "assistants" + self.object = "file" + + +# pylint: disable=R0903 +class VectorStoreFile: + """Mock vector store file object.""" + + def __init__( + self, file_id: str, vector_store_id: str, file_status: str = "completed" + ) -> None: + """Initialize vector store file mock.""" + self.id = file_id + self.vector_store_id = vector_store_id + self.created_at = 1735689600 + self.status = file_status + self.last_error = None + self.object = "vector_store.file" + + +# pylint: disable=R0903 +class VectorStoreFilesList: + """Mock vector store files list.""" + + def __init__(self, files: list[VectorStoreFile]) -> None: + """Initialize vector store files list mock.""" + self.data = files + + +def get_test_config() -> dict[str, Any]: + """Get test configuration dictionary. + + Returns: + Test configuration dictionary. + """ + return { + "name": "foo", + "service": { + "host": "localhost", + "port": 8080, + "auth_enabled": False, + "workers": 1, + "color_log": True, + "access_log": True, + }, + "llama_stack": { + "api_key": "xyzzy", + "url": "http://x.y.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": { + "feedback_enabled": False, + }, + "customization": None, + "authorization": {"access_rules": []}, + "authentication": {"module": "noop"}, + } + + +def get_test_request() -> Request: + """Get test request object. + + Returns: + Test request object. + """ + return Request( + scope={ + "type": "http", + "headers": [(b"authorization", b"Bearer test-token")], + } + ) + + +def get_test_auth() -> AuthTuple: + """Get test auth tuple. + + Returns: + Test auth tuple. 
+ """ + return ("test_user_id", "test_user", True, "test_token") + + +@pytest.mark.asyncio +async def test_create_vector_store_configuration_not_loaded( + mocker: MockerFixture, +) -> None: + """Test create vector store endpoint if configuration is not loaded.""" + mock_authorization_resolvers(mocker) + + mock_config = AppConfig() + mocker.patch("app.endpoints.vector_stores.configuration", mock_config) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreCreateRequest(name="test_store") + + with pytest.raises(HTTPException) as e: + await create_vector_store(request=request, auth=auth, body=body) + assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore + + +@pytest.mark.asyncio +async def test_create_vector_store_success(mocker: MockerFixture) -> None: + """Test successful vector store creation.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.create.return_value = VectorStore("vs_123", "test_store") + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreCreateRequest(name="test_store") + + response = await create_vector_store(request=request, auth=auth, body=body) + assert response is not None + assert response.id == "vs_123" + assert response.name == "test_store" + assert response.status == "active" + + +@pytest.mark.asyncio +async def test_create_vector_store_connection_error(mocker: MockerFixture) -> None: + """Test create vector store with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + 
cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.create.side_effect = APIConnectionError(request=None) # type: ignore + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreCreateRequest(name="test_store") + + with pytest.raises(HTTPException) as e: + await create_vector_store(request=request, auth=auth, body=body) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore + + +@pytest.mark.asyncio +async def test_list_vector_stores_success(mocker: MockerFixture) -> None: + """Test successful vector stores list.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.list.return_value = VectorStoresList( + [VectorStore("vs_1", "store1"), VectorStore("vs_2", "store2")] + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + response = await list_vector_stores(request=request, auth=auth) + assert response is not None + assert len(response.data) == 2 + assert response.data[0].id == "vs_1" + assert response.data[1].id == "vs_2" + + +@pytest.mark.asyncio +async def test_get_vector_store_success(mocker: MockerFixture) -> None: + """Test successful vector store retrieval.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + 
mock_client.vector_stores.retrieve.return_value = VectorStore( + "vs_123", "test_store" + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + response = await get_vector_store( + request=request, vector_store_id="vs_123", auth=auth + ) + assert response is not None + assert response.id == "vs_123" + assert response.name == "test_store" + + +@pytest.mark.asyncio +async def test_get_vector_store_not_found(mocker: MockerFixture) -> None: + """Test vector store retrieval with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + # Create a mock response for BadRequestError + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.retrieve.side_effect = BadRequestError( + message="Not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await get_vector_store(request=request, vector_store_id="vs_999", auth=auth) + assert e.value.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.asyncio +async def test_update_vector_store_success(mocker: MockerFixture) -> None: + """Test successful vector store update.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.update.return_value = VectorStore( + "vs_123", "updated_store" + ) + mock_lsc = 
mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreUpdateRequest(name="updated_store") + + response = await update_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert response is not None + assert response.id == "vs_123" + assert response.name == "updated_store" + + +@pytest.mark.asyncio +async def test_delete_vector_store_success(mocker: MockerFixture) -> None: + """Test successful vector store deletion.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.delete.return_value = None + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + response = await delete_vector_store( + request=request, vector_store_id="vs_123", auth=auth + ) + assert response is None + + +@pytest.mark.asyncio +async def test_create_file_success(mocker: MockerFixture) -> None: + """Test successful file upload.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.files.create.return_value = File("file_123", "test.txt", 1024) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + # Mock UploadFile + mock_file = mocker.AsyncMock() + mock_file.filename = 
"test.txt" + mock_file.read.return_value = b"test content" + + response = await create_file(request=request, auth=auth, file=mock_file) + assert response is not None + assert response.id == "file_123" + assert response.filename == "test.txt" + assert response.bytes == 1024 + + +@pytest.mark.asyncio +async def test_add_file_to_vector_store_success(mocker: MockerFixture) -> None: + """Test successfully adding file to vector store.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.create.return_value = VectorStoreFile( + "file_123", "vs_123" + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_123") + + response = await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert response is not None + assert response.id == "file_123" + assert response.vector_store_id == "vs_123" + assert response.status == "completed" + + +@pytest.mark.asyncio +async def test_list_vector_store_files_success(mocker: MockerFixture) -> None: + """Test successfully listing files in vector store.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.list.return_value = VectorStoreFilesList( + [ + VectorStoreFile("file_1", "vs_123"), + VectorStoreFile("file_2", "vs_123"), + ] + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + 
request = get_test_request() + auth = get_test_auth() + + response = await list_vector_store_files( + request=request, vector_store_id="vs_123", auth=auth + ) + assert response is not None + assert len(response.data) == 2 + assert response.data[0].id == "file_1" + assert response.data[1].id == "file_2" + + +@pytest.mark.asyncio +async def test_get_vector_store_file_success(mocker: MockerFixture) -> None: + """Test successfully retrieving file from vector store.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.retrieve.return_value = VectorStoreFile( + "file_123", "vs_123" + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + response = await get_vector_store_file( + request=request, vector_store_id="vs_123", file_id="file_123", auth=auth + ) + assert response is not None + assert response.id == "file_123" + assert response.vector_store_id == "vs_123" + + +@pytest.mark.asyncio +async def test_delete_vector_store_file_success(mocker: MockerFixture) -> None: + """Test successfully deleting file from vector store.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.delete.return_value = None + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + response = await delete_vector_store_file( + request=request, vector_store_id="vs_123", 
file_id="file_123", auth=auth + ) + assert response is None From 3fe63aa5fbec56d188233a3dbc6b359278bc3bdc Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 7 Apr 2026 21:08:21 -0400 Subject: [PATCH 2/7] unit tests Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 4 - .../unit/app/endpoints/test_vector_stores.py | 549 ++++++++++++++++++ tests/unit/app/test_routers.py | 9 +- 3 files changed, 555 insertions(+), 7 deletions(-) diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index d3dc3597d..fb7f024c0 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -128,7 +128,6 @@ async def create_vector_store( # Extract provider_id for extra_body (not a direct client parameter) body_dict = body.model_dump(exclude_none=True) - print("client.models.list() reaches here", await client.models.list()) extra_body = {} if "provider_id" in body_dict: extra_body["provider_id"] = body_dict.pop("provider_id") @@ -462,7 +461,6 @@ async def create_file( filename, len(content), ) - print("filename reaches here", filename) # Convert to BytesIO for Llama Stack client # The client expects bytes, io.IOBase, PathLike, or a tuple @@ -722,12 +720,10 @@ async def get_vector_store_file( try: client = AsyncLlamaStackClientHolder().get_client() - print("file_id", file_id) vs_file = await client.vector_stores.files.retrieve( vector_store_id=vector_store_id, file_id=file_id, ) - print("vs_file reaches here", vs_file) return VectorStoreFileResponse( id=vs_file.id, diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py index c4b3f12aa..2cdd2cffd 100644 --- a/tests/unit/app/endpoints/test_vector_stores.py +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -1,5 +1,7 @@ """Unit tests for the /vector-stores REST API endpoints.""" +# pylint: disable=too-many-lines + from typing import Any import pytest @@ -49,6 +51,7 @@ def __init__( self.object = "vector_store" 
self.status = vs_status self.usage_bytes = 0 + self.metadata = None # pylint: disable=R0903 @@ -86,6 +89,7 @@ def __init__( self.vector_store_id = vector_store_id self.created_at = 1735689600 self.status = file_status + self.attributes = None self.last_error = None self.object = "vector_store.file" @@ -437,6 +441,130 @@ async def test_add_file_to_vector_store_success(mocker: MockerFixture) -> None: assert response.status == "completed" +@pytest.mark.asyncio +async def test_add_file_to_vector_store_retry_on_database_lock( + mocker: MockerFixture, +) -> None: + """Test retry logic when database lock error occurs.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + # First call raises database lock error, second call succeeds + mock_client.vector_stores.files.create.side_effect = [ + Exception("database is locked"), + VectorStoreFile("file_123", "vs_123"), + ] + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + # Mock asyncio.sleep to avoid actual delays in tests + mock_sleep = mocker.patch("app.endpoints.vector_stores.asyncio.sleep") + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_123") + + response = await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert response is not None + assert response.id == "file_123" + assert response.vector_store_id == "vs_123" + assert response.status == "completed" + + # Verify retry logic was triggered + assert mock_client.vector_stores.files.create.call_count == 2 + # Verify sleep was called once with 0.5 seconds (first retry delay) + mock_sleep.assert_called_once_with(0.5) + + +@pytest.mark.asyncio +async def 
test_add_file_to_vector_store_max_retries_exceeded( + mocker: MockerFixture, +) -> None: + """Test that max retries are respected when database lock persists.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + # All attempts fail with database lock error + mock_client.vector_stores.files.create.side_effect = Exception("database is locked") + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + # Mock asyncio.sleep to avoid actual delays in tests + mock_sleep = mocker.patch("app.endpoints.vector_stores.asyncio.sleep") + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_123") + + with pytest.raises(HTTPException) as e: + await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + + # Verify all 3 retry attempts were made + assert mock_client.vector_stores.files.create.call_count == 3 + # Verify exponential backoff: 0.5s, then 1s (0.5 * 2) + assert mock_sleep.call_count == 2 + assert mock_sleep.call_args_list[0][0][0] == 0.5 + assert mock_sleep.call_args_list[1][0][0] == 1.0 + + +@pytest.mark.asyncio +async def test_add_file_to_vector_store_non_lock_error_no_retry( + mocker: MockerFixture, +) -> None: + """Test that non-lock errors are not retried.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + # Raise a non-lock error + mock_client.vector_stores.files.create.side_effect = Exception("Some other error") + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + 
mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + # Mock asyncio.sleep to verify it's not called + mock_sleep = mocker.patch("app.endpoints.vector_stores.asyncio.sleep") + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_123") + + with pytest.raises(HTTPException) as e: + await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + + # Verify only one attempt was made (no retries for non-lock errors) + assert mock_client.vector_stores.files.create.call_count == 1 + # Verify sleep was not called (no retry) + mock_sleep.assert_not_called() + + @pytest.mark.asyncio async def test_list_vector_store_files_success(mocker: MockerFixture) -> None: """Test successfully listing files in vector store.""" @@ -525,3 +653,424 @@ async def test_delete_vector_store_file_success(mocker: MockerFixture) -> None: request=request, vector_store_id="vs_123", file_id="file_123", auth=auth ) assert response is None + + +# Additional error path tests + + +@pytest.mark.asyncio +async def test_list_vector_stores_connection_error(mocker: MockerFixture) -> None: + """Test list vector stores with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.list.side_effect = APIConnectionError(request=None) # type: ignore + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await list_vector_stores(request=request, auth=auth) + assert e.value.status_code == 
status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_update_vector_store_connection_error(mocker: MockerFixture) -> None: + """Test update vector store with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.update.side_effect = APIConnectionError(request=None) # type: ignore + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreUpdateRequest(name="updated_store") + + with pytest.raises(HTTPException) as e: + await update_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_update_vector_store_not_found(mocker: MockerFixture) -> None: + """Test update vector store with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.update.side_effect = BadRequestError( + message="Not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreUpdateRequest(name="updated_store") + + with pytest.raises(HTTPException) as e: + await update_vector_store( + request=request, vector_store_id="vs_999", auth=auth, body=body + ) + assert 
e.value.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.asyncio +async def test_delete_vector_store_connection_error(mocker: MockerFixture) -> None: + """Test delete vector store with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.delete.side_effect = APIConnectionError(request=None) # type: ignore + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await delete_vector_store(request=request, vector_store_id="vs_123", auth=auth) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_delete_vector_store_not_found(mocker: MockerFixture) -> None: + """Test delete vector store with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.delete.side_effect = BadRequestError( + message="Not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await delete_vector_store(request=request, vector_store_id="vs_999", auth=auth) + assert e.value.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.asyncio +async def test_create_file_connection_error(mocker: 
MockerFixture) -> None: + """Test create file with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.files.create.side_effect = APIConnectionError(request=None) # type: ignore + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + mock_file = mocker.AsyncMock() + mock_file.filename = "test.txt" + mock_file.read.return_value = b"test content" + + with pytest.raises(HTTPException) as e: + await create_file(request=request, auth=auth, file=mock_file) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_create_file_bad_request(mocker: MockerFixture) -> None: + """Test create file with bad request error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.files.create.side_effect = BadRequestError( + message="File too large", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + mock_file = mocker.AsyncMock() + mock_file.filename = "test.txt" + mock_file.read.return_value = b"test content" + + with pytest.raises(HTTPException) as e: + await create_file(request=request, auth=auth, file=mock_file) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def 
test_add_file_to_vector_store_connection_error( + mocker: MockerFixture, +) -> None: + """Test add file to vector store with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.create.side_effect = APIConnectionError( + request=None # type: ignore + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_123") + + with pytest.raises(HTTPException) as e: + await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_add_file_to_vector_store_not_found(mocker: MockerFixture) -> None: + """Test add file to vector store with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.files.create.side_effect = BadRequestError( + message="File not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + body = VectorStoreFileCreateRequest(file_id="file_999") + + with pytest.raises(HTTPException) as e: + await add_file_to_vector_store( + request=request, vector_store_id="vs_123", auth=auth, body=body + ) + assert 
e.value.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.asyncio +async def test_list_vector_store_files_connection_error( + mocker: MockerFixture, +) -> None: + """Test list vector store files with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.list.side_effect = APIConnectionError( + request=None # type: ignore + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await list_vector_store_files( + request=request, vector_store_id="vs_123", auth=auth + ) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_get_vector_store_file_connection_error(mocker: MockerFixture) -> None: + """Test get vector store file with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.retrieve.side_effect = APIConnectionError( + request=None # type: ignore + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await get_vector_store_file( + request=request, vector_store_id="vs_123", file_id="file_123", auth=auth + ) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_get_vector_store_file_not_found(mocker: MockerFixture) 
-> None: + """Test get vector store file with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.files.retrieve.side_effect = BadRequestError( + message="File not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await get_vector_store_file( + request=request, vector_store_id="vs_123", file_id="file_999", auth=auth + ) + assert e.value.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.asyncio +async def test_delete_vector_store_file_connection_error( + mocker: MockerFixture, +) -> None: + """Test delete vector store file with connection error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_client.vector_stores.files.delete.side_effect = APIConnectionError( + request=None # type: ignore + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await delete_vector_store_file( + request=request, vector_store_id="vs_123", file_id="file_123", auth=auth + ) + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.asyncio +async def test_delete_vector_store_file_not_found(mocker: MockerFixture) -> None: + """Test delete vector 
store file with not found error.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.files.delete.side_effect = BadRequestError( + message="File not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await delete_vector_store_file( + request=request, vector_store_id="vs_123", file_id="file_999", auth=auth + ) + assert e.value.status_code == status.HTTP_404_NOT_FOUND diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py index 7ac178881..7e1f455a6 100644 --- a/tests/unit/app/test_routers.py +++ b/tests/unit/app/test_routers.py @@ -28,6 +28,7 @@ stream_interrupt, streaming_query, tools, + vector_stores, ) from app.routers import include_routers @@ -109,7 +110,7 @@ def test_include_routers() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 22 + assert len(app.routers) == 23 assert root.router in app.get_routers() assert info.router in app.get_routers() assert models.router in app.get_routers() @@ -132,6 +133,7 @@ def test_include_routers() -> None: assert a2a.router in app.get_routers() assert stream_interrupt.router in app.get_routers() assert responses.router in app.get_routers() + assert vector_stores.router in app.get_routers() def test_check_prefixes() -> None: @@ -139,7 +141,7 @@ def test_check_prefixes() -> None: Verify that include_routers registers the expected routers with their configured URL prefixes. 
- Asserts that 21 routers are registered on a MockFastAPI instance and that + Asserts that 23 routers are registered on a MockFastAPI instance and that each router's prefix matches the expected value (e.g., root, health, authorized, metrics use an empty prefix; most API routers use "/v1"; conversations_v2 uses "/v2"). @@ -148,7 +150,7 @@ def test_check_prefixes() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 22 + assert len(app.routers) == 23 assert app.get_router_prefix(root.router) == "" assert app.get_router_prefix(info.router) == "/v1" assert app.get_router_prefix(models.router) == "/v1" @@ -172,3 +174,4 @@ def test_check_prefixes() -> None: assert app.get_router_prefix(a2a.router) == "" assert app.get_router_prefix(stream_interrupt.router) == "/v1" assert app.get_router_prefix(responses.router) == "/v1" + assert app.get_router_prefix(vector_stores.router) == "/v1" From 03470cbf941f7d9067b2abd93219d932ba764707 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 9 Apr 2026 14:21:16 -0400 Subject: [PATCH 3/7] address coderabbit Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 19 ++- src/models/requests.py | 52 ++++++ .../unit/app/endpoints/test_vector_stores.py | 42 ++++- .../requests/test_vector_store_requests.py | 158 ++++++++++++++++++ 4 files changed, 263 insertions(+), 8 deletions(-) create mode 100644 tests/unit/models/requests/test_vector_store_requests.py diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index fb7f024c0..3876f1175 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -6,8 +6,7 @@ from io import BytesIO from typing import Annotated, Any -from fastapi import APIRouter, File, HTTPException, Request, UploadFile -from fastapi.params import Depends +from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status from llama_stack_client import APIConnectionError, BadRequestError from authentication 
import get_auth_dependency @@ -89,6 +88,7 @@ examples=["missing header", "missing token"] ), 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["vector_store"]), 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), 503: ServiceUnavailableResponse.openapi_response(), } @@ -136,7 +136,7 @@ async def create_vector_store( if "embedding_dimension" in body_dict: extra_body["embedding_dimension"] = body_dict.pop("embedding_dimension") - logger.info( + logger.debug( "Creating vector store - body_dict: %s, extra_body: %s", body_dict, extra_body, @@ -365,6 +365,7 @@ async def update_vector_store( @router.delete( "/vector-stores/{vector_store_id}", responses={"204": {"description": "Vector store deleted"}}, + status_code=status.HTTP_204_NO_CONTENT, ) @authorize(Action.MANAGE_VECTOR_STORES) async def delete_vector_store( @@ -599,8 +600,10 @@ async def add_file_to_vector_store( raise HTTPException(**response.model_dump()) from e except BadRequestError as e: logger.error("Vector store file operation failed: %s", e) + # Don't assume which resource is missing - could be vector_store_id OR file_id response = NotFoundResponse( - resource="vector_store_file", resource_id=body.file_id + resource="vector_store_or_file", + resource_id=f"vector_store={vector_store_id}, file={body.file_id}", ) raise HTTPException(**response.model_dump()) from e except Exception as e: @@ -639,6 +642,7 @@ async def list_vector_store_files( HTTPException: - 401: Authentication failed - 403: Authorization failed + - 404: Vector store not found - 500: Lightspeed Stack configuration not loaded - 503: Unable to connect to Llama Stack """ @@ -671,6 +675,12 @@ async def list_vector_store_files( logger.error("Unable to connect to Llama Stack: %s", e) response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) raise HTTPException(**response.model_dump()) from e + except BadRequestError as e: + 
logger.error("Vector store not found: %s", e) + response = NotFoundResponse( + resource="vector_store", resource_id=vector_store_id + ) + raise HTTPException(**response.model_dump()) from e except Exception as e: logger.error("Unable to list vector store files: %s", e) response = InternalServerErrorResponse( @@ -760,6 +770,7 @@ async def get_vector_store_file( @router.delete( "/vector-stores/{vector_store_id}/files/{file_id}", responses={"204": {"description": "File deleted from vector store"}}, + status_code=status.HTTP_204_NO_CONTENT, ) @authorize(Action.MANAGE_VECTOR_STORES) async def delete_vector_store_file( diff --git a/src/models/requests.py b/src/models/requests.py index b47f541cc..32207d788 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -1069,6 +1069,22 @@ class VectorStoreUpdateRequest(BaseModel): }, } + @model_validator(mode="after") + def check_at_least_one_field(self) -> Self: + """Ensure at least one field is provided for update. + + Raises: + ValueError: If all fields are None (empty update). + + Returns: + Self: The validated model instance. + """ + if self.name is None and self.expires_at is None and self.metadata is None: + raise ValueError( + "At least one field must be provided: name, expires_at, or metadata" + ) + return self + class VectorStoreFileCreateRequest(BaseModel): """Model representing a request to add a file to a vector store. @@ -1115,3 +1131,39 @@ class VectorStoreFileCreateRequest(BaseModel): ] }, } + + @field_validator("attributes") + @classmethod + def validate_attributes( + cls, value: Optional[dict[str, str | float | bool]] + ) -> Optional[dict[str, str | float | bool]]: + """Validate attributes field constraints. + + Ensures: + - Maximum 16 key-value pairs + - Keys are max 64 characters + - String values are max 512 characters + + Parameters: + value: The attributes dictionary to validate. + + Raises: + ValueError: If constraints are violated. + + Returns: + The validated attributes dictionary. 
+ """ + if value is None: + return value + + if len(value) > 16: + raise ValueError(f"attributes can have at most 16 pairs, got {len(value)}") + + for key, val in value.items(): + if len(key) > 64: + raise ValueError(f"attribute key '{key}' exceeds 64 characters") + + if isinstance(val, str) and len(val) > 512: + raise ValueError(f"attribute value for '{key}' exceeds 512 characters") + + return value diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py index 2cdd2cffd..5365287ee 100644 --- a/tests/unit/app/endpoints/test_vector_stores.py +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -172,8 +172,9 @@ async def test_create_vector_store_configuration_not_loaded( with pytest.raises(HTTPException) as e: await create_vector_store(request=request, auth=auth, body=body) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore + + assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @pytest.mark.asyncio @@ -227,8 +228,9 @@ async def test_create_vector_store_connection_error(mocker: MockerFixture) -> No with pytest.raises(HTTPException) as e: await create_vector_store(request=request, auth=auth, body=body) - assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE - assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore + + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio @@ -954,6 +956,38 @@ async def test_list_vector_store_files_connection_error( assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE +@pytest.mark.asyncio +async def test_list_vector_store_files_not_found(mocker: MockerFixture) -> None: + """Test list vector store files with 
invalid vector store ID.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mock_client = mocker.AsyncMock() + mock_response = mocker.Mock() + mock_response.request = mocker.Mock() + mock_client.vector_stores.files.list.side_effect = BadRequestError( + message="Vector store not found", response=mock_response, body=None + ) + mock_lsc = mocker.patch( + "app.endpoints.vector_stores.AsyncLlamaStackClientHolder.get_client" + ) + mock_lsc.return_value = mock_client + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + with pytest.raises(HTTPException) as e: + await list_vector_store_files( + request=request, vector_store_id="vs_999", auth=auth + ) + + assert e.value.status_code == status.HTTP_404_NOT_FOUND + + @pytest.mark.asyncio async def test_get_vector_store_file_connection_error(mocker: MockerFixture) -> None: """Test get vector store file with connection error.""" diff --git a/tests/unit/models/requests/test_vector_store_requests.py b/tests/unit/models/requests/test_vector_store_requests.py new file mode 100644 index 000000000..c666023f7 --- /dev/null +++ b/tests/unit/models/requests/test_vector_store_requests.py @@ -0,0 +1,158 @@ +"""Unit tests for Vector Store request models.""" + +import pytest +from pydantic import ValidationError + +from models.requests import VectorStoreFileCreateRequest, VectorStoreUpdateRequest + + +class TestVectorStoreUpdateRequest: + """Test cases for the VectorStoreUpdateRequest model.""" + + def test_valid_update_with_name(self) -> None: + """Test valid update request with name field.""" + request = VectorStoreUpdateRequest(name="updated_store") + assert request.name == "updated_store" + assert request.expires_at is None + assert request.metadata is None + + def test_valid_update_with_expires_at(self) -> None: + """Test valid update request with expires_at field.""" + request = 
VectorStoreUpdateRequest(expires_at=1735689600) + assert request.name is None + assert request.expires_at == 1735689600 + assert request.metadata is None + + def test_valid_update_with_metadata(self) -> None: + """Test valid update request with metadata field.""" + metadata = {"user_id": "user123"} + request = VectorStoreUpdateRequest(metadata=metadata) + assert request.name is None + assert request.expires_at is None + assert request.metadata == metadata + + def test_valid_update_with_multiple_fields(self) -> None: + """Test valid update request with multiple fields.""" + request = VectorStoreUpdateRequest( + name="updated_store", + expires_at=1735689600, + metadata={"user_id": "user123"}, + ) + assert request.name == "updated_store" + assert request.expires_at == 1735689600 + assert request.metadata == {"user_id": "user123"} + + def test_empty_update_rejected(self) -> None: + """Test that empty update request is rejected.""" + with pytest.raises( + ValueError, + match="At least one field must be provided: name, expires_at, or metadata", + ): + VectorStoreUpdateRequest() + + +class TestVectorStoreFileCreateRequest: + """Test cases for the VectorStoreFileCreateRequest model.""" + + def test_valid_request_basic(self) -> None: + """Test valid request with only file_id.""" + request = VectorStoreFileCreateRequest(file_id="file-abc123") + assert request.file_id == "file-abc123" + assert request.attributes is None + assert request.chunking_strategy is None + + def test_valid_attributes_basic(self) -> None: + """Test valid request with attributes.""" + attributes = {"key1": "value1", "key2": "value2"} + request = VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert request.file_id == "file-abc123" + assert request.attributes == attributes + + def test_attributes_max_16_pairs(self) -> None: + """Test that attributes can have exactly 16 pairs.""" + attributes = {f"key{i}": f"value{i}" for i in range(16)} + request = 
VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert len(request.attributes) == 16 # type: ignore + + def test_attributes_exceeds_16_pairs(self) -> None: + """Test that attributes with more than 16 pairs is rejected.""" + attributes = {f"key{i}": f"value{i}" for i in range(17)} + with pytest.raises( + ValueError, match="attributes can have at most 16 pairs, got 17" + ): + VectorStoreFileCreateRequest(file_id="file-abc123", attributes=attributes) + + def test_attributes_key_max_64_chars(self) -> None: + """Test that attribute keys can be exactly 64 characters.""" + key_64_chars = "a" * 64 + attributes = {key_64_chars: "value"} + request = VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert key_64_chars in request.attributes # type: ignore + + def test_attributes_key_exceeds_64_chars(self) -> None: + """Test that attribute keys exceeding 64 characters are rejected.""" + key_65_chars = "a" * 65 + attributes = {key_65_chars: "value"} + with pytest.raises(ValueError, match="exceeds 64 characters"): + VectorStoreFileCreateRequest(file_id="file-abc123", attributes=attributes) + + def test_attributes_string_value_max_512_chars(self) -> None: + """Test that string attribute values can be exactly 512 characters.""" + value_512_chars = "b" * 512 + attributes = {"key": value_512_chars} + request = VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert request.attributes["key"] == value_512_chars # type: ignore + + def test_attributes_string_value_exceeds_512_chars(self) -> None: + """Test that string attribute values exceeding 512 characters are rejected.""" + value_513_chars = "b" * 513 + attributes = {"key": value_513_chars} + with pytest.raises(ValueError, match="exceeds 512 characters"): + VectorStoreFileCreateRequest(file_id="file-abc123", attributes=attributes) + + def test_attributes_non_string_values_allowed(self) -> None: + """Test that non-string attribute 
values (numbers, booleans) are not length-checked.""" + attributes = { + "bool_key": True, + "int_key": 12345, + "float_key": 3.14159, + } + request = VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert request.attributes == attributes + + def test_attributes_mixed_value_types(self) -> None: + """Test that mixed value types in attributes are validated correctly.""" + attributes = { + "string_key": "value", + "bool_key": False, + "number_key": 42, + } + request = VectorStoreFileCreateRequest( + file_id="file-abc123", attributes=attributes + ) + assert request.attributes == attributes + + def test_attributes_none_is_valid(self) -> None: + """Test that None attributes is valid (optional field).""" + request = VectorStoreFileCreateRequest(file_id="file-abc123", attributes=None) + assert request.attributes is None + + def test_file_id_required(self) -> None: + """Test that file_id is required.""" + with pytest.raises(ValidationError): + VectorStoreFileCreateRequest() # type: ignore + + def test_file_id_cannot_be_empty(self) -> None: + """Test that file_id cannot be an empty string.""" + with pytest.raises(ValidationError, match="at least 1 character"): + VectorStoreFileCreateRequest(file_id="") From 2bd58deb4bf950e2d36f92e00b1f9b00ee11f670 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 9 Apr 2026 18:05:04 -0400 Subject: [PATCH 4/7] addressing comments Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 75 ++++++++++++++++--- src/constants.py | 4 + .../unit/app/endpoints/test_vector_stores.py | 68 ++++++++++++++++- 3 files changed, 137 insertions(+), 10 deletions(-) diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index 3876f1175..50fa8fa67 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -14,6 +14,7 @@ from authorization.middleware import authorize from client import AsyncLlamaStackClientHolder from configuration import configuration +from 
constants import DEFAULT_MAX_FILE_UPLOAD_SIZE from log import get_logger from models.config import Action from models.requests import ( @@ -22,6 +23,7 @@ VectorStoreUpdateRequest, ) from models.responses import ( + AbstractErrorResponse, FileResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -63,6 +65,7 @@ file_responses: dict[int | str, dict[str, Any]] = { 200: FileResponse.openapi_response(), + 400: {"description": "Bad Request - Invalid file upload"}, 401: UnauthorizedResponse.openapi_response( examples=["missing header", "missing token"] ), @@ -434,22 +437,69 @@ async def create_file( Raises: HTTPException: - - 400: Bad request (e.g., file too large) + - 400: Bad request (e.g., file too large, invalid format) - 401: Authentication failed - 403: Authorization failed - 500: Lightspeed Stack configuration not loaded - 503: Unable to connect to Llama Stack """ _ = auth - _ = request check_configuration_loaded(configuration) + # Check Content-Length header BEFORE reading to prevent DoS via memory exhaustion + content_length = request.headers.get("content-length") + if content_length: + try: + size = int(content_length) + if size > DEFAULT_MAX_FILE_UPLOAD_SIZE: + response = AbstractErrorResponse( + response="File too large", + cause=( + f"File size {size} bytes exceeds maximum allowed " + f"size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " + f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} MB)" + ), + status_code=status.HTTP_400_BAD_REQUEST, + ) + raise HTTPException(**response.model_dump()) + except ValueError: + # Invalid Content-Length header, continue and validate after reading + pass + + # file.size attribute if available + if hasattr(file, "size") and file.size is not None: + if file.size > DEFAULT_MAX_FILE_UPLOAD_SIZE: + response = AbstractErrorResponse( + response="File too large", + cause=( + f"File size {file.size} bytes exceeds maximum allowed " + f"size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " + f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} 
MB)" + ), + status_code=status.HTTP_400_BAD_REQUEST, + ) + raise HTTPException(**response.model_dump()) + try: client = AsyncLlamaStackClientHolder().get_client() - # Read file content + # Read file content once content = await file.read() + + # Verify actual size after reading + if len(content) > DEFAULT_MAX_FILE_UPLOAD_SIZE: + response = AbstractErrorResponse( + response="File too large", + cause=( + f"File content size {len(content)} bytes exceeds maximum " + f"allowed size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " + f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} MB)" + ), + status_code=status.HTTP_400_BAD_REQUEST, + ) + raise HTTPException(**response.model_dump()) + filename = file.filename or "uploaded_file" # Add .txt extension if no extension present @@ -463,10 +513,8 @@ async def create_file( len(content), ) - # Convert to BytesIO for Llama Stack client - # The client expects bytes, io.IOBase, PathLike, or a tuple file_bytes = BytesIO(content) - file_bytes.name = filename # Set the filename attribute + file_bytes.name = filename file_obj = await client.files.create( file=file_bytes, @@ -487,7 +535,13 @@ async def create_file( raise HTTPException(**response.model_dump()) from e except BadRequestError as e: logger.error("Bad request for file upload: %s", e) - response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + # BadRequestError from Llama Stack indicates client error (e.g., file too large) + # Map to 400 Bad Request, not 503 Service Unavailable + response = AbstractErrorResponse( + response="Invalid file upload", + cause=f"File upload rejected by Llama Stack: {str(e)}", + status_code=status.HTTP_400_BAD_REQUEST, + ) raise HTTPException(**response.model_dump()) from e except Exception as e: full_trace = traceback.format_exc() @@ -572,9 +626,12 @@ async def add_file_to_vector_store( else: raise # Re-raise if not a lock error or max retries reached if not vs_file: - raise HTTPException( - status_code=500, detail="Failed to 
create vector store file" + # Use standard error response model for consistency + response = InternalServerErrorResponse( + response="Failed to create vector store file", + cause="All retry attempts failed to create the vector store file", ) + raise HTTPException(**response.model_dump()) logger.info( "Vector store file created - ID: %s, status: %s, last_error: %s", vs_file.id, diff --git a/src/constants.py b/src/constants.py index 9325a6a38..69fea3cf9 100644 --- a/src/constants.py +++ b/src/constants.py @@ -128,6 +128,10 @@ DEFAULT_AUTHENTICATION_MODULE = AUTH_MOD_NOOP # Maximum allowed size for base64-encoded x-rh-identity header (bytes) DEFAULT_RH_IDENTITY_MAX_HEADER_SIZE = 8192 + +# Maximum allowed file upload size (bytes) - 100MB default +# Protects against DoS attacks via large file uploads +DEFAULT_MAX_FILE_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB DEFAULT_JWT_UID_CLAIM = "user_id" DEFAULT_JWT_USER_NAME_CLAIM = "username" diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py index 5365287ee..0e9b3b268 100644 --- a/tests/unit/app/endpoints/test_vector_stores.py +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -402,6 +402,7 @@ async def test_create_file_success(mocker: MockerFixture) -> None: # Mock UploadFile mock_file = mocker.AsyncMock() mock_file.filename = "test.txt" + mock_file.size = 12 # Size of "test content" mock_file.read.return_value = b"test content" response = await create_file(request=request, auth=auth, file=mock_file) @@ -821,6 +822,7 @@ async def test_create_file_connection_error(mocker: MockerFixture) -> None: mock_file = mocker.AsyncMock() mock_file.filename = "test.txt" + mock_file.size = 12 # Size of "test content" mock_file.read.return_value = b"test content" with pytest.raises(HTTPException) as e: @@ -854,11 +856,75 @@ async def test_create_file_bad_request(mocker: MockerFixture) -> None: mock_file = mocker.AsyncMock() mock_file.filename = "test.txt" + mock_file.size = 12 # 
Size of "test content" mock_file.read.return_value = b"test content" with pytest.raises(HTTPException) as e: await create_file(request=request, auth=auth, file=mock_file) - assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + assert e.value.status_code == status.HTTP_400_BAD_REQUEST + + +@pytest.mark.asyncio +async def test_create_file_too_large(mocker: MockerFixture) -> None: + """Test create file with file size exceeding limit.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + request = get_test_request() + auth = get_test_auth() + + # Create a mock file that exceeds the size limit + mock_file = mocker.AsyncMock() + mock_file.filename = "large_file.pdf" + mock_file.size = 200 * 1024 * 1024 # 200 MB (exceeds 100 MB limit) + mock_file.read.return_value = b"x" * (200 * 1024 * 1024) + + with pytest.raises(HTTPException) as e: + await create_file(request=request, auth=auth, file=mock_file) + + assert e.value.status_code == status.HTTP_400_BAD_REQUEST + assert "too large" in str(e.value.detail).lower() + + +@pytest.mark.asyncio +async def test_create_file_content_length_too_large(mocker: MockerFixture) -> None: + """Test create file with Content-Length header exceeding limit.""" + mock_authorization_resolvers(mocker) + + config_dict = get_test_config() + cfg = AppConfig() + cfg.init_from_dict(config_dict) + + mocker.patch("app.endpoints.vector_stores.configuration", cfg) + + # Create request with large Content-Length header + request = Request( + scope={ + "type": "http", + "headers": [ + (b"authorization", b"Bearer test-token"), + (b"content-length", b"209715200"), # 200 MB + ], + } + ) + auth = get_test_auth() + + # Create a mock file + mock_file = mocker.AsyncMock() + mock_file.filename = "large_file.pdf" + mock_file.size = None # No size attribute + + with pytest.raises(HTTPException) as e: + await 
create_file(request=request, auth=auth, file=mock_file) + + assert e.value.status_code == status.HTTP_400_BAD_REQUEST + assert "too large" in str(e.value.detail).lower() @pytest.mark.asyncio From ffcc38925681162efe48673990e4e40867a7a50c Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 9 Apr 2026 18:31:09 -0400 Subject: [PATCH 5/7] address more comments Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 48 +++++++++++-------- .../unit/app/endpoints/test_vector_stores.py | 2 +- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index 50fa8fa67..bf1727a22 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -561,7 +561,7 @@ async def create_file( "/vector-stores/{vector_store_id}/files", responses=vector_store_file_responses ) @authorize(Action.MANAGE_VECTOR_STORES) -async def add_file_to_vector_store( +async def add_file_to_vector_store( # pylint: disable=too-many-locals request: Request, vector_store_id: str, auth: Annotated[AuthTuple, Depends(get_auth_dependency())], @@ -596,8 +596,9 @@ async def add_file_to_vector_store( # Retry logic for database lock errors max_retries = 3 - retry_delay = 0.5 # seconds + retry_delay = 0.3 # seconds vs_file = None + last_lock_error: Exception | None = None for attempt in range(max_retries): try: @@ -613,25 +614,30 @@ async def add_file_to_vector_store( ) is_last_attempt = attempt == max_retries - 1 - if is_lock_error and not is_last_attempt: - logger.warning( - "Database locked while adding file to vector store, " - "retrying in %s seconds (attempt %d/%d)", - retry_delay, - attempt + 1, - max_retries, - ) - await asyncio.sleep(retry_delay) - retry_delay *= 2 # Exponential backoff - else: - raise # Re-raise if not a lock error or max retries reached - if not vs_file: - # Use standard error response model for consistency - response = InternalServerErrorResponse( - response="Failed to create vector 
store file", - cause="All retry attempts failed to create the vector store file", - ) - raise HTTPException(**response.model_dump()) + if is_lock_error: + last_lock_error = retry_error + if not is_last_attempt: + logger.warning( + "Database locked while adding file to vector store, " + "retrying in %s seconds (attempt %d/%d)", + retry_delay, + attempt + 1, + max_retries, + ) + await asyncio.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + continue + break + raise # Re-raise if not a lock error + if vs_file is None: + if last_lock_error is not None: + # Use standard error response model for consistency + response = InternalServerErrorResponse( + response="Failed to create vector store file", + cause="All retry attempts failed to create the vector store file", + ) + raise HTTPException(**response.model_dump()) from last_lock_error + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) logger.info( "Vector store file created - ID: %s, status: %s, last_error: %s", vs_file.id, diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py index 0e9b3b268..514edffb6 100644 --- a/tests/unit/app/endpoints/test_vector_stores.py +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -883,7 +883,7 @@ async def test_create_file_too_large(mocker: MockerFixture) -> None: mock_file = mocker.AsyncMock() mock_file.filename = "large_file.pdf" mock_file.size = 200 * 1024 * 1024 # 200 MB (exceeds 100 MB limit) - mock_file.read.return_value = b"x" * (200 * 1024 * 1024) + mock_file.read.side_effect = AssertionError("File too large") with pytest.raises(HTTPException) as e: await create_file(request=request, auth=auth, file=mock_file) From 27047fd66dbcf5ef4a592506f751096ea3581876 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 9 Apr 2026 18:45:07 -0400 Subject: [PATCH 6/7] 0.3 -> 0.5 Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index bf1727a22..7b9c322e6 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -596,7 +596,7 @@ async def add_file_to_vector_store( # pylint: disable=too-many-locals # Retry logic for database lock errors max_retries = 3 - retry_delay = 0.3 # seconds + retry_delay = 0.5 # seconds vs_file = None last_lock_error: Exception | None = None From 7100ef35e8ee64af441ad6fb8ddb705cc24e4f99 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 10 Apr 2026 13:00:45 -0400 Subject: [PATCH 7/7] address comments Signed-off-by: Lucas --- src/app/endpoints/vector_stores.py | 114 +++++--- src/models/responses.py | 270 ++++++++++++------ .../unit/app/endpoints/test_vector_stores.py | 6 +- .../models/responses/test_error_responses.py | 2 +- 4 files changed, 266 insertions(+), 126 deletions(-) diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index 7b9c322e6..a32d4606d 100644 --- a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -7,7 +7,14 @@ from typing import Annotated, Any from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status -from llama_stack_client import APIConnectionError, BadRequestError +from llama_stack_client import ( + APIConnectionError, + BadRequestError, +) +from llama_stack_client import ( + APIStatusError as LLSApiStatusError, +) +from openai._exceptions import APIStatusError as OpenAIAPIStatusError from authentication import get_auth_dependency from authentication.interface import AuthTuple @@ -23,8 +30,9 @@ VectorStoreUpdateRequest, ) from models.responses import ( - AbstractErrorResponse, + BadRequestResponse, FileResponse, + FileTooLargeResponse, ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, @@ -36,6 +44,7 @@ VectorStoresListResponse, ) from utils.endpoints import check_configuration_loaded +from utils.query import 
handle_known_apistatus_errors logger = get_logger(__name__) router = APIRouter(tags=["vector-stores"]) @@ -65,7 +74,8 @@ file_responses: dict[int | str, dict[str, Any]] = { 200: FileResponse.openapi_response(), - 400: {"description": "Bad Request - Invalid file upload"}, + 400: BadRequestResponse.openapi_response(examples=["file_upload"]), + 413: FileTooLargeResponse.openapi_response(), 401: UnauthorizedResponse.openapi_response( examples=["missing header", "missing token"] ), @@ -164,6 +174,10 @@ async def create_vector_store( logger.error("Unable to connect to Llama Stack: %s", e) response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while creating vector store: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to create vector store: %s", e) response = InternalServerErrorResponse( @@ -223,6 +237,10 @@ async def list_vector_stores( logger.error("Unable to connect to Llama Stack: %s", e) response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while listing vector stores: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to list vector stores: %s", e) response = InternalServerErrorResponse( @@ -286,6 +304,10 @@ async def get_vector_store( resource="vector_store", resource_id=vector_store_id ) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while getting vector store: %s", e) + error_response 
= handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to get vector store: %s", e) response = InternalServerErrorResponse( @@ -356,6 +378,10 @@ async def update_vector_store( resource="vector_store", resource_id=vector_store_id ) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while updating vector store: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to update vector store: %s", e) response = InternalServerErrorResponse( @@ -409,6 +435,10 @@ async def delete_vector_store( resource="vector_store", resource_id=vector_store_id ) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while deleting vector store: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to delete vector store: %s", e) response = InternalServerErrorResponse( @@ -420,7 +450,7 @@ async def delete_vector_store( @router.post("/files", responses=file_responses) @authorize(Action.MANAGE_FILES) -async def create_file( +async def create_file( # pylint: disable=too-many-branches,too-many-statements request: Request, auth: Annotated[AuthTuple, Depends(get_auth_dependency())], file: UploadFile = File(...), @@ -453,14 +483,9 @@ async def create_file( try: size = int(content_length) if size > DEFAULT_MAX_FILE_UPLOAD_SIZE: - response = AbstractErrorResponse( - response="File too large", - cause=( - f"File size {size} bytes exceeds maximum allowed " - f"size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " - f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} MB)" - ), - 
status_code=status.HTTP_400_BAD_REQUEST, + response = FileTooLargeResponse( + file_size=size, + max_size=DEFAULT_MAX_FILE_UPLOAD_SIZE, ) raise HTTPException(**response.model_dump()) except ValueError: @@ -470,14 +495,9 @@ async def create_file( # file.size attribute if available if hasattr(file, "size") and file.size is not None: if file.size > DEFAULT_MAX_FILE_UPLOAD_SIZE: - response = AbstractErrorResponse( - response="File too large", - cause=( - f"File size {file.size} bytes exceeds maximum allowed " - f"size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " - f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} MB)" - ), - status_code=status.HTTP_400_BAD_REQUEST, + response = FileTooLargeResponse( + file_size=file.size, + max_size=DEFAULT_MAX_FILE_UPLOAD_SIZE, ) raise HTTPException(**response.model_dump()) @@ -489,14 +509,9 @@ async def create_file( # Verify actual size after reading if len(content) > DEFAULT_MAX_FILE_UPLOAD_SIZE: - response = AbstractErrorResponse( - response="File too large", - cause=( - f"File content size {len(content)} bytes exceeds maximum " - f"allowed size of {DEFAULT_MAX_FILE_UPLOAD_SIZE} bytes " - f"({DEFAULT_MAX_FILE_UPLOAD_SIZE // (1024 * 1024)} MB)" - ), - status_code=status.HTTP_400_BAD_REQUEST, + response = FileTooLargeResponse( + file_size=len(content), + max_size=DEFAULT_MAX_FILE_UPLOAD_SIZE, ) raise HTTPException(**response.model_dump()) @@ -535,14 +550,25 @@ async def create_file( raise HTTPException(**response.model_dump()) from e except BadRequestError as e: logger.error("Bad request for file upload: %s", e) - # BadRequestError from Llama Stack indicates client error (e.g., file too large) - # Map to 400 Bad Request, not 503 Service Unavailable - response = AbstractErrorResponse( - response="Invalid file upload", - cause=f"File upload rejected by Llama Stack: {str(e)}", - status_code=status.HTTP_400_BAD_REQUEST, - ) + # Check if backend rejected due to file size + error_msg = str(e).lower() + if "too large" in error_msg or "size" in 
error_msg or "exceeds" in error_msg: + response = FileTooLargeResponse( + response="Invalid file upload", + cause=f"File upload rejected by Llama Stack: {str(e)}", + ) + else: + response = InternalServerErrorResponse.query_failed( + cause=f"File upload rejected by Llama Stack: {str(e)}" + ) + # Override to use 400 status code since it's a client error + response.status_code = status.HTTP_400_BAD_REQUEST + response.detail.response = "Invalid file upload" raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while uploading file: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: full_trace = traceback.format_exc() logger.error("Unable to upload file: %s", e) @@ -561,7 +587,7 @@ async def create_file( "/vector-stores/{vector_store_id}/files", responses=vector_store_file_responses ) @authorize(Action.MANAGE_VECTOR_STORES) -async def add_file_to_vector_store( # pylint: disable=too-many-locals +async def add_file_to_vector_store( # pylint: disable=too-many-locals,too-many-statements request: Request, vector_store_id: str, auth: Annotated[AuthTuple, Depends(get_auth_dependency())], @@ -669,6 +695,10 @@ async def add_file_to_vector_store( # pylint: disable=too-many-locals resource_id=f"vector_store={vector_store_id}, file={body.file_id}", ) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while adding file to vector store: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to add file to vector store: %s", e) response = InternalServerErrorResponse( @@ -744,6 +774,10 @@ async def list_vector_store_files( resource="vector_store", resource_id=vector_store_id ) raise 
HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while listing vector store files: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to list vector store files: %s", e) response = InternalServerErrorResponse( @@ -818,6 +852,10 @@ async def get_vector_store_file( logger.error("Vector store file not found: %s", e) response = NotFoundResponse(resource="vector_store_file", resource_id=file_id) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while getting vector store file: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to get vector store file: %s", e) response = InternalServerErrorResponse( @@ -877,6 +915,10 @@ async def delete_vector_store_file( logger.error("Vector store file not found: %s", e) response = NotFoundResponse(resource="vector_store_file", resource_id=file_id) raise HTTPException(**response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + logger.error("API status error while deleting vector store file: %s", e) + error_response = handle_known_apistatus_errors(e, "llama-stack") + raise HTTPException(**error_response.model_dump()) from e except Exception as e: logger.error("Unable to delete vector store file: %s", e) response = InternalServerErrorResponse( diff --git a/src/models/responses.py b/src/models/responses.py index 2e22b7d02..7c85bbe9e 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -1809,7 +1809,14 @@ class BadRequestResponse(AbstractErrorResponse): "123e4567-e89b-12d3-a456-426614174000 has invalid format." 
), }, - } + }, + { + "label": "file_upload", + "detail": { + "response": "Invalid file upload", + "cause": "File upload rejected: Invalid file format", + }, + }, ] } } @@ -2235,6 +2242,66 @@ def __init__( ) +class FileTooLargeResponse(AbstractErrorResponse): + """413 Content Too Large - File upload exceeds size limit.""" + + description: ClassVar[str] = "File upload exceeds size limit" + model_config = { + "json_schema_extra": { + "examples": [ + { + "label": "file upload", + "detail": { + "response": "File too large", + "cause": ( + "File size 150000000 bytes exceeds maximum " + "allowed size of 104857600 bytes (100 MB)" + ), + }, + }, + { + "label": "backend rejection", + "detail": { + "response": "Invalid file upload", + "cause": "File upload rejected by Llama Stack: File size exceeds limit", + }, + }, + ] + } + } + + def __init__( + self, + *, + response: str = "File too large", + cause: str | None = None, + file_size: int | None = None, + max_size: int | None = None, + ) -> None: + """Initialize a FileTooLargeResponse. + + Args: + response: Short summary of the error. Defaults to "File too large". + cause: Detailed explanation. If not provided, will be generated from + file_size and max_size. + file_size: The size of the uploaded file in bytes. + max_size: The maximum allowed file size in bytes. + """ + if cause is None and file_size is not None and max_size is not None: + cause = ( + f"File size {file_size} bytes exceeds maximum allowed " + f"size of {max_size} bytes ({max_size // (1024 * 1024)} MB)" + ) + elif cause is None: + cause = "The uploaded file exceeds the maximum allowed size." 
+ + super().__init__( + response=response, + cause=cause, + status_code=status.HTTP_413_CONTENT_TOO_LARGE, + ) + + class UnprocessableEntityResponse(AbstractErrorResponse): """422 Unprocessable Entity - Request validation failed.""" @@ -2633,7 +2700,7 @@ def __init__(self, *, backend_name: str, cause: str): ) -class VectorStoreResponse(BaseModel): +class VectorStoreResponse(AbstractSuccessfulResponse): """Response model containing a single vector store. Attributes: @@ -2644,6 +2711,7 @@ class VectorStoreResponse(BaseModel): expires_at: Optional Unix timestamp when it expires. status: Vector store status. usage_bytes: Storage usage in bytes. + metadata: Optional metadata dictionary for storing session information. """ id: str = Field(..., description="Vector store ID") @@ -2657,8 +2725,6 @@ class VectorStoreResponse(BaseModel): ) status: str = Field(..., description="Vector store status") usage_bytes: int = Field(default=0, description="Storage usage in bytes") - - model_config = {"extra": "forbid"} metadata: Optional[dict[str, Any]] = Field( None, description="Metadata dictionary for storing session information", @@ -2667,23 +2733,29 @@ class VectorStoreResponse(BaseModel): ], ) - @classmethod - def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: - """Generate OpenAPI response schema. - - Parameters: - _examples: Optional list of example identifiers (unused). - - Returns: - OpenAPI response schema dictionary. 
- """ - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - } + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "id": "vs_abc123", + "name": "customer_support_docs", + "created_at": 1704067200, + "last_active_at": 1704153600, + "expires_at": None, + "status": "active", + "usage_bytes": 1048576, + "metadata": { + "conversation_id": "conv_123", + "document_ids": ["doc_456", "doc_789"], + }, + } + ] + }, + } -class VectorStoresListResponse(BaseModel): +class VectorStoresListResponse(AbstractSuccessfulResponse): """Response model containing a list of vector stores. Attributes: @@ -2696,25 +2768,41 @@ class VectorStoresListResponse(BaseModel): ) object: str = Field(default="list", description="Object type") - model_config = {"extra": "forbid"} - - @classmethod - def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: - """Generate OpenAPI response schema. - - Parameters: - _examples: Optional list of example identifiers (unused). - - Returns: - OpenAPI response schema dictionary. - """ - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - } + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "data": [ + { + "id": "vs_abc123", + "name": "customer_support_docs", + "created_at": 1704067200, + "last_active_at": 1704153600, + "expires_at": None, + "status": "active", + "usage_bytes": 1048576, + "metadata": {"conversation_id": "conv_123"}, + }, + { + "id": "vs_def456", + "name": "product_documentation", + "created_at": 1704070800, + "last_active_at": 1704157200, + "expires_at": None, + "status": "active", + "usage_bytes": 2097152, + "metadata": None, + }, + ], + "object": "list", + } + ] + }, + } -class FileResponse(BaseModel): +class FileResponse(AbstractSuccessfulResponse): """Response model containing a file object. 
Attributes: @@ -2733,25 +2821,24 @@ class FileResponse(BaseModel): purpose: str = Field(default="assistants", description="File purpose") object: str = Field(default="file", description="Object type") - model_config = {"extra": "forbid"} - - @classmethod - def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: - """Generate OpenAPI response schema. - - Parameters: - _examples: Optional list of example identifiers (unused). - - Returns: - OpenAPI response schema dictionary. - """ - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - } + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "id": "file_abc123", + "filename": "documentation.pdf", + "bytes": 524288, + "created_at": 1704067200, + "purpose": "assistants", + "object": "file", + } + ] + }, + } -class VectorStoreFileResponse(BaseModel): +class VectorStoreFileResponse(AbstractSuccessfulResponse): """Response model containing a vector store file object. Attributes: @@ -2778,25 +2865,24 @@ class VectorStoreFileResponse(BaseModel): ) object: str = Field(default="vector_store.file", description="Object type") - model_config = {"extra": "forbid"} - - @classmethod - def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: - """Generate OpenAPI response schema. - - Parameters: - _examples: Optional list of example identifiers (unused). - - Returns: - OpenAPI response schema dictionary. 
- """ - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - } + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "id": "file_abc123", + "vector_store_id": "vs_abc123", + "status": "completed", + "attributes": {"chunk_size": "512", "indexed": True}, + "last_error": None, + "object": "vector_store.file", + } + ] + }, + } -class VectorStoreFilesListResponse(BaseModel): +class VectorStoreFilesListResponse(AbstractSuccessfulResponse): """Response model containing a list of vector store files. Attributes: @@ -2809,19 +2895,31 @@ class VectorStoreFilesListResponse(BaseModel): ) object: str = Field(default="list", description="Object type") - model_config = {"extra": "forbid"} - - @classmethod - def openapi_response(cls, _examples: Optional[list[str]] = None) -> dict[str, Any]: - """Generate OpenAPI response schema. - - Parameters: - _examples: Optional list of example identifiers (unused). - - Returns: - OpenAPI response schema dictionary. 
- """ - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - } + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "data": [ + { + "id": "file_abc123", + "vector_store_id": "vs_abc123", + "status": "completed", + "attributes": {"chunk_size": "512"}, + "last_error": None, + "object": "vector_store.file", + }, + { + "id": "file_def456", + "vector_store_id": "vs_abc123", + "status": "processing", + "attributes": None, + "last_error": None, + "object": "vector_store.file", + }, + ], + "object": "list", + } + ] + }, + } diff --git a/tests/unit/app/endpoints/test_vector_stores.py b/tests/unit/app/endpoints/test_vector_stores.py index 514edffb6..5a1c4da9d 100644 --- a/tests/unit/app/endpoints/test_vector_stores.py +++ b/tests/unit/app/endpoints/test_vector_stores.py @@ -862,7 +862,7 @@ async def test_create_file_bad_request(mocker: MockerFixture) -> None: with pytest.raises(HTTPException) as e: await create_file(request=request, auth=auth, file=mock_file) - assert e.value.status_code == status.HTTP_400_BAD_REQUEST + assert e.value.status_code == status.HTTP_413_CONTENT_TOO_LARGE @pytest.mark.asyncio @@ -888,7 +888,7 @@ async def test_create_file_too_large(mocker: MockerFixture) -> None: with pytest.raises(HTTPException) as e: await create_file(request=request, auth=auth, file=mock_file) - assert e.value.status_code == status.HTTP_400_BAD_REQUEST + assert e.value.status_code == status.HTTP_413_CONTENT_TOO_LARGE assert "too large" in str(e.value.detail).lower() @@ -923,7 +923,7 @@ async def test_create_file_content_length_too_large(mocker: MockerFixture) -> No with pytest.raises(HTTPException) as e: await create_file(request=request, auth=auth, file=mock_file) - assert e.value.status_code == status.HTTP_400_BAD_REQUEST + assert e.value.status_code == status.HTTP_413_CONTENT_TOO_LARGE assert "too large" in str(e.value.detail).lower() diff --git a/tests/unit/models/responses/test_error_responses.py 
b/tests/unit/models/responses/test_error_responses.py index 1f2d0e62a..3ff628162 100644 --- a/tests/unit/models/responses/test_error_responses.py +++ b/tests/unit/models/responses/test_error_responses.py @@ -81,7 +81,7 @@ def test_openapi_response(self) -> None: # Verify example count matches schema examples count assert len(examples) == expected_count - assert expected_count == 1 + assert expected_count == 2 # Verify example structure assert "conversation_id" in examples