Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"jsonpath-ng>=1.7.0",
"mcp==1.26.0",
"langchain-mcp-adapters==0.2.1",
"pillow>=12.1.1",
]

classifiers = [
Expand Down
16 changes: 12 additions & 4 deletions src/uipath_langchain/agent/multimodal/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
"""Multimodal LLM input handling (images, PDFs, etc.)."""

from .invoke import (
build_file_content_block,
build_file_content_blocks_for,
llm_call_with_files,
)
from .types import IMAGE_MIME_TYPES, FileInfo
from .utils import download_file_base64, is_image, is_pdf, sanitize_filename
from .types import IMAGE_MIME_TYPES, TIFF_MIME_TYPES, FileInfo
from .utils import (
download_file_base64,
is_image,
is_pdf,
is_tiff,
sanitize_filename,
)

__all__ = [
"FileInfo",
"IMAGE_MIME_TYPES",
"build_file_content_block",
"TIFF_MIME_TYPES",
"build_file_content_blocks_for",
"download_file_base64",
"is_image",
"is_pdf",
"is_tiff",
"llm_call_with_files",
"sanitize_filename",
]
50 changes: 32 additions & 18 deletions src/uipath_langchain/agent/multimodal/invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,61 @@
from langchain_core.messages.content import create_file_block, create_image_block

from .types import MAX_FILE_SIZE_BYTES, FileInfo
from .utils import download_file_base64, is_image, is_pdf, sanitize_filename
from .utils import (
download_file_base64,
is_image,
is_pdf,
is_tiff,
sanitize_filename,
stream_tiff_to_content_blocks,
)

logger = logging.getLogger("uipath")


async def build_file_content_block(
async def build_file_content_blocks_for(
file_info: FileInfo,
*,
max_size: int = MAX_FILE_SIZE_BYTES,
) -> DataContentBlock:
"""Build a LangChain content block for a file attachment.
) -> list[DataContentBlock]:
"""Build LangChain content blocks for a single file attachment.

Downloads the file with size enforcement and creates the content block.
Size validation happens during download (via Content-Length check and
streaming guard) to avoid loading oversized files into memory.
Handles all supported MIME types in one place: images, PDFs, and
TIFFs (multi-page, converted to individual PNG blocks).

Args:
file_info: File URL, name, and MIME type.
max_size: Maximum allowed raw file size in bytes. LLM providers
enforce payload limits; base64 encoding adds ~30% overhead.

Returns:
A DataContentBlock for the file (image or PDF).
A list of DataContentBlock instances for the file.

Raises:
ValueError: If the MIME type is not supported or the file exceeds
the size limit for LLM payloads.
"""
if is_tiff(file_info.mime_type):
try:
return await stream_tiff_to_content_blocks(file_info.url, max_size=max_size)
except ValueError as exc:
raise ValueError(f"File '{file_info.name}': {exc}") from exc

try:
base64_file = await download_file_base64(file_info.url, max_size=max_size)
except ValueError as exc:
raise ValueError(f"File '{file_info.name}': {exc}") from exc

if is_image(file_info.mime_type):
return create_image_block(base64=base64_file, mime_type=file_info.mime_type)
return [create_image_block(base64=base64_file, mime_type=file_info.mime_type)]
if is_pdf(file_info.mime_type):
return create_file_block(
base64=base64_file,
mime_type=file_info.mime_type,
filename=sanitize_filename(file_info.name),
)
return [
create_file_block(
base64=base64_file,
mime_type=file_info.mime_type,
filename=sanitize_filename(file_info.name),
)
]

raise ValueError(f"Unsupported mime_type={file_info.mime_type}")

Expand All @@ -75,8 +89,8 @@ async def build_file_content_blocks(files: list[FileInfo]) -> list[DataContentBl

file_content_blocks: list[DataContentBlock] = []
for file in files:
block = await build_file_content_block(file)
file_content_blocks.append(block)
blocks = await build_file_content_blocks_for(file)
file_content_blocks.extend(blocks)
return file_content_blocks


Expand Down Expand Up @@ -111,8 +125,8 @@ async def llm_call_with_files(

content_blocks: list[Any] = []
for file_info in files:
content_block = await build_file_content_block(file_info)
content_blocks.append(content_block)
blocks = await build_file_content_blocks_for(file_info)
content_blocks.extend(blocks)

file_message = HumanMessage(content_blocks=content_blocks)
all_messages = list(messages) + [file_message]
Expand Down
5 changes: 5 additions & 0 deletions src/uipath_langchain/agent/multimodal/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
"image/webp",
}

# MIME types recognised as TIFF images, including the "image/x-tiff" variant.
TIFF_MIME_TYPES: set[str] = {"image/tiff", "image/x-tiff"}


@dataclass
class FileInfo:
Expand Down
91 changes: 83 additions & 8 deletions src/uipath_langchain/agent/multimodal/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Utility functions for multimodal file handling."""

import base64
import io
import re
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

import httpx
from langchain_core.messages import DataContentBlock
from uipath._utils._ssl_context import get_httpx_client_kwargs

from .types import IMAGE_MIME_TYPES
from .types import IMAGE_MIME_TYPES, TIFF_MIME_TYPES


def sanitize_filename(filename: str) -> str:
Expand Down Expand Up @@ -37,6 +40,11 @@ def is_image(mime_type: str) -> bool:
return mime_type.lower() in IMAGE_MIME_TYPES


def is_tiff(mime_type: str) -> bool:
    """Return True when *mime_type* is one of the known TIFF MIME types.

    The comparison is case-insensitive: the input is lower-cased before
    being looked up in ``TIFF_MIME_TYPES``.
    """
    normalized = mime_type.lower()
    return normalized in TIFF_MIME_TYPES


def _format_mb(size_bytes: int, decimals: int = 1) -> str:
"""Format a byte count as MB.

Expand Down Expand Up @@ -97,22 +105,28 @@ async def encode_streamed_base64(
return result


async def download_file_base64(url: str, *, max_size: int = 0) -> str:
"""Download a file from a URL and return its content as a base64 string.
@asynccontextmanager
async def _stream_download(url: str, *, max_size: int = 0):
"""Stream an HTTP download with size enforcement.

Yields the validated response object. Checks Content-Length upfront
and raises ValueError if the file is known to exceed the limit.

Args:
url: The URL to download from.
max_size: Maximum allowed file size in bytes. 0 means unlimited.

Yields:
The httpx response object, ready for streaming via aiter_bytes().

Raises:
ValueError: If the file exceeds max_size.
ValueError: If the file exceeds max_size (Content-Length check).
httpx.HTTPStatusError: If the HTTP request fails.
"""
async with httpx.AsyncClient(**get_httpx_client_kwargs()) as client:
async with client.stream("GET", url) as response:
response.raise_for_status()

# Fast reject via Content-Length before reading the body
if max_size > 0:
content_length = response.headers.get("content-length")
if content_length:
Expand All @@ -130,6 +144,67 @@ async def download_file_base64(url: str, *, max_size: int = 0) -> str:
f" limit for Agent LLM payloads"
)

return await encode_streamed_base64(
response.aiter_bytes(), max_size=max_size
)
yield response


def _tiff_pages_to_png_blocks(buf: io.BytesIO) -> list[DataContentBlock]:
    """Convert every page of an in-memory TIFF into a base64 PNG image block.

    This is synchronous, CPU-bound work (image decode + PNG encode), so it
    is kept separate from the coroutine and meant to run off the event loop.
    """
    from langchain_core.messages.content import create_image_block
    from PIL import Image, ImageSequence

    blocks: list[DataContentBlock] = []
    with Image.open(buf) as img:
        for frame in ImageSequence.Iterator(img):
            # A fresh buffer per page: each page is encoded and wrapped
            # before the next one is touched.
            png_buf = io.BytesIO()
            frame.convert("RGBA").save(png_buf, format="PNG")
            png_b64 = base64.b64encode(png_buf.getvalue()).decode("ascii")
            blocks.append(create_image_block(base64=png_b64, mime_type="image/png"))
    return blocks


async def stream_tiff_to_content_blocks(
    url: str, *, max_size: int = 0
) -> list[DataContentBlock]:
    """Download a TIFF via streaming and convert each page to a content block.

    The HTTP response is streamed into an in-memory buffer while the
    running byte count is checked against ``max_size``. Once the download
    is complete, each TIFF page is converted to PNG, base64-encoded, and
    wrapped in an image content block. The conversion runs in a worker
    thread so the event loop is not blocked while pages are decoded and
    re-encoded.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Returns:
        A list of DataContentBlock instances, one per TIFF page.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.
    """
    import asyncio

    async with _stream_download(url, max_size=max_size) as response:
        buf = io.BytesIO()
        total = 0
        async for chunk in response.aiter_bytes():
            total += len(chunk)
            # Streaming guard: enforce the limit as bytes arrive rather
            # than after the whole body has been buffered.
            if max_size > 0 and total > max_size:
                raise ValueError(
                    f"File exceeds the {_format_mb(max_size, decimals=0)}"
                    f" limit for LLM payloads"
                    f" (downloaded {_format_mb(total)} so far)"
                )
            buf.write(chunk)

    buf.seek(0)
    # Pillow decoding/encoding is blocking; hand it to a worker thread.
    return await asyncio.to_thread(_tiff_pages_to_png_blocks, buf)


async def download_file_base64(url: str, *, max_size: int = 0) -> str:
    """Fetch the file at *url* and return its body encoded as base64.

    The download goes through ``_stream_download`` and the response byte
    stream is handed to ``encode_streamed_base64``; ``max_size`` is passed
    to both.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Returns:
        The downloaded content as a base64 string.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.
    """
    async with _stream_download(url, max_size=max_size) as resp:
        byte_chunks = resp.aiter_bytes()
        encoded = await encode_streamed_base64(byte_chunks, max_size=max_size)
    return encoded
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
AgentRuntimeError,
AgentRuntimeErrorCode,
)
from uipath_langchain.agent.multimodal import FileInfo, build_file_content_block
from uipath_langchain.agent.multimodal import (
FileInfo,
build_file_content_blocks_for,
)
from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model
from uipath_langchain.agent.tools.structured_tool_with_argument_properties import (
StructuredToolWithArgumentProperties,
Expand Down Expand Up @@ -182,8 +185,8 @@ async def add_files_to_message(

file_content_blocks: list[DataContentBlock] = []
for file in files:
block = await build_file_content_block(file)
file_content_blocks.append(block)
blocks = await build_file_content_blocks_for(file)
file_content_blocks.extend(blocks)
return append_content_blocks_to_message(
message, cast(list[ContentBlock], file_content_blocks)
)
Loading
Loading