11"""Utility functions for multimodal file handling."""
22
33import base64
4+ import io
45import re
56from collections .abc import AsyncIterator
7+ from contextlib import asynccontextmanager
68
79import httpx
10+ from langchain_core .messages import DataContentBlock
811from uipath ._utils ._ssl_context import get_httpx_client_kwargs
912
10- from .types import IMAGE_MIME_TYPES
13+ from .types import IMAGE_MIME_TYPES , TIFF_MIME_TYPES
1114
1215
1316def sanitize_filename (filename : str ) -> str :
@@ -37,6 +40,11 @@ def is_image(mime_type: str) -> bool:
3740 return mime_type .lower () in IMAGE_MIME_TYPES
3841
3942
def is_tiff(mime_type: str) -> bool:
    """Tell whether ``mime_type`` is one of the known TIFF MIME types.

    The value is lower-cased before it is looked up in ``TIFF_MIME_TYPES``.
    """
    normalized = mime_type.lower()
    return normalized in TIFF_MIME_TYPES
46+
47+
4048def _format_mb (size_bytes : int , decimals : int = 1 ) -> str :
4149 """Format a byte count as MB.
4250
@@ -97,22 +105,28 @@ async def encode_streamed_base64(
97105 return result
98106
99107
100- async def download_file_base64 (url : str , * , max_size : int = 0 ) -> str :
101- """Download a file from a URL and return its content as a base64 string.
108+ @asynccontextmanager
109+ async def _stream_download (url : str , * , max_size : int = 0 ):
110+ """Stream an HTTP download with size enforcement.
111+
112+ Yields the validated response object. Checks Content-Length upfront
113+ and raises ValueError if the file is known to exceed the limit.
102114
103115 Args:
104116 url: The URL to download from.
105117 max_size: Maximum allowed file size in bytes. 0 means unlimited.
106118
119+ Yields:
120+ The httpx response object, ready for streaming via aiter_bytes().
121+
107122 Raises:
108- ValueError: If the file exceeds max_size.
123+ ValueError: If the file exceeds max_size (Content-Length check) .
109124 httpx.HTTPStatusError: If the HTTP request fails.
110125 """
111126 async with httpx .AsyncClient (** get_httpx_client_kwargs ()) as client :
112127 async with client .stream ("GET" , url ) as response :
113128 response .raise_for_status ()
114129
115- # Fast reject via Content-Length before reading the body
116130 if max_size > 0 :
117131 content_length = response .headers .get ("content-length" )
118132 if content_length :
@@ -130,6 +144,67 @@ async def download_file_base64(url: str, *, max_size: int = 0) -> str:
130144 f" limit for Agent LLM payloads"
131145 )
132146
133- return await encode_streamed_base64 (
134- response .aiter_bytes (), max_size = max_size
135- )
147+ yield response
148+
149+
def _tiff_pages_to_blocks(buf: io.BytesIO) -> list[DataContentBlock]:
    """Decode the TIFF held in ``buf`` into one PNG content block per page."""
    from langchain_core.messages.content import create_image_block
    from PIL import Image, ImageSequence

    blocks: list[DataContentBlock] = []
    with Image.open(buf) as img:
        for frame in ImageSequence.Iterator(img):
            # Close each per-page PNG buffer as soon as it is encoded so the
            # raw PNG bytes do not accumulate across pages.
            with io.BytesIO() as png_buf:
                frame.convert("RGBA").save(png_buf, format="PNG")
                png_b64 = base64.b64encode(png_buf.getvalue()).decode("ascii")
            blocks.append(create_image_block(base64=png_b64, mime_type="image/png"))
    return blocks


async def stream_tiff_to_content_blocks(
    url: str, *, max_size: int = 0
) -> list[DataContentBlock]:
    """Download a TIFF via streaming and convert each page to a content block.

    The HTTP response is streamed into an in-memory buffer while the size
    limit is enforced chunk by chunk. Once the download has finished and the
    connection has been released, every TIFF page is converted to PNG,
    base64-encoded, and wrapped in a DataContentBlock.

    The Pillow decode/encode step is CPU-bound, so it runs in a worker thread
    (``asyncio.to_thread``) instead of blocking the event loop.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Returns:
        A list of DataContentBlock instances, one per TIFF page.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.

    Note:
        Errors raised by Pillow while opening or converting the payload
        (for example when the data is not a decodable image) propagate
        unchanged.
    """
    import asyncio

    buf = io.BytesIO()
    total = 0
    async with _stream_download(url, max_size=max_size) as response:
        async for chunk in response.aiter_bytes():
            total += len(chunk)
            # Content-Length may be absent or wrong, so the limit is also
            # enforced on the bytes actually received.
            if max_size > 0 and total > max_size:
                raise ValueError(
                    f"File exceeds the {_format_mb(max_size, decimals=0)}"
                    f" limit for LLM payloads"
                    f" (downloaded {_format_mb(total)} so far)"
                )
            buf.write(chunk)

    # The connection is closed at this point; only the buffered bytes are
    # needed for conversion.
    buf.seek(0)
    return await asyncio.to_thread(_tiff_pages_to_blocks, buf)
196+
197+
async def download_file_base64(url: str, *, max_size: int = 0) -> str:
    """Fetch ``url`` and return the response body encoded as base64 text.

    The body is streamed through ``encode_streamed_base64`` and is passed
    the same ``max_size`` that is given to the download helper.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Returns:
        The base64-encoded content of the downloaded file.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.
    """
    async with _stream_download(url, max_size=max_size) as response:
        body_chunks = response.aiter_bytes()
        encoded = await encode_streamed_base64(body_chunks, max_size=max_size)
    return encoded
0 commit comments