forked from agentstack-ai/AgentStack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path__init__.py
More file actions
111 lines (91 loc) · 3.26 KB
/
__init__.py
File metadata and controls
111 lines (91 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from typing import IO, Optional
import os
from pathlib import Path
import base64
import tempfile
import requests
import anthropic
__all__ = ["analyze_image"]
PROMPT = os.getenv('VISION_PROMPT', "What's in this image?")
MODEL = os.getenv('VISION_MODEL', "claude-3-5-sonnet-20241022")
MAX_TOKENS: int = int(os.getenv('VISION_MAX_TOKENS', 1024))
MEDIA_TYPES = {
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"png": "image/png",
"gif": "image/gif",
"webp": "image/webp",
}
ALLOWED_MEDIA_TYPES = list(MEDIA_TYPES.keys())
# image sizes that will not be resized
# TODO is there any value in resizing pre-upload?
# 1:1 1092x1092 px
# 3:4 951x1268 px
# 2:3 896x1344 px
# 9:16 819x1456 px
# 1:2 784x1568 px
def _get_media_type(image_filename: str) -> Optional[str]:
"""Get the media type from an image filename."""
for ext, media_type in MEDIA_TYPES.items():
if image_filename.endswith(ext):
return media_type
return None
def _encode_image(image_handle: IO) -> str:
"""Encode a file handle to base64."""
return base64.b64encode(image_handle.read()).decode("utf-8")
def _make_anthropic_request(image_handle: IO, media_type: str) -> anthropic.types.Message:
"""Make a request to the Anthropic API using an image."""
client = anthropic.Anthropic()
data = _encode_image(image_handle)
return client.messages.create(
model=MODEL,
max_tokens=MAX_TOKENS,
messages=[
{
"role": "user",
"content": [
{ # type: ignore
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": data,
},
},
{ # type: ignore
"type": "text",
"text": PROMPT,
},
],
}
],
)
def _analyze_web_image(image_url: str, media_type: str) -> str:
"""Analyze an image from a URL."""
with tempfile.NamedTemporaryFile() as temp_file:
temp_file.write(requests.get(image_url).content)
temp_file.flush()
temp_file.seek(0)
response = _make_anthropic_request(temp_file, media_type)
return response.content[0].text # type: ignore
def _analyze_local_image(image_path: str, media_type: str) -> str:
"""Analyze an image from a local file."""
with open(image_path, "rb") as image_file:
response = _make_anthropic_request(image_file, media_type)
return response.content[0].text # type: ignore
def analyze_image(image_path_or_url: str) -> str:
"""
Analyze an image using OpenAI's Vision API.
Args:
image_path_or_url: Local path or URL to the image.
Returns:
str: Description of the image contents
"""
if not image_path_or_url:
return "Image Path or URL is required."
media_type = _get_media_type(image_path_or_url)
if not media_type:
return f"Unsupported image type use {ALLOWED_MEDIA_TYPES}."
if "http" in image_path_or_url:
return _analyze_web_image(image_path_or_url, media_type)
return _analyze_local_image(image_path_or_url, media_type)