# agent.py — CoT-driven local-LLM agent with grammar-constrained tool calling.
from dotenv import load_dotenv
import multiprocessing
import requests
import json
import os
import re
import yfinance as yf
from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from llama_cpp import LlamaGrammar
load_dotenv()
# ── Tool implementations ──────────────────────────────────────────────────────
def web_search(query: str) -> str:
    """Search the web via the Tavily API and return formatted results.

    Args:
        query: Natural-language search query string.

    Returns:
        Up to three results formatted as "- Title / Content / URL" entries
        joined by blank lines, a "no results" message, or an error message.
        Never raises: failures are reported in the return value so the agent
        loop can surface them as an ordinary tool result.
    """
    url = "https://api.tavily.com/search"
    payload = {
        "api_key": os.getenv("TAVILY_API_KEY"),
        "query": query,
        "search_depth": "basic",
        "include_answer": True,
        "max_results": 3,
    }
    try:
        # Fix: the original call had no timeout, so a stalled connection
        # would hang the whole agent loop indefinitely.
        response = requests.post(url, json=payload, timeout=15)
        response.raise_for_status()
        data = response.json()
        results = data.get("results", [])
        if not results:
            return "No relevant search results found."
        formatted = []
        for res in results:
            formatted.append(
                f"- Title: {res['title']}\n Content: {res['content']}\n URL: {res['url']}"
            )
        return "\n\n".join(formatted)
    except Exception as e:
        # Broad catch is deliberate: tool errors become text for the model.
        return f"Error during web search: {str(e)}"
def stock_price(*symbols: str) -> str:
    """Look up the latest traded price for each ticker symbol.

    Args:
        *symbols: One or more ticker symbols (e.g. "AAPL", "TSLA").

    Returns:
        One line per symbol — either the current price, a not-found notice,
        or an error message. Never raises; per-symbol failures are reported
        inline so one bad ticker does not abort the rest.
    """
    lines = []
    for sym in symbols:
        try:
            history = yf.Ticker(sym).history(period="1d", interval="1m")
            if history.empty:
                lines.append(f"Could not find price information for {sym}.")
            else:
                latest = history["Close"].iloc[-1]
                lines.append(f"The current stock price for {sym} is ${latest:.2f}")
        except Exception as exc:
            lines.append(f"Error retrieving data for {sym}: {str(exc)}")
    return "\n".join(lines)
# Registry mapping the tool names emitted by the GBNF grammar to their
# Python implementations; looked up by execute_tool().
TOOLS = {
    "web_search": web_search,
    "stock_price": stock_price,
}
# ── GBNF Grammar ──────────────────────────────────────────────────────────────
# llama.cpp GBNF grammar constraining tool-selection output to JSON of the
# exact shape {"tools": [{"name": "...", "args": ["..."]}, ...]}, with "name"
# restricted to the two registered tools — so json.loads() on the raw
# completion cannot fail.
TOOL_GRAMMAR_STR = r"""
root ::= "{" ws "\"tools\"" ws ":" ws tool-array ws "}"
tool-array ::= "[]" | "[" ws tool-call (ws "," ws tool-call)* ws "]"
tool-call ::= "{" ws "\"name\"" ws ":" ws tool-name ws "," ws "\"args\"" ws ":" ws str-array ws "}"
tool-name ::= "\"web_search\"" | "\"stock_price\""
str-array ::= "[]" | "[" ws string (ws "," ws string)* ws "]"
string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" hex hex hex hex))* "\""
hex ::= [0-9a-fA-F]
ws ::= [ \t\n\r]*
"""
# Compiled once at import time; passed per-request in select_tools_with_grammar().
tool_grammar = LlamaGrammar.from_string(TOOL_GRAMMAR_STR)
# ── LLM setup ────────────────────────────────────────────────────────────────
# Local llama.cpp-backed chat model.
# NOTE(review): the model path is relative to the working directory — confirm
# it resolves from wherever this script is launched.
llm = ChatLlamaCpp(
    model_path="../versatile/models/Qwen2.5-14b-instruct/Q2_K.gguf",
    n_ctx=4096,  # context window size in tokens
    n_gpu_layers=-1,  # -1: offload all layers to the GPU (llama.cpp convention)
    n_batch=512,
    n_threads=multiprocessing.cpu_count() - 1,  # leave one core free for the OS
    use_mlock=True,  # pin model pages in RAM to avoid swapping
    use_mmap=True,
    verbose=False,
    temperature=0.6,
    top_p=0.9,
    top_k=40,
    repeat_penalty=1.2,
    max_tokens=2048,
    chat_format="chatml",  # Qwen instruct models use the ChatML turn format
)
# ── System prompts ────────────────────────────────────────────────────────────
# [1] CoT 추론 전용 프롬프트 — 내부 사고 과정을 생성
COT_SYSTEM = """
You are an internal reasoning engine, and your specific name is OpenLoop.
You think privately to yourself.
Never address the user directly. Never ask questions. Never say "I will do X" as if talking to someone.
Just think in flowing sentences about what the user wants, what you already know,
and what tools you need. Always end with a clear internal conclusion about which tools to call.
Keep it short and casual, like thinking out loud to yourself.
Do NOT write the final answer here.
""".strip()
# [2] 메인 응답 프롬프트 — CoT 결과를 참고해 자연어 응답 생성
MAIN_SYSTEM = """
You are an AI agent, and your specific name is OpenLoop.
A private reasoning trace has already been performed and is included in the
conversation as a [CoT] block. Use it to guide your response.
Rules:
- If tools are still needed, write ONE short sentence describing what you will do, then STOP.
Do NOT include function call syntax.
- If tool results are present and you have enough information, give a complete final answer.
- Never expose the [CoT] block contents verbatim to the user.
Available tools (for awareness):
- web_search : Search for the latest information
- stock_price : Check current stock prices
""".strip()
# [3] Tool 선택 프롬프트
TOOL_SELECT_SYSTEM = """
You are a tool dispatcher. Analyse the conversation (including any [CoT] reasoning)
and decide which tools to call next.
Output ONLY valid JSON — no explanation, no markdown.
Rules:
- Use the [CoT] trace to determine exactly which tools and arguments are needed.
- Choose arguments precisely; do not use placeholders.
- If the assistant already has sufficient information, output {"tools": []}.
- You may select multiple tools when required.
Available tools:
web_search(query) — Specific search query string
stock_price(symbol) — Exact ticker symbol (e.g. AAPL, TSLA)
""".strip()
# ── CoT helper ────────────────────────────────────────────────────────────────
def run_cot(messages: list) -> str:
    """Generate and stream a private chain-of-thought (CoT) trace.

    Sends the conversation history to the model under COT_SYSTEM and streams
    the reply to the terminal in grey, printing a single "[CoT]" header once
    the first non-empty token arrives.

    Args:
        messages: LangChain message history (HumanMessage / AIMessage items;
            other message types are skipped by the converter).

    Returns:
        The full reasoning text accumulated from the stream.
    """
    # Consistency fix: reuse the shared converter instead of duplicating the
    # role-mapping loop that messages_to_llama_fmt already implements.
    llama_msgs = messages_to_llama_fmt(COT_SYSTEM, messages)
    stream = llm.client.create_chat_completion(
        messages=llama_msgs,
        max_tokens=512,
        temperature=0.5,
        stream=True,
    )
    raw = ""
    header_printed = False
    for chunk in stream:
        delta = chunk["choices"][0]["delta"].get("content", "")
        if not delta:
            continue
        raw += delta
        if not header_printed:
            # Print the header lazily so an empty stream emits no header.
            print("\033[90m[CoT]\033[0m ", end="", flush=True)
            header_printed = True
        print(f"\033[90m{delta}\033[0m", end="", flush=True)
    print()
    return raw
# ── Other helpers ─────────────────────────────────────────────────────────────
def messages_to_llama_fmt(system: str, messages: list) -> list:
    """Convert LangChain messages into llama.cpp chat dicts.

    Prepends a system turn, then maps HumanMessage -> "user" and
    AIMessage -> "assistant". Any other message type is silently dropped.

    Args:
        system: Content for the leading system turn.
        messages: LangChain message history.

    Returns:
        A list of {"role": ..., "content": ...} dicts.
    """
    chat = [{"role": "system", "content": system}]
    for msg in messages:
        if isinstance(msg, AIMessage):
            chat.append({"role": "assistant", "content": msg.content})
        elif isinstance(msg, HumanMessage):
            chat.append({"role": "user", "content": msg.content})
    return chat
def select_tools_with_grammar(messages: list) -> list[dict]:
    """Ask the model which tools to call next, constrained by the GBNF grammar.

    Args:
        messages: Conversation history (typically including the [CoT] block).

    Returns:
        The parsed "tools" list — possibly empty — where each entry is a dict
        with "name" and "args" keys, as enforced by tool_grammar.
    """
    completion = llm.client.create_chat_completion(
        messages=messages_to_llama_fmt(TOOL_SELECT_SYSTEM, messages),
        grammar=tool_grammar,  # guarantees the output parses as our JSON shape
        max_tokens=512,
        temperature=0.0,
    )
    payload = completion["choices"][0]["message"]["content"]
    print(f"\n[grammar] {payload}")
    return json.loads(payload).get("tools", [])
def execute_tool(name: str, args: list) -> str:
    """Dispatch a tool call by name through the TOOLS registry.

    Args:
        name: Tool name as selected by the model.
        args: Positional string arguments for the tool.

    Returns:
        The tool's output string, or an "[Error] ..." message for an unknown
        tool or a failing call — never raises.
    """
    handler = TOOLS.get(name)
    if handler is None:
        return f"[Error] Tool '{name}' not found."
    try:
        return handler(*args)
    except Exception as e:
        return f"[Error] Tool execution failed: {e}"
# ── Agent loop ────────────────────────────────────────────────────────────────
def run_agent(messages: list, user_message: HumanMessage, max_turns: int = 10):
    """Run the CoT → respond → select-tools → execute loop for one user turn.

    Each turn: (1) generate a private CoT trace, (2) stream a natural-language
    response guided by that trace, (3) grammar-select tool calls, (4) execute
    them and feed results back as a HumanMessage. Stops when no tools are
    selected or max_turns is reached.

    Args:
        messages: Mutable conversation history; appended to in place.
        user_message: The new user turn to process.
        max_turns: Upper bound on CoT/tool iterations for this question.

    Returns:
        The updated messages list (same object that was passed in).
    """
    messages.append(user_message)
    print(f"\n{'='*60}")
    print(f"Q: {user_message.content}")
    print("=" * 60)
    for turn in range(max_turns):
        # ── Step 1: CoT internal reasoning ──────────────────────────────────
        print(f"\n[CoT 추론 중 (turn {turn + 1})...]")
        cot_block = run_cot(messages)
        # Inject the CoT result into the conversation context as a HumanMessage
        # (from the assistant's point of view it reads as external information).
        cot_injection = HumanMessage(content=f"[CoT]\n{cot_block}")
        augmented_messages = messages + [cot_injection]
        # ── Step 2: main LLM response (natural language) ────────────────────
        print(f"A (turn {turn + 1}): ", end="", flush=True)
        chat_input = [SystemMessage(content=MAIN_SYSTEM)] + augmented_messages
        response = ""
        for chunk in llm.stream(chat_input):
            response += chunk.content
            print(chunk.content, end="", flush=True)
        print()
        # The real conversation history stores only the response, without the CoT.
        messages.append(AIMessage(content=response))
        # ── Step 3: tool selection (using the CoT-augmented context) ────────
        tool_calls = select_tools_with_grammar(augmented_messages + [AIMessage(content=response)])
        if not tool_calls:
            break  # final answer complete
        # ── Step 4: tool execution ──────────────────────────────────────────
        tool_results = []
        for tc in tool_calls:
            name = tc["name"]
            args = tc.get("args", [])
            print(f"\n[tool] {name}({', '.join(args)})")
            result = execute_tool(name, args)
            print(f"[tool_result] {result}")
            tool_results.append(
                f"- {name}({', '.join(repr(a) for a in args)}):\n {result}"
            )
        combined = "Tool results:\n" + "\n".join(tool_results)
        # Tool output is fed back as a user-role message for the next turn.
        messages.append(HumanMessage(content=combined))
        print()
    return messages
# ── Main ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Simple REPL: each non-empty line is handed to the agent loop with the
    # accumulated conversation history.
    messages = []
    while True:
        try:
            user_input = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Robustness fix: exit cleanly on Ctrl-D / Ctrl-C instead of
            # crashing with an uncaught exception.
            print()
            break
        if not user_input:
            continue
        messages = run_agent(messages, HumanMessage(content=user_input))