WeatherTool: fetch open-meteo directly, skip LLM for fast tool replies
- Replace SearXNG search with direct open-meteo.com API call (no key needed)
- WeatherTool now returns a ready-to-deliver reply string
- agent.py: short-circuit router+LLM when fast tools return a result (tier=fast)
- router.py: fast tool match no longer triggers light reply generation

Weather latency: 105-190s → ~1s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
197
agent.py
197
agent.py
@@ -123,7 +123,7 @@ _memory_search_tool = None
|
||||
|
||||
# Fast tools run before the LLM — classifier + context enricher
|
||||
_fast_tool_runner = FastToolRunner([
|
||||
WeatherTool(searxng_url=SEARXNG_URL),
|
||||
WeatherTool(),
|
||||
CommuteTool(routecheck_url=ROUTECHECK_URL, internal_token=ROUTECHECK_TOKEN),
|
||||
])
|
||||
|
||||
@@ -410,110 +410,121 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
|
||||
if memories:
|
||||
enriched_history = [{"role": "system", "content": memories}] + enriched_history
|
||||
|
||||
tier, light_reply = await router.route(clean_message, enriched_history, force_complex)
|
||||
# Short-circuit: fast tool result is already a complete reply — skip router+LLM
|
||||
if fast_context and not force_complex and not url_context:
|
||||
tier = "fast"
|
||||
final_text = fast_context
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
names = _fast_tool_runner.matching_names(clean_message)
|
||||
print(f"[agent] tier=fast tools={names} — delivering directly", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
else:
|
||||
tier, light_reply = await router.route(clean_message, enriched_history, force_complex)
|
||||
|
||||
# Messages with URL content must be handled by at least medium tier
|
||||
if url_context and tier == "light":
|
||||
tier = "medium"
|
||||
light_reply = None
|
||||
print("[agent] URL in message → upgraded light→medium", flush=True)
|
||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||
# Messages with URL content must be handled by at least medium tier
|
||||
if url_context and tier == "light":
|
||||
tier = "medium"
|
||||
light_reply = None
|
||||
print("[agent] URL in message → upgraded light→medium", flush=True)
|
||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||
|
||||
final_text = None
|
||||
try:
|
||||
if tier == "light":
|
||||
final_text = light_reply
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] light path: answered by router", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
if tier != "fast":
|
||||
final_text = None
|
||||
try:
|
||||
if tier == "light":
|
||||
final_text = light_reply
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] light path: answered by router", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
|
||||
elif tier == "medium":
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
if fast_context:
|
||||
system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + fast_context
|
||||
|
||||
# Stream tokens directly — filter out qwen3 <think> blocks
|
||||
in_think = False
|
||||
response_parts = []
|
||||
async for chunk in medium_model.astream([
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]):
|
||||
token = chunk.content or ""
|
||||
if not token:
|
||||
continue
|
||||
if in_think:
|
||||
if "</think>" in token:
|
||||
in_think = False
|
||||
after = token.split("</think>", 1)[1]
|
||||
if after:
|
||||
await _push_stream_chunk(session_id, after)
|
||||
response_parts.append(after)
|
||||
else:
|
||||
if "<think>" in token:
|
||||
in_think = True
|
||||
before = token.split("<think>", 1)[0]
|
||||
if before:
|
||||
await _push_stream_chunk(session_id, before)
|
||||
response_parts.append(before)
|
||||
else:
|
||||
await _push_stream_chunk(session_id, token)
|
||||
response_parts.append(token)
|
||||
|
||||
await _end_stream(session_id)
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
final_text = "".join(response_parts).strip() or None
|
||||
|
||||
else: # complex
|
||||
ok = await vram_manager.enter_complex_mode()
|
||||
if not ok:
|
||||
print("[agent] complex→medium fallback (eviction timeout)", flush=True)
|
||||
tier = "medium"
|
||||
elif tier == "medium":
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
result = await medium_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
else:
|
||||
system_prompt = COMPLEX_SYSTEM_PROMPT.format(user_id=session_id)
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n[Pre-fetched URL content from user's message:]\n" + url_context
|
||||
result = await complex_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
asyncio.create_task(vram_manager.exit_complex_mode())
|
||||
if fast_context:
|
||||
system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + fast_context
|
||||
|
||||
# Stream tokens directly — filter out qwen3 <think> blocks
|
||||
in_think = False
|
||||
response_parts = []
|
||||
async for chunk in medium_model.astream([
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]):
|
||||
token = chunk.content or ""
|
||||
if not token:
|
||||
continue
|
||||
if in_think:
|
||||
if "</think>" in token:
|
||||
in_think = False
|
||||
after = token.split("</think>", 1)[1]
|
||||
if after:
|
||||
await _push_stream_chunk(session_id, after)
|
||||
response_parts.append(after)
|
||||
else:
|
||||
if "<think>" in token:
|
||||
in_think = True
|
||||
before = token.split("<think>", 1)[0]
|
||||
if before:
|
||||
await _push_stream_chunk(session_id, before)
|
||||
response_parts.append(before)
|
||||
else:
|
||||
await _push_stream_chunk(session_id, token)
|
||||
response_parts.append(token)
|
||||
|
||||
await _end_stream(session_id)
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
final_text = "".join(response_parts).strip() or None
|
||||
|
||||
else: # complex
|
||||
ok = await vram_manager.enter_complex_mode()
|
||||
if not ok:
|
||||
print("[agent] complex→medium fallback (eviction timeout)", flush=True)
|
||||
tier = "medium"
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
result = await medium_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
else:
|
||||
system_prompt = COMPLEX_SYSTEM_PROMPT.format(user_id=session_id)
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n[Pre-fetched URL content from user's message:]\n" + url_context
|
||||
result = await complex_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
asyncio.create_task(vram_manager.exit_complex_mode())
|
||||
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
_log_messages(result)
|
||||
final_text = _extract_final_text(result)
|
||||
if final_text:
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
_log_messages(result)
|
||||
final_text = _extract_final_text(result)
|
||||
if final_text:
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
print(f"[agent] error after {llm_elapsed:.1f}s for chat {session_id}: {e}", flush=True)
|
||||
traceback.print_exc()
|
||||
await _end_stream(session_id)
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] error after {llm_elapsed:.1f}s for chat {session_id}: {e}", flush=True)
|
||||
traceback.print_exc()
|
||||
await _end_stream(session_id)
|
||||
|
||||
# Deliver reply through the originating channel
|
||||
if final_text:
|
||||
t1 = time.monotonic()
|
||||
|
||||
Reference in New Issue
Block a user