Skip preflight IO (memory/URL/fast-tools) when no_inference=True
In no_inference mode only the routing decision matters — fetching URL content, memories, and fast-tool context adds latency without affecting the classification.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
24
agent.py
24
agent.py
@@ -446,16 +446,20 @@ async def _run_agent_pipeline(
|
||||
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
|
||||
|
||||
# Fetch URL content, memories, and fast-tool context concurrently
|
||||
url_context, memories, fast_context = await asyncio.gather(
|
||||
_fetch_urls_from_message(clean_message),
|
||||
_retrieve_memories(clean_message, session_id),
|
||||
_fast_tool_runner.run_matching(clean_message),
|
||||
)
|
||||
if url_context:
|
||||
print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True)
|
||||
if fast_context:
|
||||
names = _fast_tool_runner.matching_names(clean_message)
|
||||
print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True)
|
||||
# Skip preflight IO in no_inference mode — only routing decision needed
|
||||
if no_inference:
|
||||
url_context = memories = fast_context = None
|
||||
else:
|
||||
url_context, memories, fast_context = await asyncio.gather(
|
||||
_fetch_urls_from_message(clean_message),
|
||||
_retrieve_memories(clean_message, session_id),
|
||||
_fast_tool_runner.run_matching(clean_message),
|
||||
)
|
||||
if url_context:
|
||||
print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True)
|
||||
if fast_context:
|
||||
names = _fast_tool_runner.matching_names(clean_message)
|
||||
print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True)
|
||||
|
||||
# Build enriched history
|
||||
enriched_history = list(history)
|
||||
|
||||
Reference in New Issue
Block a user