Skip preflight IO (memory/URL/fast-tools) when no_inference=True

In no_inference mode only the routing decision matters — fetching
memories, URL content, and fast-tool context adds latency without
affecting the classification.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-24 07:37:55 +00:00
parent b7d5896076
commit 4d37ac65b2

View File

@@ -446,6 +446,10 @@ async def _run_agent_pipeline(
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
# Fetch URL content, memories, and fast-tool context concurrently
# Skip preflight IO in no_inference mode — only routing decision needed
if no_inference:
url_context = memories = fast_context = None
else:
url_context, memories, fast_context = await asyncio.gather(
_fetch_urls_from_message(clean_message),
_retrieve_memories(clean_message, session_id),