diff --git a/agent.py b/agent.py index fed4af7..7c650a4 100644 --- a/agent.py +++ b/agent.py @@ -446,16 +446,20 @@ async def _run_agent_pipeline( print(f"[agent] running: {clean_message[:80]!r}", flush=True) # Fetch URL content, memories, and fast-tool context concurrently - url_context, memories, fast_context = await asyncio.gather( - _fetch_urls_from_message(clean_message), - _retrieve_memories(clean_message, session_id), - _fast_tool_runner.run_matching(clean_message), - ) - if url_context: - print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True) - if fast_context: - names = _fast_tool_runner.matching_names(clean_message) - print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True) + # Skip preflight IO in no_inference mode — only routing decision needed + if no_inference: + url_context = memories = fast_context = None + else: + url_context, memories, fast_context = await asyncio.gather( + _fetch_urls_from_message(clean_message), + _retrieve_memories(clean_message, session_id), + _fast_tool_runner.run_matching(clean_message), + ) + if url_context: + print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True) + if fast_context: + names = _fast_tool_runner.matching_names(clean_message) + print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True) # Build enriched history enriched_history = list(history)