WeatherTool: fetch open-meteo directly, skip LLM for fast tool replies
- Replace SearXNG search with direct open-meteo.com API call (no key needed)
- WeatherTool now returns a ready-to-deliver reply string
- agent.py: short-circuit router+LLM when fast tools return a result (tier=fast)
- router.py: fast tool match no longer triggers light reply generation

Weather latency: 105-190s → ~1s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
197
agent.py
197
agent.py
@@ -123,7 +123,7 @@ _memory_search_tool = None
|
||||
|
||||
# Fast tools run before the LLM — classifier + context enricher
|
||||
_fast_tool_runner = FastToolRunner([
|
||||
WeatherTool(searxng_url=SEARXNG_URL),
|
||||
WeatherTool(),
|
||||
CommuteTool(routecheck_url=ROUTECHECK_URL, internal_token=ROUTECHECK_TOKEN),
|
||||
])
|
||||
|
||||
@@ -410,110 +410,121 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
|
||||
if memories:
|
||||
enriched_history = [{"role": "system", "content": memories}] + enriched_history
|
||||
|
||||
tier, light_reply = await router.route(clean_message, enriched_history, force_complex)
|
||||
# Short-circuit: fast tool result is already a complete reply — skip router+LLM
|
||||
if fast_context and not force_complex and not url_context:
|
||||
tier = "fast"
|
||||
final_text = fast_context
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
names = _fast_tool_runner.matching_names(clean_message)
|
||||
print(f"[agent] tier=fast tools={names} — delivering directly", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
else:
|
||||
tier, light_reply = await router.route(clean_message, enriched_history, force_complex)
|
||||
|
||||
# Messages with URL content must be handled by at least medium tier
|
||||
if url_context and tier == "light":
|
||||
tier = "medium"
|
||||
light_reply = None
|
||||
print("[agent] URL in message → upgraded light→medium", flush=True)
|
||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||
# Messages with URL content must be handled by at least medium tier
|
||||
if url_context and tier == "light":
|
||||
tier = "medium"
|
||||
light_reply = None
|
||||
print("[agent] URL in message → upgraded light→medium", flush=True)
|
||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||
|
||||
final_text = None
|
||||
try:
|
||||
if tier == "light":
|
||||
final_text = light_reply
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] light path: answered by router", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
if tier != "fast":
|
||||
final_text = None
|
||||
try:
|
||||
if tier == "light":
|
||||
final_text = light_reply
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] light path: answered by router", flush=True)
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
|
||||
elif tier == "medium":
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
if fast_context:
|
||||
system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + fast_context
|
||||
|
||||
# Stream tokens directly — filter out qwen3 <think> blocks
|
||||
in_think = False
|
||||
response_parts = []
|
||||
async for chunk in medium_model.astream([
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]):
|
||||
token = chunk.content or ""
|
||||
if not token:
|
||||
continue
|
||||
if in_think:
|
||||
if "</think>" in token:
|
||||
in_think = False
|
||||
after = token.split("</think>", 1)[1]
|
||||
if after:
|
||||
await _push_stream_chunk(session_id, after)
|
||||
response_parts.append(after)
|
||||
else:
|
||||
if "<think>" in token:
|
||||
in_think = True
|
||||
before = token.split("<think>", 1)[0]
|
||||
if before:
|
||||
await _push_stream_chunk(session_id, before)
|
||||
response_parts.append(before)
|
||||
else:
|
||||
await _push_stream_chunk(session_id, token)
|
||||
response_parts.append(token)
|
||||
|
||||
await _end_stream(session_id)
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
final_text = "".join(response_parts).strip() or None
|
||||
|
||||
else: # complex
|
||||
ok = await vram_manager.enter_complex_mode()
|
||||
if not ok:
|
||||
print("[agent] complex→medium fallback (eviction timeout)", flush=True)
|
||||
tier = "medium"
|
||||
elif tier == "medium":
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
result = await medium_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
else:
|
||||
system_prompt = COMPLEX_SYSTEM_PROMPT.format(user_id=session_id)
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n[Pre-fetched URL content from user's message:]\n" + url_context
|
||||
result = await complex_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
asyncio.create_task(vram_manager.exit_complex_mode())
|
||||
if fast_context:
|
||||
system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + fast_context
|
||||
|
||||
# Stream tokens directly — filter out qwen3 <think> blocks
|
||||
in_think = False
|
||||
response_parts = []
|
||||
async for chunk in medium_model.astream([
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]):
|
||||
token = chunk.content or ""
|
||||
if not token:
|
||||
continue
|
||||
if in_think:
|
||||
if "</think>" in token:
|
||||
in_think = False
|
||||
after = token.split("</think>", 1)[1]
|
||||
if after:
|
||||
await _push_stream_chunk(session_id, after)
|
||||
response_parts.append(after)
|
||||
else:
|
||||
if "<think>" in token:
|
||||
in_think = True
|
||||
before = token.split("<think>", 1)[0]
|
||||
if before:
|
||||
await _push_stream_chunk(session_id, before)
|
||||
response_parts.append(before)
|
||||
else:
|
||||
await _push_stream_chunk(session_id, token)
|
||||
response_parts.append(token)
|
||||
|
||||
await _end_stream(session_id)
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
final_text = "".join(response_parts).strip() or None
|
||||
|
||||
else: # complex
|
||||
ok = await vram_manager.enter_complex_mode()
|
||||
if not ok:
|
||||
print("[agent] complex→medium fallback (eviction timeout)", flush=True)
|
||||
tier = "medium"
|
||||
system_prompt = MEDIUM_SYSTEM_PROMPT
|
||||
if memories:
|
||||
system_prompt = system_prompt + "\n\n" + memories
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n" + url_context
|
||||
result = await medium_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
else:
|
||||
system_prompt = COMPLEX_SYSTEM_PROMPT.format(user_id=session_id)
|
||||
if url_context:
|
||||
system_prompt = system_prompt + "\n\n[Pre-fetched URL content from user's message:]\n" + url_context
|
||||
result = await complex_agent.ainvoke({
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*history,
|
||||
{"role": "user", "content": clean_message},
|
||||
]
|
||||
})
|
||||
asyncio.create_task(vram_manager.exit_complex_mode())
|
||||
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
_log_messages(result)
|
||||
final_text = _extract_final_text(result)
|
||||
if final_text:
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
await _end_stream(session_id)
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
_log_messages(result)
|
||||
final_text = _extract_final_text(result)
|
||||
if final_text:
|
||||
await _push_stream_chunk(session_id, final_text)
|
||||
print(f"[agent] error after {llm_elapsed:.1f}s for chat {session_id}: {e}", flush=True)
|
||||
traceback.print_exc()
|
||||
await _end_stream(session_id)
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
llm_elapsed = time.monotonic() - t0
|
||||
print(f"[agent] error after {llm_elapsed:.1f}s for chat {session_id}: {e}", flush=True)
|
||||
traceback.print_exc()
|
||||
await _end_stream(session_id)
|
||||
|
||||
# Deliver reply through the originating channel
|
||||
if final_text:
|
||||
t1 = time.monotonic()
|
||||
|
||||
Reference in New Issue
Block a user