Add real-time query handling: pre-search enrichment + routing fix

- router.py: add _MEDIUM_FORCE_PATTERNS to block weather/news/price queries from light tier regardless of LLM classification
- agent.py: add _REALTIME_RE and _searxng_search_async(); real-time queries now run SearXNG search concurrently with URL fetch + memory retrieval, injecting snippets into medium system prompt
- tests/use_cases/weather_now.md: use case test for weather queries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 05:08:08 +00:00
parent 8cd41940f0
commit 436299f7e2
3 changed files with 113 additions and 7 deletions
--- a/agent.py
+++ b/agent.py
@@ -12,6 +12,16 @@ import httpx as _httpx

 _URL_RE = _re.compile(r'https?://[^\s<>"\']+')

+# Queries that need live data — trigger pre-search enrichment for medium tier
+_REALTIME_RE = _re.compile(
+    r"\b(weather|forecast|temperature|rain(ing)?|snow(ing)?|humidity|wind speed"
+    r"|today.?s news|breaking news|latest news|news today|current events"
+    r"|bitcoin price|crypto price|stock price|exchange rate"
+    r"|right now|currently|at the moment|live score|score now|score today"
+    r"|open now|hours today|is .+ open)\b",
+    _re.IGNORECASE,
+)
+

 def _extract_urls(text: str) -> list[str]:
    return _URL_RE.findall(text)
@@ -88,6 +98,30 @@ async def _fetch_urls_from_message(message: str) -> str:
    return "User's message contains URLs. Fetched content:\n\n" + "\n\n".join(parts)


+async def _searxng_search_async(query: str) -> str:
+    """Run a SearXNG search and return top result snippets as text for prompt injection.
+    Kept short (snippets only) so medium model context stays within streaming timeout."""
+    try:
+        async with _httpx.AsyncClient(timeout=15) as client:
+            r = await client.get(
+                f"{SEARXNG_URL}/search",
+                params={"q": query, "format": "json"},
+            )
+            r.raise_for_status()
+            items = r.json().get("results", [])[:4]
+    except Exception as e:
+        return f"[search error: {e}]"
+    if not items:
+        return ""
+    lines = [f"Web search results for: {query}\n"]
+    for i, item in enumerate(items, 1):
+        title = item.get("title", "")
+        url = item.get("url", "")
+        snippet = item.get("content", "")[:400]
+        lines.append(f"[{i}] {title}\nURL: {url}\n{snippet}\n")
+    return "\n".join(lines)
+
+
 # /no_think at the start of the system prompt disables qwen3 chain-of-thought.
 # create_deep_agent prepends our system_prompt before BASE_AGENT_PROMPT, so
 # /no_think lands at position 0 and is respected by qwen3 models via Ollama.
@@ -379,18 +413,33 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
        history = _conversation_buffers.get(session_id, [])
        print(f"[agent] running: {clean_message[:80]!r}", flush=True)

-        # Fetch URL content and memories concurrently — both are IO-bound, neither needs GPU
-        url_context, memories = await asyncio.gather(
-            _fetch_urls_from_message(clean_message),
-            _retrieve_memories(clean_message, session_id),
-        )
+        # Fetch URL content, memories, and (for real-time queries) web search — all IO-bound
+        is_realtime = bool(_REALTIME_RE.search(clean_message))
+        if is_realtime:
+            url_context, memories, search_context = await asyncio.gather(
+                _fetch_urls_from_message(clean_message),
+                _retrieve_memories(clean_message, session_id),
+                _searxng_search_async(clean_message),
+            )
+            if search_context and not search_context.startswith("[search error"):
+                print(f"[agent] pre-search: {len(search_context)} chars for real-time query", flush=True)
+            else:
+                search_context = ""
+        else:
+            url_context, memories = await asyncio.gather(
+                _fetch_urls_from_message(clean_message),
+                _retrieve_memories(clean_message, session_id),
+            )
+            search_context = ""
        if url_context:
            print(f"[agent] crawl4ai: {len(url_context)} chars fetched from message URLs", flush=True)

-        # Build enriched history: memories + url_context as system context for ALL tiers
+        # Build enriched history: memories + url_context + search_context for ALL tiers
        enriched_history = list(history)
        if url_context:
            enriched_history = [{"role": "system", "content": url_context}] + enriched_history
+        if search_context:
+            enriched_history = [{"role": "system", "content": search_context}] + enriched_history
        if memories:
            enriched_history = [{"role": "system", "content": memories}] + enriched_history

@@ -418,6 +467,8 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
                    system_prompt = system_prompt + "\n\n" + memories
                if url_context:
                    system_prompt = system_prompt + "\n\n" + url_context
+                if search_context:
+                    system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + search_context

                # Stream tokens directly — filter out qwen3 <think> blocks
                in_think = False