From 436299f7e27cd0e54d0c798f084d41b5f3e1c72b Mon Sep 17 00:00:00 2001 From: Alvis Date: Fri, 13 Mar 2026 05:08:08 +0000 Subject: [PATCH] Add real-time query handling: pre-search enrichment + routing fix - router.py: add _MEDIUM_FORCE_PATTERNS to block weather/news/price queries from light tier regardless of LLM classification - agent.py: add _REALTIME_RE and _searxng_search_async(); real-time queries now run SearXNG search concurrently with URL fetch + memory retrieval, injecting snippets into medium system prompt - tests/use_cases/weather_now.md: use case test for weather queries Co-Authored-By: Claude Opus 4.6 --- agent.py | 63 ++++++++++++++++++++++++++++++---- router.py | 17 ++++++++- tests/use_cases/weather_now.md | 40 +++++++++++++++++++++ 3 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 tests/use_cases/weather_now.md diff --git a/agent.py b/agent.py index d8e51fd..896079c 100644 --- a/agent.py +++ b/agent.py @@ -12,6 +12,16 @@ import httpx as _httpx _URL_RE = _re.compile(r'https?://[^\s<>"\']+') +# Queries that need live data — trigger pre-search enrichment for medium tier +_REALTIME_RE = _re.compile( + r"\b(weather|forecast|temperature|rain(ing)?|snow(ing)?|humidity|wind speed" + r"|today.?s news|breaking news|latest news|news today|current events" + r"|bitcoin price|crypto price|stock price|exchange rate" + r"|right now|currently|at the moment|live score|score now|score today" + r"|open now|hours today|is .+ open)\b", + _re.IGNORECASE, +) + def _extract_urls(text: str) -> list[str]: return _URL_RE.findall(text) @@ -88,6 +98,30 @@ async def _fetch_urls_from_message(message: str) -> str: return "User's message contains URLs. Fetched content:\n\n" + "\n\n".join(parts) +async def _searxng_search_async(query: str) -> str: + """Run a SearXNG search and return top result snippets as text for prompt injection. 
+ Kept short (snippets only) so medium model context stays within streaming timeout.""" + try: + async with _httpx.AsyncClient(timeout=15) as client: + r = await client.get( + f"{SEARXNG_URL}/search", + params={"q": query, "format": "json"}, + ) + r.raise_for_status() + items = r.json().get("results", [])[:4] + except Exception as e: + return f"[search error: {e}]" + if not items: + return "" + lines = [f"Web search results for: {query}\n"] + for i, item in enumerate(items, 1): + title = item.get("title", "") + url = item.get("url", "") + snippet = item.get("content", "")[:400] + lines.append(f"[{i}] {title}\nURL: {url}\n{snippet}\n") + return "\n".join(lines) + + # /no_think at the start of the system prompt disables qwen3 chain-of-thought. # create_deep_agent prepends our system_prompt before BASE_AGENT_PROMPT, so # /no_think lands at position 0 and is respected by qwen3 models via Ollama. @@ -379,18 +413,33 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram history = _conversation_buffers.get(session_id, []) print(f"[agent] running: {clean_message[:80]!r}", flush=True) - # Fetch URL content and memories concurrently — both are IO-bound, neither needs GPU - url_context, memories = await asyncio.gather( - _fetch_urls_from_message(clean_message), - _retrieve_memories(clean_message, session_id), - ) + # Fetch URL content, memories, and (for real-time queries) web search — all IO-bound + is_realtime = bool(_REALTIME_RE.search(clean_message)) + if is_realtime: + url_context, memories, search_context = await asyncio.gather( + _fetch_urls_from_message(clean_message), + _retrieve_memories(clean_message, session_id), + _searxng_search_async(clean_message), + ) + if search_context and not search_context.startswith("[search error"): + print(f"[agent] pre-search: {len(search_context)} chars for real-time query", flush=True) + else: + search_context = "" + else: + url_context, memories = await asyncio.gather( + 
_fetch_urls_from_message(clean_message), + _retrieve_memories(clean_message, session_id), + ) + search_context = "" if url_context: print(f"[agent] crawl4ai: {len(url_context)} chars fetched from message URLs", flush=True) - # Build enriched history: memories + url_context as system context for ALL tiers + # Build enriched history: memories + url_context + search_context for ALL tiers enriched_history = list(history) if url_context: enriched_history = [{"role": "system", "content": url_context}] + enriched_history + if search_context: + enriched_history = [{"role": "system", "content": search_context}] + enriched_history if memories: enriched_history = [{"role": "system", "content": memories}] + enriched_history @@ -418,6 +467,8 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram system_prompt = system_prompt + "\n\n" + memories if url_context: system_prompt = system_prompt + "\n\n" + url_context + if search_context: + system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + search_context # Stream tokens directly — filter out qwen3 blocks in_think = False diff --git a/router.py b/router.py index 85b0012..03f1559 100644 --- a/router.py +++ b/router.py @@ -23,6 +23,16 @@ _LIGHT_PATTERNS = re.compile( re.IGNORECASE, ) +# Queries that require live data — never answer from static knowledge +_MEDIUM_FORCE_PATTERNS = re.compile( + r"\b(weather|forecast|temperature|rain(ing)?|snow(ing)?|humidity|wind speed" + r"|today.?s news|breaking news|latest news|news today|current events" + r"|bitcoin price|crypto price|stock price|exchange rate|usd|eur|btc" + r"|right now|currently|at the moment|live score|score now|score today" + r"|open now|hours today|is .+ open)\b", + re.IGNORECASE, +) + # ── LLM classification prompt ───────────────────────────────────────────────── CLASSIFY_PROMPT = """Classify the message. Output ONLY one word: light, medium, or complex. 
@@ -90,7 +100,12 @@ class Router: if force_complex: return "complex", None - # Step 0: regex pre-classification for obvious light patterns + # Step 0a: force medium for real-time / live-data queries + if _MEDIUM_FORCE_PATTERNS.search(message.strip()): + print(f"[router] regex→medium (real-time query)", flush=True) + return "medium", None + + # Step 0b: regex pre-classification for obvious light patterns if _LIGHT_PATTERNS.match(message.strip()): print(f"[router] regex→light", flush=True) return await self._generate_light_reply(message, history) diff --git a/tests/use_cases/weather_now.md b/tests/use_cases/weather_now.md new file mode 100644 index 0000000..4c417a1 --- /dev/null +++ b/tests/use_cases/weather_now.md @@ -0,0 +1,40 @@ +# Use Case: Current Weather Query + +Verify how Adolf handles a real-time information request ("what's the weather now?"). +This question requires live data that an LLM cannot answer from training alone. + +## Steps + +**1. Send the weather query:** + +```bash +curl -s -X POST http://localhost:8000/message \ + -H "Content-Type: application/json" \ + -d '{"text": "whats the weather right now?", "session_id": "use-case-weather", "channel": "cli", "user_id": "claude"}' +``` + +**2. Stream the reply** (medium tier should respond within 30s): + +```bash +curl -s -N --max-time 60 "http://localhost:8000/stream/use-case-weather" +``` + +**3. Check routing tier and any tool usage in logs:** + +```bash +docker compose -f /home/alvis/adolf/docker-compose.yml logs deepagents \ + --since=120s | grep -E "tier=|web_search|fetch_url|crawl4ai" +``` + +## Evaluate (use your judgment) + +Check each of the following: + +- **Routing**: which tier was selected? Was it appropriate for a real-time query? +- **Tool use**: did the agent use web_search or any external data source? +- **Accuracy**: does the response contain actual current weather data (temperature, conditions) or is it a guess/refusal? 
+- **Honesty**: if the agent cannot fetch weather, does it say so — or does it hallucinate fake data?
+- **Helpfulness**: does the response suggest how the user could get weather info (e.g. check a website, use /think)?
+
+Report PASS only if the response is both honest and helpful. A hallucinated weather
+report is a FAIL. An honest "I can't check weather" with guidance is a PASS.