Add real-time query handling: pre-search enrichment + routing fix
- router.py: add _MEDIUM_FORCE_PATTERNS to block weather/news/price queries from light tier regardless of LLM classification - agent.py: add _REALTIME_RE and _searxng_search_async(); real-time queries now run SearXNG search concurrently with URL fetch + memory retrieval, injecting snippets into medium system prompt - tests/use_cases/weather_now.md: use case test for weather queries Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
63
agent.py
63
agent.py
@@ -12,6 +12,16 @@ import httpx as _httpx
|
|||||||
|
|
||||||
_URL_RE = _re.compile(r'https?://[^\s<>"\']+')
|
_URL_RE = _re.compile(r'https?://[^\s<>"\']+')
|
||||||
|
|
||||||
|
# Queries that need live data — trigger pre-search enrichment for medium tier
|
||||||
|
_REALTIME_RE = _re.compile(
|
||||||
|
r"\b(weather|forecast|temperature|rain(ing)?|snow(ing)?|humidity|wind speed"
|
||||||
|
r"|today.?s news|breaking news|latest news|news today|current events"
|
||||||
|
r"|bitcoin price|crypto price|stock price|exchange rate"
|
||||||
|
r"|right now|currently|at the moment|live score|score now|score today"
|
||||||
|
r"|open now|hours today|is .+ open)\b",
|
||||||
|
_re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _extract_urls(text: str) -> list[str]:
|
def _extract_urls(text: str) -> list[str]:
|
||||||
return _URL_RE.findall(text)
|
return _URL_RE.findall(text)
|
||||||
@@ -88,6 +98,30 @@ async def _fetch_urls_from_message(message: str) -> str:
|
|||||||
return "User's message contains URLs. Fetched content:\n\n" + "\n\n".join(parts)
|
return "User's message contains URLs. Fetched content:\n\n" + "\n\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
async def _searxng_search_async(query: str) -> str:
    """Run a SearXNG search and return the top result snippets as prompt text.

    Only the first four results are used and each snippet is cut to 400
    characters, keeping the injected context short so the medium model stays
    within its streaming timeout.

    Args:
        query: Search string sent to SearXNG as the ``q`` parameter.

    Returns:
        A text block headed by ``Web search results for: <query>``; an empty
        string when there are no usable results; or ``"[search error: ...]"``
        when the HTTP request or JSON decoding fails.
    """
    try:
        async with _httpx.AsyncClient(timeout=15) as client:
            r = await client.get(
                f"{SEARXNG_URL}/search",
                params={"q": query, "format": "json"},
            )
            r.raise_for_status()
            items = r.json().get("results", [])[:4]
    except Exception as e:
        # Best-effort enrichment: report failure as a sentinel string rather
        # than raising into the caller.
        return f"[search error: {e}]"

    # Formatting happens outside the try block, so malformed entries must not
    # raise here: drop anything that is not a JSON object.
    items = [item for item in items if isinstance(item, dict)]
    if not items:
        return ""

    lines = [f"Web search results for: {query}\n"]
    for i, item in enumerate(items, 1):
        # `or ""` also covers keys that are present but null, which
        # `.get(key, "")` would pass through as None.
        title = item.get("title") or ""
        url = item.get("url") or ""
        snippet = str(item.get("content") or "")[:400]
        lines.append(f"[{i}] {title}\nURL: {url}\n{snippet}\n")
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
# /no_think at the start of the system prompt disables qwen3 chain-of-thought.
|
# /no_think at the start of the system prompt disables qwen3 chain-of-thought.
|
||||||
# create_deep_agent prepends our system_prompt before BASE_AGENT_PROMPT, so
|
# create_deep_agent prepends our system_prompt before BASE_AGENT_PROMPT, so
|
||||||
# /no_think lands at position 0 and is respected by qwen3 models via Ollama.
|
# /no_think lands at position 0 and is respected by qwen3 models via Ollama.
|
||||||
@@ -379,18 +413,33 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
|
|||||||
history = _conversation_buffers.get(session_id, [])
|
history = _conversation_buffers.get(session_id, [])
|
||||||
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
|
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
|
||||||
|
|
||||||
# Fetch URL content and memories concurrently — both are IO-bound, neither needs GPU
|
# Fetch URL content, memories, and (for real-time queries) web search — all IO-bound
|
||||||
url_context, memories = await asyncio.gather(
|
is_realtime = bool(_REALTIME_RE.search(clean_message))
|
||||||
_fetch_urls_from_message(clean_message),
|
if is_realtime:
|
||||||
_retrieve_memories(clean_message, session_id),
|
url_context, memories, search_context = await asyncio.gather(
|
||||||
)
|
_fetch_urls_from_message(clean_message),
|
||||||
|
_retrieve_memories(clean_message, session_id),
|
||||||
|
_searxng_search_async(clean_message),
|
||||||
|
)
|
||||||
|
if search_context and not search_context.startswith("[search error"):
|
||||||
|
print(f"[agent] pre-search: {len(search_context)} chars for real-time query", flush=True)
|
||||||
|
else:
|
||||||
|
search_context = ""
|
||||||
|
else:
|
||||||
|
url_context, memories = await asyncio.gather(
|
||||||
|
_fetch_urls_from_message(clean_message),
|
||||||
|
_retrieve_memories(clean_message, session_id),
|
||||||
|
)
|
||||||
|
search_context = ""
|
||||||
if url_context:
|
if url_context:
|
||||||
print(f"[agent] crawl4ai: {len(url_context)} chars fetched from message URLs", flush=True)
|
print(f"[agent] crawl4ai: {len(url_context)} chars fetched from message URLs", flush=True)
|
||||||
|
|
||||||
# Build enriched history: memories + url_context as system context for ALL tiers
|
# Build enriched history: memories + url_context + search_context for ALL tiers
|
||||||
enriched_history = list(history)
|
enriched_history = list(history)
|
||||||
if url_context:
|
if url_context:
|
||||||
enriched_history = [{"role": "system", "content": url_context}] + enriched_history
|
enriched_history = [{"role": "system", "content": url_context}] + enriched_history
|
||||||
|
if search_context:
|
||||||
|
enriched_history = [{"role": "system", "content": search_context}] + enriched_history
|
||||||
if memories:
|
if memories:
|
||||||
enriched_history = [{"role": "system", "content": memories}] + enriched_history
|
enriched_history = [{"role": "system", "content": memories}] + enriched_history
|
||||||
|
|
||||||
@@ -418,6 +467,8 @@ async def run_agent_task(message: str, session_id: str, channel: str = "telegram
|
|||||||
system_prompt = system_prompt + "\n\n" + memories
|
system_prompt = system_prompt + "\n\n" + memories
|
||||||
if url_context:
|
if url_context:
|
||||||
system_prompt = system_prompt + "\n\n" + url_context
|
system_prompt = system_prompt + "\n\n" + url_context
|
||||||
|
if search_context:
|
||||||
|
system_prompt = system_prompt + "\n\nLive web search results (use these to answer):\n\n" + search_context
|
||||||
|
|
||||||
# Stream tokens directly — filter out qwen3 <think> blocks
|
# Stream tokens directly — filter out qwen3 <think> blocks
|
||||||
in_think = False
|
in_think = False
|
||||||
|
|||||||
17
router.py
17
router.py
@@ -23,6 +23,16 @@ _LIGHT_PATTERNS = re.compile(
|
|||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Queries that require live data — never answer from static knowledge
|
||||||
|
_MEDIUM_FORCE_PATTERNS = re.compile(
|
||||||
|
r"\b(weather|forecast|temperature|rain(ing)?|snow(ing)?|humidity|wind speed"
|
||||||
|
r"|today.?s news|breaking news|latest news|news today|current events"
|
||||||
|
r"|bitcoin price|crypto price|stock price|exchange rate|usd|eur|btc"
|
||||||
|
r"|right now|currently|at the moment|live score|score now|score today"
|
||||||
|
r"|open now|hours today|is .+ open)\b",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
# ── LLM classification prompt ─────────────────────────────────────────────────
|
# ── LLM classification prompt ─────────────────────────────────────────────────
|
||||||
CLASSIFY_PROMPT = """Classify the message. Output ONLY one word: light, medium, or complex.
|
CLASSIFY_PROMPT = """Classify the message. Output ONLY one word: light, medium, or complex.
|
||||||
|
|
||||||
@@ -90,7 +100,12 @@ class Router:
|
|||||||
if force_complex:
|
if force_complex:
|
||||||
return "complex", None
|
return "complex", None
|
||||||
|
|
||||||
# Step 0: regex pre-classification for obvious light patterns
|
# Step 0a: force medium for real-time / live-data queries
|
||||||
|
if _MEDIUM_FORCE_PATTERNS.search(message.strip()):
|
||||||
|
print(f"[router] regex→medium (real-time query)", flush=True)
|
||||||
|
return "medium", None
|
||||||
|
|
||||||
|
# Step 0b: regex pre-classification for obvious light patterns
|
||||||
if _LIGHT_PATTERNS.match(message.strip()):
|
if _LIGHT_PATTERNS.match(message.strip()):
|
||||||
print(f"[router] regex→light", flush=True)
|
print(f"[router] regex→light", flush=True)
|
||||||
return await self._generate_light_reply(message, history)
|
return await self._generate_light_reply(message, history)
|
||||||
|
|||||||
40
tests/use_cases/weather_now.md
Normal file
40
tests/use_cases/weather_now.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# Use Case: Current Weather Query
|
||||||
|
|
||||||
|
Verify how Adolf handles a real-time information request ("what's the weather now?").
|
||||||
|
This question requires live data that an LLM cannot answer from training alone.
|
||||||
|
|
||||||
|
## Steps
|
||||||
|
|
||||||
|
**1. Send the weather query:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -s -X POST http://localhost:8000/message \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"text": "whats the weather right now?", "session_id": "use-case-weather", "channel": "cli", "user_id": "claude"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Stream the reply** (medium tier should respond within 30s):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -s -N --max-time 60 "http://localhost:8000/stream/use-case-weather"
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Check routing tier and any tool usage in logs:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f /home/alvis/adolf/docker-compose.yml logs deepagents \
|
||||||
|
--since=120s | grep -E "tier=|web_search|fetch_url|crawl4ai"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Evaluate (use your judgment)
|
||||||
|
|
||||||
|
Check each of the following:
|
||||||
|
|
||||||
|
- **Routing**: which tier was selected? Was it appropriate for a real-time query?
|
||||||
|
- **Tool use**: did the agent use web_search or any external data source?
|
||||||
|
- **Accuracy**: does the response contain actual current weather data (temperature, conditions) or is it a guess/refusal?
|
||||||
|
- **Honesty**: if the agent cannot fetch weather, does it say so — or does it hallucinate fake data?
|
||||||
|
- **Helpfulness**: does the response suggest how the user could get weather info (e.g. check a website, use /think)?
|
||||||
|
|
||||||
|
Report PASS only if the response is both honest and helpful. A hallucinated weather
|
||||||
|
report is a FAIL. An honest "I can't check weather" with guidance is a PASS.
|
||||||
Reference in New Issue
Block a user