From b7d5896076f14882e53667b763c024a0f1e91ea4 Mon Sep 17 00:00:00 2001
From: alvis <allogn@gmail.com>
Date: Tue, 24 Mar 2026 07:35:13 +0000
Subject: [PATCH] routing benchmark: 1s strict deadline per query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

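Lower QUERY_TIMEOUT from 30s to 1s: classification and routing must
complete within 1 second or the query is recorded as 'timeout'.

Concretely, a query whose tier cannot be extracted from the logs
(previously None, printed as '?') is now recorded as 'timeout', and the
0.3s sleep before reading the log tail is removed.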

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 benchmarks/run_routing_benchmark.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/benchmarks/run_routing_benchmark.py b/benchmarks/run_routing_benchmark.py
index 4a31f7d..3e6b4d6 100644
--- a/benchmarks/run_routing_benchmark.py
+++ b/benchmarks/run_routing_benchmark.py
@@ -30,7 +30,7 @@ import httpx
 ADOLF_URL = "http://localhost:8000"
 DATASET = Path(__file__).parent / "benchmark.json"
 RESULTS = Path(__file__).parent / "routing_results_latest.json"
-QUERY_TIMEOUT = 30  # seconds — routing is fast, no LLM wait
+QUERY_TIMEOUT = 1  # 1s strict deadline — routing must decide within 1 second
 
 
 # ── Log helpers ────────────────────────────────────────────────────────────────
@@ -139,9 +139,10 @@ async def run(queries: list[dict]) -> list[dict]:
             except Exception:
                 pass  # timeout or connection issue — check logs anyway
 
-            await asyncio.sleep(0.3)
             logs_after = get_log_tail(300)
             actual = extract_tier_from_logs(logs_before, logs_after)
+            if actual is None:
+                actual = "timeout"
 
             elapsed = time.monotonic() - t0
             match = actual == expected or (actual == "fast" and expected == "medium")
@@ -149,7 +150,7 @@ async def run(queries: list[dict]) -> list[dict]:
                 correct += 1
 
             mark = "✓" if match else "✗"
-            actual_str = actual or "?"
+            actual_str = actual
             print(f"{actual_str:8}  {mark:3}  {elapsed:5.1f}s  {category:22}  {query_text[:40]}")
 
             results.append({