From b7d5896076f14882e53667b763c024a0f1e91ea4 Mon Sep 17 00:00:00 2001 From: alvis Date: Tue, 24 Mar 2026 07:35:13 +0000 Subject: [PATCH] routing benchmark: 1s strict deadline per query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QUERY_TIMEOUT=1s — classification and routing must complete within 1 second or the query is recorded as 'timeout'. Co-Authored-By: Claude Sonnet 4.6 --- benchmarks/run_routing_benchmark.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/run_routing_benchmark.py b/benchmarks/run_routing_benchmark.py index 4a31f7d..3e6b4d6 100644 --- a/benchmarks/run_routing_benchmark.py +++ b/benchmarks/run_routing_benchmark.py @@ -30,7 +30,7 @@ import httpx ADOLF_URL = "http://localhost:8000" DATASET = Path(__file__).parent / "benchmark.json" RESULTS = Path(__file__).parent / "routing_results_latest.json" -QUERY_TIMEOUT = 30 # seconds — routing is fast, no LLM wait +QUERY_TIMEOUT = 1 # 1s strict deadline — routing must decide within 1 second # ── Log helpers ──────────────────────────────────────────────────────────────── @@ -139,9 +139,10 @@ async def run(queries: list[dict]) -> list[dict]: except Exception: pass # timeout or connection issue — check logs anyway - await asyncio.sleep(0.3) logs_after = get_log_tail(300) actual = extract_tier_from_logs(logs_before, logs_after) + if actual is None: + actual = "timeout" elapsed = time.monotonic() - t0 match = actual == expected or (actual == "fast" and expected == "medium") @@ -149,7 +150,7 @@ async def run(queries: list[dict]) -> list[dict]: correct += 1 mark = "✓" if match else "✗" - actual_str = actual or "?" + actual_str = actual print(f"{actual_str:8} {mark:3} {elapsed:5.1f}s {category:22} {query_text[:40]}") results.append({