Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12) from fix/benchmark-log-extraction into main

Reviewed-on: #12
Fix benchmark log extraction: first tier match, increase log tail to 300
2026-03-24 02:43:26 +00:00 · 2026-03-24 02:42:27 +00:00
1 changed file with 3 additions and 3 deletions
--- a/benchmarks/run_benchmark.py
+++ b/benchmarks/run_benchmark.py
@@ -120,7 +120,7 @@ def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
    """Find new tier= lines that appeared after we sent the query."""
    before_lines = set(logs_before.splitlines())
    new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
-    for line in reversed(new_lines):
+    for line in new_lines:
        m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
        if m:
            tier_raw = m.group(1)
@@ -203,7 +203,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:

            print(f"{qid:>3}  {expected:8}  ", end="", flush=True)

-            logs_before = get_log_tail(80)
+            logs_before = get_log_tail(300)
            t0 = time.monotonic()

            ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
@@ -225,7 +225,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:

            # Now the query is done — check logs for tier
            await asyncio.sleep(0.3)
-            logs_after = get_log_tail(80)
+            logs_after = get_log_tail(300)
            actual = extract_tier_from_logs(logs_before, logs_after)

            elapsed = time.monotonic() - t0
Author	SHA1	Message	Date
alvis	0b428e4ada	Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12 ) from fix/benchmark-log-extraction into main Reviewed-on: #12	2026-03-24 02:43:26 +00:00
alvis	98095679be	Fix benchmark log extraction: first tier match, increase log tail to 300 - Remove reversed() from extract_tier_from_logs: first match = routing decision (dry-run complex logs tier=complex early, then overwrites with tier=medium at done) - Increase log tail from 80→300 to handle concurrent log activity Fixes #7, #10 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:42:27 +00:00