From 8ef489786908f7cae043570945ee9ee07c757c87 Mon Sep 17 00:00:00 2001
From: alvis <allogn@gmail.com>
Date: Tue, 24 Mar 2026 02:41:59 +0000
Subject: [PATCH] Fix tier logging: capture actual_tier, fix parse_run_block
 regex, remove reply_text truncation

- Add tier_capture param to _run_agent_pipeline; append tier after determination
- Capture actual_tier in run_agent_task from tier_capture list
- Log tier in replied-in line: [agent] replied in Xs tier=Y
- Remove reply_text[:200] truncation (was breaking benchmark keyword matching)
- Update parse_run_block regex to match new log format; the llm/send fields
  are now always None because the replied-in line no longer reports them

Fixes #1, #3, #4

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 agent.py                    | 13 ++++++++-----
 tests/integration/common.py |  9 ++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/agent.py b/agent.py
index 37b163b..b532c58 100644
--- a/agent.py
+++ b/agent.py
@@ -432,6 +432,7 @@ async def _run_agent_pipeline(
     session_id: str,
     tier_override: str | None = None,
     dry_run: bool = False,
+    tier_capture: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """Core pipeline: pre-flight → routing → inference. Yields text chunks.
 
@@ -501,6 +502,8 @@ async def _run_agent_pipeline(
                 else:
                     print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
                 tier = effective_tier
+                if tier_capture is not None:
+                    tier_capture.append(tier)
 
                 if tier == "light":
                     final_text = light_reply
@@ -597,10 +600,9 @@ async def run_agent_task(
     history = _conversation_buffers.get(session_id, [])
     final_text = None
     actual_tier = "unknown"
+    tier_capture: list = []
 
-    # Patch pipeline to capture tier for logging
-    # We read it from logs post-hoc; capture via a wrapper
-    async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run):
+    async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture):
         await _push_stream_chunk(session_id, chunk)
         if final_text is None:
             final_text = chunk
@@ -608,6 +610,7 @@ async def run_agent_task(
             final_text += chunk
 
     await _end_stream(session_id)
+    actual_tier = tier_capture[0] if tier_capture else "unknown"
 
     elapsed_ms = int((time.monotonic() - t0) * 1000)
 
@@ -621,8 +624,8 @@ async def run_agent_task(
             except Exception as e:
                 print(f"[agent] delivery error (non-fatal): {e}", flush=True)
 
-        print(f"[agent] replied in {elapsed_ms / 1000:.1f}s", flush=True)
-        print(f"[agent] reply_text: {final_text[:200]}", flush=True)
+        print(f"[agent] replied in {elapsed_ms / 1000:.1f}s tier={actual_tier}", flush=True)
+        print(f"[agent] reply_text: {final_text}", flush=True)
 
         # Update conversation buffer
         buf = _conversation_buffers.get(session_id, [])
diff --git a/tests/integration/common.py b/tests/integration/common.py
index 6390096..f5a3bd0 100644
--- a/tests/integration/common.py
+++ b/tests/integration/common.py
@@ -199,14 +199,13 @@ def parse_run_block(lines, msg_prefix):
             if txt:
                 last_ai_text = txt
 
-        m = re.search(r"replied in ([\d.]+)s \(llm=([\d.]+)s, send=([\d.]+)s\)", line)
+        m = re.search(r"replied in ([\d.]+)s(?:\s+tier=(\w+))?", line)
         if m:
-            tier_m = re.search(r"\btier=(\w+)", line)
-            tier = tier_m.group(1) if tier_m else "unknown"
+            tier = m.group(2) if m.group(2) else "unknown"
             reply_data = {
                 "reply_total": float(m.group(1)),
-                "llm":         float(m.group(2)),
-                "send":        float(m.group(3)),
+                "llm":         None,
+                "send":        None,
                 "tier":        tier,
                 "reply_text":  last_ai_text,
                 "memory_s":    None,
-- 
2.49.1