From 8ef489786908f7cae043570945ee9ee07c757c87 Mon Sep 17 00:00:00 2001 From: alvis Date: Tue, 24 Mar 2026 02:41:59 +0000 Subject: [PATCH] Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation - Add tier_capture param to _run_agent_pipeline; append tier after determination - Capture actual_tier in run_agent_task from tier_capture list - Log tier in replied-in line: [agent] replied in Xs tier=Y - Remove reply_text[:200] truncation (was breaking benchmark keyword matching) - Update parse_run_block regex to match new log format; llm/send fields now None Fixes #1, #3, #4 Co-Authored-By: Claude Sonnet 4.6 --- agent.py | 13 ++++++++----- tests/integration/common.py | 9 ++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/agent.py b/agent.py index 37b163b..b532c58 100644 --- a/agent.py +++ b/agent.py @@ -432,6 +432,7 @@ async def _run_agent_pipeline( session_id: str, tier_override: str | None = None, dry_run: bool = False, + tier_capture: list | None = None, ) -> AsyncGenerator[str, None]: """Core pipeline: pre-flight → routing → inference. Yields text chunks. @@ -501,6 +502,8 @@ async def _run_agent_pipeline( else: print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True) tier = effective_tier + if tier_capture is not None: + tier_capture.append(tier) if tier == "light": final_text = light_reply @@ -597,10 +600,9 @@ async def run_agent_task( history = _conversation_buffers.get(session_id, []) final_text = None actual_tier = "unknown" + tier_capture: list = [] - # Patch pipeline to capture tier for logging - # We read it from logs post-hoc; capture via a wrapper - async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run): + async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture): await _push_stream_chunk(session_id, chunk) if final_text is None: final_text = chunk @@ -608,6 +610,7 @@ async def run_agent_task( final_text += chunk await _end_stream(session_id) + actual_tier = tier_capture[0] if tier_capture else "unknown" elapsed_ms = int((time.monotonic() - t0) * 1000) @@ -621,8 +624,8 @@ async def run_agent_task( except Exception as e: print(f"[agent] delivery error (non-fatal): {e}", flush=True) - print(f"[agent] replied in {elapsed_ms / 1000:.1f}s", flush=True) - print(f"[agent] reply_text: {final_text[:200]}", flush=True) + print(f"[agent] replied in {elapsed_ms / 1000:.1f}s tier={actual_tier}", flush=True) + print(f"[agent] reply_text: {final_text}", flush=True) # Update conversation buffer buf = _conversation_buffers.get(session_id, []) diff --git a/tests/integration/common.py b/tests/integration/common.py index 6390096..f5a3bd0 100644 --- a/tests/integration/common.py +++ b/tests/integration/common.py @@ -199,14 +199,13 @@ def parse_run_block(lines, msg_prefix): if txt: last_ai_text = txt - m = re.search(r"replied in ([\d.]+)s \(llm=([\d.]+)s, send=([\d.]+)s\)", line) + m = re.search(r"replied in ([\d.]+)s(?:\s+tier=(\w+))?", line) if m: - tier_m = re.search(r"\btier=(\w+)", line) - tier = tier_m.group(1) if tier_m else "unknown" + tier = m.group(2) if m.group(2) else "unknown" reply_data = { "reply_total": float(m.group(1)), - "llm": float(m.group(2)), - "send": float(m.group(3)), + "llm": None, + "send": None, "tier": tier, "reply_text": last_ai_text, "memory_s": None, -- 2.49.1