Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14) from fix/remove-bifrost into main

Reviewed-on: #14
Remove Bifrost: replace test 4 with LiteLLM health check
2026-03-24 02:48:40 +00:00 · 2026-03-24 02:46:01 +00:00 · 2026-03-24 02:44:35 +00:00 · 2026-03-24 02:43:26 +00:00 · 2026-03-24 02:41:59 +00:00
3 changed files with 23 additions and 35 deletions
--- a/agent.py
+++ b/agent.py
@@ -432,6 +432,7 @@ async def _run_agent_pipeline(
    session_id: str,
    tier_override: str | None = None,
    dry_run: bool = False,
+    tier_capture: list | None = None,
 ) -> AsyncGenerator[str, None]:
    """Core pipeline: pre-flight → routing → inference. Yields text chunks.

@@ -501,6 +502,8 @@ async def _run_agent_pipeline(
                else:
                    print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
                tier = effective_tier
+                if tier_capture is not None:
+                    tier_capture.append(tier)

                if tier == "light":
                    final_text = light_reply
@@ -597,10 +600,9 @@ async def run_agent_task(
    history = _conversation_buffers.get(session_id, [])
    final_text = None
    actual_tier = "unknown"
+    tier_capture: list = []

-    # Patch pipeline to capture tier for logging
-    # We read it from logs post-hoc; capture via a wrapper
-    async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run):
+    async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture):
        await _push_stream_chunk(session_id, chunk)
        if final_text is None:
            final_text = chunk
@@ -608,6 +610,7 @@ async def run_agent_task(
            final_text += chunk

    await _end_stream(session_id)
+    actual_tier = tier_capture[0] if tier_capture else "unknown"

    elapsed_ms = int((time.monotonic() - t0) * 1000)

@@ -621,8 +624,8 @@ async def run_agent_task(
            except Exception as e:
                print(f"[agent] delivery error (non-fatal): {e}", flush=True)

-        print(f"[agent] replied in {elapsed_ms / 1000:.1f}s", flush=True)
-        print(f"[agent] reply_text: {final_text[:200]}", flush=True)
+        print(f"[agent] replied in {elapsed_ms / 1000:.1f}s tier={actual_tier}", flush=True)
+        print(f"[agent] reply_text: {final_text}", flush=True)

        # Update conversation buffer
        buf = _conversation_buffers.get(session_id, [])
--- a/tests/integration/common.py
+++ b/tests/integration/common.py
@@ -11,7 +11,7 @@ import urllib.request

 # ── config ────────────────────────────────────────────────────────────────────
 DEEPAGENTS   = "http://localhost:8000"
-BIFROST      = "http://localhost:8080"
+LITELLM      = "http://localhost:4000"
 OPENMEMORY   = "http://localhost:8765"
 GRAMMY_HOST  = "localhost"
 GRAMMY_PORT  = 3001
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
        return []


-def fetch_bifrost_logs(since_s=120):
-    """Return bifrost container log lines from the last since_s seconds."""
-    try:
-        r = subprocess.run(
-            ["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
-             f"--since={int(since_s)}s", "--no-log-prefix"],
-            capture_output=True, text=True, timeout=10,
-        )
-        return r.stdout.splitlines()
-    except Exception:
-        return []
-
-
 def parse_run_block(lines, msg_prefix):
    """
    Scan log lines for the LAST '[agent] running: <msg_prefix>' block.
@@ -199,14 +186,13 @@ def parse_run_block(lines, msg_prefix):
            if txt:
                last_ai_text = txt

-        m = re.search(r"replied in ([\d.]+)s \(llm=([\d.]+)s, send=([\d.]+)s\)", line)
+        m = re.search(r"replied in ([\d.]+)s(?:\s+tier=(\w+))?", line)
        if m:
-            tier_m = re.search(r"\btier=(\w+)", line)
-            tier = tier_m.group(1) if tier_m else "unknown"
+            tier = m.group(2) if m.group(2) else "unknown"
            reply_data = {
                "reply_total": float(m.group(1)),
-                "llm":         float(m.group(2)),
-                "send":        float(m.group(3)),
+                "llm":         None,
+                "send":        None,
                "tier":        tier,
                "reply_text":  last_ai_text,
                "memory_s":    None,
--- a/tests/integration/test_memory.py
+++ b/tests/integration/test_memory.py
@@ -6,7 +6,7 @@ Tests:
  1. Name store   — POST "remember that your name is <RandomName>"
  2. Qdrant point — verifies a new vector was written after store
  3. Name recall  — POST "what is your name?" → reply must contain <RandomName>
-  4. Bifrost      — verifies store/recall requests passed through Bifrost
+  4. LiteLLM      — verifies LiteLLM proxy is reachable (replaced Bifrost)
  5. Timing profile — breakdown of store and recall latencies
  6. Memory benchmark — store 5 personal facts, recall with 10 questions
  7. Dedup test   — same fact stored twice must not grow Qdrant by 2 points
@@ -24,11 +24,11 @@ import time
 import urllib.request

 from common import (
-    DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
+    DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
    NAMES,
    INFO, PASS, FAIL, WARN,
    report, print_summary, tf,
-    get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
+    get, post_json, qdrant_count, fetch_logs,
    parse_run_block, wait_for,
 )

@@ -155,14 +155,13 @@ if _run_name:
        report(results, "Agent replied to recall message", False, "timeout")
        report(results, f"Reply contains '{random_name}'", False, "no reply")

-    # ── 4. Bifrost pass-through check ─────────────────────────────────────────
-    bifrost_lines = fetch_bifrost_logs(since_s=300)
-    report(results, "Bifrost container has log output (requests forwarded)",
-           len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
-    bifrost_raw = "\n".join(bifrost_lines)
-    report(results, "  Bifrost log shows AsyncOpenAI agent requests",
-           "AsyncOpenAI" in bifrost_raw,
-           f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
+    # ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
+    try:
+        status, _ = get(f"{LITELLM}/health", timeout=5)
+        litellm_ok = status == 200
+    except Exception:
+        litellm_ok = False
+    report(results, "LiteLLM proxy reachable", litellm_ok)

    # ── 5. Timing profile ─────────────────────────────────────────────────────
    print(f"\n[{INFO}] 5. Timing profile")
Author	SHA1	Message	Date
alvis	1d2787766e	Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14) from fix/remove-bifrost into main Reviewed-on: #14	2026-03-24 02:48:40 +00:00
alvis	abf792a2ec	Remove Bifrost: replace test 4 with LiteLLM health check - Remove BIFROST constant and fetch_bifrost_logs() from common.py - Add LITELLM constant (localhost:4000) - Replace test_memory.py test 4 (Bifrost pass-through) with LiteLLM health check Fixes #5 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:46:01 +00:00
alvis	186e16284b	Merge pull request 'Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation' (#11) from fix/tier-logging into main Reviewed-on: #11	2026-03-24 02:44:35 +00:00
alvis	0b428e4ada	Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12) from fix/benchmark-log-extraction into main Reviewed-on: #12	2026-03-24 02:43:26 +00:00
alvis	8ef4897869	Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation - Add tier_capture param to _run_agent_pipeline; append tier after determination - Capture actual_tier in run_agent_task from tier_capture list - Log tier in replied-in line: [agent] replied in Xs tier=Y - Remove reply_text[:200] truncation (was breaking benchmark keyword matching) - Update parse_run_block regex to match new log format; llm/send fields now None Fixes #1, #3, #4 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:41:59 +00:00