5 Commits

Author SHA1 Message Date
1d2787766e Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14) from fix/remove-bifrost into main
Reviewed-on: #14
2026-03-24 02:48:40 +00:00
abf792a2ec Remove Bifrost: replace test 4 with LiteLLM health check
- Remove BIFROST constant and fetch_bifrost_logs() from common.py
- Add LITELLM constant (localhost:4000)
- Replace test_memory.py test 4 (Bifrost pass-through) with LiteLLM health check

Fixes #5

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:46:01 +00:00
186e16284b Merge pull request 'Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation' (#11) from fix/tier-logging into main
Reviewed-on: #11
2026-03-24 02:44:35 +00:00
0b428e4ada Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12) from fix/benchmark-log-extraction into main
Reviewed-on: #12
2026-03-24 02:43:26 +00:00
98095679be Fix benchmark log extraction: first tier match, increase log tail to 300
- Remove reversed() from extract_tier_from_logs: first match = routing decision
  (dry-run complex logs tier=complex early, then overwrites with tier=medium at done)
- Increase log tail from 80→300 to handle concurrent log activity

Fixes #7, #10

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:42:27 +00:00
3 changed files with 14 additions and 28 deletions

View File

@@ -120,7 +120,7 @@ def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
     """Find new tier= lines that appeared after we sent the query."""
     before_lines = set(logs_before.splitlines())
     new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
-    for line in reversed(new_lines):
+    for line in new_lines:
         m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
         if m:
             tier_raw = m.group(1)
@@ -203,7 +203,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
     print(f"{qid:>3} {expected:8} ", end="", flush=True)
-    logs_before = get_log_tail(80)
+    logs_before = get_log_tail(300)
     t0 = time.monotonic()
     ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
@@ -225,7 +225,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
     # Now the query is done — check logs for tier
     await asyncio.sleep(0.3)
-    logs_after = get_log_tail(80)
+    logs_after = get_log_tail(300)
     actual = extract_tier_from_logs(logs_before, logs_after)
     elapsed = time.monotonic() - t0

View File

@@ -11,7 +11,7 @@ import urllib.request
 # ── config ────────────────────────────────────────────────────────────────────
 DEEPAGENTS = "http://localhost:8000"
-BIFROST = "http://localhost:8080"
+LITELLM = "http://localhost:4000"
 OPENMEMORY = "http://localhost:8765"
 GRAMMY_HOST = "localhost"
 GRAMMY_PORT = 3001
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
     return []
-def fetch_bifrost_logs(since_s=120):
-    """Return bifrost container log lines from the last since_s seconds."""
-    try:
-        r = subprocess.run(
-            ["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
-             f"--since={int(since_s)}s", "--no-log-prefix"],
-            capture_output=True, text=True, timeout=10,
-        )
-        return r.stdout.splitlines()
-    except Exception:
-        return []
 def parse_run_block(lines, msg_prefix):
     """
     Scan log lines for the LAST '[agent] running: <msg_prefix>' block.

View File

@@ -6,7 +6,7 @@ Tests:
 1. Name store — POST "remember that your name is <RandomName>"
 2. Qdrant point — verifies a new vector was written after store
 3. Name recall — POST "what is your name?" → reply must contain <RandomName>
-4. Bifrost — verifies store/recall requests passed through Bifrost
+4. LiteLLM — verifies LiteLLM proxy is reachable (replaced Bifrost)
 5. Timing profile — breakdown of store and recall latencies
 6. Memory benchmark — store 5 personal facts, recall with 10 questions
 7. Dedup test — same fact stored twice must not grow Qdrant by 2 points
@@ -24,11 +24,11 @@ import time
 import urllib.request
 from common import (
-    DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
+    DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
     NAMES,
     INFO, PASS, FAIL, WARN,
     report, print_summary, tf,
-    get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
+    get, post_json, qdrant_count, fetch_logs,
     parse_run_block, wait_for,
 )
@@ -155,14 +155,13 @@ if _run_name:
         report(results, "Agent replied to recall message", False, "timeout")
         report(results, f"Reply contains '{random_name}'", False, "no reply")
-# ── 4. Bifrost pass-through check ─────────────────────────────────────────
-bifrost_lines = fetch_bifrost_logs(since_s=300)
-report(results, "Bifrost container has log output (requests forwarded)",
-       len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
-bifrost_raw = "\n".join(bifrost_lines)
-report(results, " Bifrost log shows AsyncOpenAI agent requests",
-       "AsyncOpenAI" in bifrost_raw,
-       f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
+# ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
+try:
+    status, _ = get(f"{LITELLM}/health", timeout=5)
+    litellm_ok = status == 200
+except Exception:
+    litellm_ok = False
+report(results, "LiteLLM proxy reachable", litellm_ok)
 # ── 5. Timing profile ─────────────────────────────────────────────────────
 print(f"\n[{INFO}] 5. Timing profile")