5 Commits

Author SHA1 Message Date
1d2787766e Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14) from fix/remove-bifrost into main
Reviewed-on: #14
2026-03-24 02:48:40 +00:00
abf792a2ec Remove Bifrost: replace test 4 with LiteLLM health check
- Remove BIFROST constant and fetch_bifrost_logs() from common.py
- Add LITELLM constant (localhost:4000)
- Replace test_memory.py test 4 (Bifrost pass-through) with LiteLLM health check

Fixes #5

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:46:01 +00:00
186e16284b Merge pull request 'Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation' (#11) from fix/tier-logging into main
Reviewed-on: #11
2026-03-24 02:44:35 +00:00
0b428e4ada Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12) from fix/benchmark-log-extraction into main
Reviewed-on: #12
2026-03-24 02:43:26 +00:00
98095679be Fix benchmark log extraction: first tier match, increase log tail to 300
- Remove reversed() from extract_tier_from_logs: first match = routing decision
  (dry-run complex logs tier=complex early, then overwrites with tier=medium at done)
- Increase log tail from 80→300 to handle concurrent log activity

Fixes #7, #10

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:42:27 +00:00
3 changed files with 14 additions and 28 deletions

View File

@@ -120,7 +120,7 @@ def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
     """Find new tier= lines that appeared after we sent the query."""
     before_lines = set(logs_before.splitlines())
     new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
-    for line in reversed(new_lines):
+    for line in new_lines:
         m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
         if m:
             tier_raw = m.group(1)
@@ -203,7 +203,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
     print(f"{qid:>3} {expected:8} ", end="", flush=True)
-    logs_before = get_log_tail(80)
+    logs_before = get_log_tail(300)
     t0 = time.monotonic()
     ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
@@ -225,7 +225,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
     # Now the query is done — check logs for tier
     await asyncio.sleep(0.3)
-    logs_after = get_log_tail(80)
+    logs_after = get_log_tail(300)
     actual = extract_tier_from_logs(logs_before, logs_after)
     elapsed = time.monotonic() - t0

View File

@@ -11,7 +11,7 @@ import urllib.request
 # ── config ────────────────────────────────────────────────────────────────────
 DEEPAGENTS = "http://localhost:8000"
-BIFROST = "http://localhost:8080"
+LITELLM = "http://localhost:4000"
 OPENMEMORY = "http://localhost:8765"
 GRAMMY_HOST = "localhost"
 GRAMMY_PORT = 3001
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
     return []
-def fetch_bifrost_logs(since_s=120):
-    """Return bifrost container log lines from the last since_s seconds."""
-    try:
-        r = subprocess.run(
-            ["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
-             f"--since={int(since_s)}s", "--no-log-prefix"],
-            capture_output=True, text=True, timeout=10,
-        )
-        return r.stdout.splitlines()
-    except Exception:
-        return []
 def parse_run_block(lines, msg_prefix):
     """
     Scan log lines for the LAST '[agent] running: <msg_prefix>' block.

View File

@@ -6,7 +6,7 @@ Tests:
 1. Name store — POST "remember that your name is <RandomName>"
 2. Qdrant point — verifies a new vector was written after store
 3. Name recall — POST "what is your name?" → reply must contain <RandomName>
-4. Bifrost — verifies store/recall requests passed through Bifrost
+4. LiteLLM — verifies LiteLLM proxy is reachable (replaced Bifrost)
 5. Timing profile — breakdown of store and recall latencies
 6. Memory benchmark — store 5 personal facts, recall with 10 questions
 7. Dedup test — same fact stored twice must not grow Qdrant by 2 points
@@ -24,11 +24,11 @@ import time
 import urllib.request
 from common import (
-    DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
+    DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
     NAMES,
     INFO, PASS, FAIL, WARN,
     report, print_summary, tf,
-    get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
+    get, post_json, qdrant_count, fetch_logs,
     parse_run_block, wait_for,
 )
@@ -155,14 +155,13 @@ if _run_name:
         report(results, "Agent replied to recall message", False, "timeout")
         report(results, f"Reply contains '{random_name}'", False, "no reply")
-# ── 4. Bifrost pass-through check ─────────────────────────────────────────
-bifrost_lines = fetch_bifrost_logs(since_s=300)
-report(results, "Bifrost container has log output (requests forwarded)",
-       len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
-bifrost_raw = "\n".join(bifrost_lines)
-report(results, " Bifrost log shows AsyncOpenAI agent requests",
-       "AsyncOpenAI" in bifrost_raw,
-       f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
+# ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
+try:
+    status, _ = get(f"{LITELLM}/health", timeout=5)
+    litellm_ok = status == 200
+except Exception:
+    litellm_ok = False
+report(results, "LiteLLM proxy reachable", litellm_ok)
 # ── 5. Timing profile ─────────────────────────────────────────────────────
 print(f"\n[{INFO}] 5. Timing profile")