Compare commits
5 Commits
fix/tier-l
...
1d2787766e
| Author | SHA1 | Date | |
|---|---|---|---|
| | 1d2787766e | | |
| | abf792a2ec | | |
| | 186e16284b | | |
| | 0b428e4ada | | |
| | 98095679be | | |
@@ -120,7 +120,7 @@ def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
|
|||||||
"""Find new tier= lines that appeared after we sent the query."""
|
"""Find new tier= lines that appeared after we sent the query."""
|
||||||
before_lines = set(logs_before.splitlines())
|
before_lines = set(logs_before.splitlines())
|
||||||
new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
|
new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
|
||||||
for line in reversed(new_lines):
|
for line in new_lines:
|
||||||
m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
|
m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
|
||||||
if m:
|
if m:
|
||||||
tier_raw = m.group(1)
|
tier_raw = m.group(1)
|
||||||
@@ -203,7 +203,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
|
|||||||
|
|
||||||
print(f"{qid:>3} {expected:8} ", end="", flush=True)
|
print(f"{qid:>3} {expected:8} ", end="", flush=True)
|
||||||
|
|
||||||
logs_before = get_log_tail(80)
|
logs_before = get_log_tail(300)
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
|
ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
|
||||||
@@ -225,7 +225,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:
|
|||||||
|
|
||||||
# Now the query is done — check logs for tier
|
# Now the query is done — check logs for tier
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
logs_after = get_log_tail(80)
|
logs_after = get_log_tail(300)
|
||||||
actual = extract_tier_from_logs(logs_before, logs_after)
|
actual = extract_tier_from_logs(logs_before, logs_after)
|
||||||
|
|
||||||
elapsed = time.monotonic() - t0
|
elapsed = time.monotonic() - t0
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import urllib.request
|
|||||||
|
|
||||||
# ── config ────────────────────────────────────────────────────────────────────
|
# ── config ────────────────────────────────────────────────────────────────────
|
||||||
DEEPAGENTS = "http://localhost:8000"
|
DEEPAGENTS = "http://localhost:8000"
|
||||||
BIFROST = "http://localhost:8080"
|
LITELLM = "http://localhost:4000"
|
||||||
OPENMEMORY = "http://localhost:8765"
|
OPENMEMORY = "http://localhost:8765"
|
||||||
GRAMMY_HOST = "localhost"
|
GRAMMY_HOST = "localhost"
|
||||||
GRAMMY_PORT = 3001
|
GRAMMY_PORT = 3001
|
||||||
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def fetch_bifrost_logs(since_s=120):
|
|
||||||
"""Return bifrost container log lines from the last since_s seconds."""
|
|
||||||
try:
|
|
||||||
r = subprocess.run(
|
|
||||||
["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
|
|
||||||
f"--since={int(since_s)}s", "--no-log-prefix"],
|
|
||||||
capture_output=True, text=True, timeout=10,
|
|
||||||
)
|
|
||||||
return r.stdout.splitlines()
|
|
||||||
except Exception:
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def parse_run_block(lines, msg_prefix):
|
def parse_run_block(lines, msg_prefix):
|
||||||
"""
|
"""
|
||||||
Scan log lines for the LAST '[agent] running: <msg_prefix>' block.
|
Scan log lines for the LAST '[agent] running: <msg_prefix>' block.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Tests:
|
|||||||
1. Name store — POST "remember that your name is <RandomName>"
|
1. Name store — POST "remember that your name is <RandomName>"
|
||||||
2. Qdrant point — verifies a new vector was written after store
|
2. Qdrant point — verifies a new vector was written after store
|
||||||
3. Name recall — POST "what is your name?" → reply must contain <RandomName>
|
3. Name recall — POST "what is your name?" → reply must contain <RandomName>
|
||||||
4. Bifrost — verifies store/recall requests passed through Bifrost
|
4. LiteLLM — verifies LiteLLM proxy is reachable (replaced Bifrost)
|
||||||
5. Timing profile — breakdown of store and recall latencies
|
5. Timing profile — breakdown of store and recall latencies
|
||||||
6. Memory benchmark — store 5 personal facts, recall with 10 questions
|
6. Memory benchmark — store 5 personal facts, recall with 10 questions
|
||||||
7. Dedup test — same fact stored twice must not grow Qdrant by 2 points
|
7. Dedup test — same fact stored twice must not grow Qdrant by 2 points
|
||||||
@@ -24,11 +24,11 @@ import time
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
from common import (
|
from common import (
|
||||||
DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
|
DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
|
||||||
NAMES,
|
NAMES,
|
||||||
INFO, PASS, FAIL, WARN,
|
INFO, PASS, FAIL, WARN,
|
||||||
report, print_summary, tf,
|
report, print_summary, tf,
|
||||||
get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
|
get, post_json, qdrant_count, fetch_logs,
|
||||||
parse_run_block, wait_for,
|
parse_run_block, wait_for,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -155,14 +155,13 @@ if _run_name:
|
|||||||
report(results, "Agent replied to recall message", False, "timeout")
|
report(results, "Agent replied to recall message", False, "timeout")
|
||||||
report(results, f"Reply contains '{random_name}'", False, "no reply")
|
report(results, f"Reply contains '{random_name}'", False, "no reply")
|
||||||
|
|
||||||
# ── 4. Bifrost pass-through check ─────────────────────────────────────────
|
# ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
|
||||||
bifrost_lines = fetch_bifrost_logs(since_s=300)
|
try:
|
||||||
report(results, "Bifrost container has log output (requests forwarded)",
|
status, _ = get(f"{LITELLM}/health", timeout=5)
|
||||||
len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
|
litellm_ok = status == 200
|
||||||
bifrost_raw = "\n".join(bifrost_lines)
|
except Exception:
|
||||||
report(results, " Bifrost log shows AsyncOpenAI agent requests",
|
litellm_ok = False
|
||||||
"AsyncOpenAI" in bifrost_raw,
|
report(results, "LiteLLM proxy reachable", litellm_ok)
|
||||||
f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
|
|
||||||
|
|
||||||
# ── 5. Timing profile ─────────────────────────────────────────────────────
|
# ── 5. Timing profile ─────────────────────────────────────────────────────
|
||||||
print(f"\n[{INFO}] 5. Timing profile")
|
print(f"\n[{INFO}] 5. Timing profile")
|
||||||
|
|||||||
Reference in New Issue
Block a user