Compare commits
7 Commits
fix/benchm
...
a363347ae5
| Author | SHA1 | Date | |
|---|---|---|---|
| a363347ae5 | |||
| 1d2787766e | |||
| abf792a2ec | |||
| 537e927146 | |||
| 186e16284b | |||
| 0b428e4ada | |||
| 8ef4897869 |
13
agent.py
13
agent.py
@@ -432,6 +432,7 @@ async def _run_agent_pipeline(
|
||||
session_id: str,
|
||||
tier_override: str | None = None,
|
||||
dry_run: bool = False,
|
||||
tier_capture: list | None = None,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""Core pipeline: pre-flight → routing → inference. Yields text chunks.
|
||||
|
||||
@@ -501,6 +502,8 @@ async def _run_agent_pipeline(
|
||||
else:
|
||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||
tier = effective_tier
|
||||
if tier_capture is not None:
|
||||
tier_capture.append(tier)
|
||||
|
||||
if tier == "light":
|
||||
final_text = light_reply
|
||||
@@ -597,10 +600,9 @@ async def run_agent_task(
|
||||
history = _conversation_buffers.get(session_id, [])
|
||||
final_text = None
|
||||
actual_tier = "unknown"
|
||||
tier_capture: list = []
|
||||
|
||||
# Patch pipeline to capture tier for logging
|
||||
# We read it from logs post-hoc; capture via a wrapper
|
||||
async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run):
|
||||
async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture):
|
||||
await _push_stream_chunk(session_id, chunk)
|
||||
if final_text is None:
|
||||
final_text = chunk
|
||||
@@ -608,6 +610,7 @@ async def run_agent_task(
|
||||
final_text += chunk
|
||||
|
||||
await _end_stream(session_id)
|
||||
actual_tier = tier_capture[0] if tier_capture else "unknown"
|
||||
|
||||
elapsed_ms = int((time.monotonic() - t0) * 1000)
|
||||
|
||||
@@ -621,8 +624,8 @@ async def run_agent_task(
|
||||
except Exception as e:
|
||||
print(f"[agent] delivery error (non-fatal): {e}", flush=True)
|
||||
|
||||
print(f"[agent] replied in {elapsed_ms / 1000:.1f}s", flush=True)
|
||||
print(f"[agent] reply_text: {final_text[:200]}", flush=True)
|
||||
print(f"[agent] replied in {elapsed_ms / 1000:.1f}s tier={actual_tier}", flush=True)
|
||||
print(f"[agent] reply_text: {final_text}", flush=True)
|
||||
|
||||
# Update conversation buffer
|
||||
buf = _conversation_buffers.get(session_id, [])
|
||||
|
||||
11
router.py
11
router.py
@@ -52,6 +52,13 @@ _LIGHT_PATTERNS = re.compile(
|
||||
r"|окей|хорошо|отлично|понятно|ок|ладно|договорились|спс|благодарю"
|
||||
r"|пожалуйста|не за что|всё понятно|ясно"
|
||||
r"|как дела|как ты|как жизнь|всё хорошо|всё ок"
|
||||
# Russian tech definitions — static knowledge (no tools needed)
|
||||
r"|что\s+такое\s+\S+"
|
||||
r"|что\s+означает\s+\S+"
|
||||
r"|сколько\s+(?:бит|байт|байтов|мегабайт|мегабайтов|гигабайт|гигабайтов)(?:\s+\w+)*"
|
||||
# Compound Russian greetings
|
||||
r"|привет[,!]?\s+как\s+дела"
|
||||
r"|добрый\s+(?:день|вечер|утро)[,!]?\s+как\s+дела"
|
||||
r")[\s!.?]*$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
@@ -314,6 +321,10 @@ _MEDIUM_PATTERNS = re.compile(
|
||||
r"|курс (?:доллара|биткоина|евро|рубл)"
|
||||
r"|(?:последние |свежие )?новости\b"
|
||||
r"|(?:погода|температура)\s+(?:на завтра|на неделю)"
|
||||
# Smart home commands that don't use verb-first pattern
|
||||
r"|(?:свет|лампочк|освещени)\w*\s+(?:включ|выключ|убавь|прибавь)"
|
||||
r"|(?:дома|в доме|по всему дому)\s+(?:свет|лампочк)"
|
||||
r"|(?:режим|сцена)\s+(?:ночной|утренний|вечерний|кинотеатр)"
|
||||
r")",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@@ -11,7 +11,7 @@ import urllib.request
|
||||
|
||||
# ── config ────────────────────────────────────────────────────────────────────
|
||||
DEEPAGENTS = "http://localhost:8000"
|
||||
BIFROST = "http://localhost:8080"
|
||||
LITELLM = "http://localhost:4000"
|
||||
OPENMEMORY = "http://localhost:8765"
|
||||
GRAMMY_HOST = "localhost"
|
||||
GRAMMY_PORT = 3001
|
||||
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
|
||||
return []
|
||||
|
||||
|
||||
def fetch_bifrost_logs(since_s=120):
|
||||
"""Return bifrost container log lines from the last since_s seconds."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
|
||||
f"--since={int(since_s)}s", "--no-log-prefix"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
return r.stdout.splitlines()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def parse_run_block(lines, msg_prefix):
|
||||
"""
|
||||
Scan log lines for the LAST '[agent] running: <msg_prefix>' block.
|
||||
@@ -199,14 +186,13 @@ def parse_run_block(lines, msg_prefix):
|
||||
if txt:
|
||||
last_ai_text = txt
|
||||
|
||||
m = re.search(r"replied in ([\d.]+)s \(llm=([\d.]+)s, send=([\d.]+)s\)", line)
|
||||
m = re.search(r"replied in ([\d.]+)s(?:\s+tier=(\w+))?", line)
|
||||
if m:
|
||||
tier_m = re.search(r"\btier=(\w+)", line)
|
||||
tier = tier_m.group(1) if tier_m else "unknown"
|
||||
tier = m.group(2) if m.group(2) else "unknown"
|
||||
reply_data = {
|
||||
"reply_total": float(m.group(1)),
|
||||
"llm": float(m.group(2)),
|
||||
"send": float(m.group(3)),
|
||||
"llm": None,
|
||||
"send": None,
|
||||
"tier": tier,
|
||||
"reply_text": last_ai_text,
|
||||
"memory_s": None,
|
||||
|
||||
@@ -6,7 +6,7 @@ Tests:
|
||||
1. Name store — POST "remember that your name is <RandomName>"
|
||||
2. Qdrant point — verifies a new vector was written after store
|
||||
3. Name recall — POST "what is your name?" → reply must contain <RandomName>
|
||||
4. Bifrost — verifies store/recall requests passed through Bifrost
|
||||
4. LiteLLM — verifies LiteLLM proxy is reachable (replaced Bifrost)
|
||||
5. Timing profile — breakdown of store and recall latencies
|
||||
6. Memory benchmark — store 5 personal facts, recall with 10 questions
|
||||
7. Dedup test — same fact stored twice must not grow Qdrant by 2 points
|
||||
@@ -24,11 +24,11 @@ import time
|
||||
import urllib.request
|
||||
|
||||
from common import (
|
||||
DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
|
||||
DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
|
||||
NAMES,
|
||||
INFO, PASS, FAIL, WARN,
|
||||
report, print_summary, tf,
|
||||
get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
|
||||
get, post_json, qdrant_count, fetch_logs,
|
||||
parse_run_block, wait_for,
|
||||
)
|
||||
|
||||
@@ -155,14 +155,13 @@ if _run_name:
|
||||
report(results, "Agent replied to recall message", False, "timeout")
|
||||
report(results, f"Reply contains '{random_name}'", False, "no reply")
|
||||
|
||||
# ── 4. Bifrost pass-through check ─────────────────────────────────────────
|
||||
bifrost_lines = fetch_bifrost_logs(since_s=300)
|
||||
report(results, "Bifrost container has log output (requests forwarded)",
|
||||
len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
|
||||
bifrost_raw = "\n".join(bifrost_lines)
|
||||
report(results, " Bifrost log shows AsyncOpenAI agent requests",
|
||||
"AsyncOpenAI" in bifrost_raw,
|
||||
f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
|
||||
# ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
|
||||
try:
|
||||
status, _ = get(f"{LITELLM}/health", timeout=5)
|
||||
litellm_ok = status == 200
|
||||
except Exception:
|
||||
litellm_ok = False
|
||||
report(results, "LiteLLM proxy reachable", litellm_ok)
|
||||
|
||||
# ── 5. Timing profile ─────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 5. Timing profile")
|
||||
|
||||
Reference in New Issue
Block a user