Merge pull request 'Fix routing: add Russian tech def patterns to light, strengthen medium smart home' (#13) from fix/routing-accuracy into main

Reviewed-on: #13
Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14) from fix/remove-bifrost into main
2026-03-24 02:51:17 +00:00 · 2026-03-24 02:48:40 +00:00 · 2026-03-24 02:46:01 +00:00 · 2026-03-24 02:45:42 +00:00 · 2026-03-24 02:44:35 +00:00 · 2026-03-24 02:43:26 +00:00
4 changed files with 25 additions and 28 deletions
--- a/benchmarks/run_benchmark.py
+++ b/benchmarks/run_benchmark.py
@@ -120,7 +120,7 @@ def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
    """Find new tier= lines that appeared after we sent the query."""
    before_lines = set(logs_before.splitlines())
    new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
-    for line in reversed(new_lines):
+    for line in new_lines:
        m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
        if m:
            tier_raw = m.group(1)
@@ -203,7 +203,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:

            print(f"{qid:>3}  {expected:8}  ", end="", flush=True)

-            logs_before = get_log_tail(80)
+            logs_before = get_log_tail(300)
            t0 = time.monotonic()

            ok_post = await post_message(client, qid, query_text, dry_run=send_dry)
@@ -225,7 +225,7 @@ async def run(queries: list[dict], dry_run: bool = False) -> list[dict]:

            # Now the query is done — check logs for tier
            await asyncio.sleep(0.3)
-            logs_after = get_log_tail(80)
+            logs_after = get_log_tail(300)
            actual = extract_tier_from_logs(logs_before, logs_after)

            elapsed = time.monotonic() - t0
--- a/router.py
+++ b/router.py
@@ -52,6 +52,13 @@ _LIGHT_PATTERNS = re.compile(
    r"|окей|хорошо|отлично|понятно|ок|ладно|договорились|спс|благодарю"
    r"|пожалуйста|не за что|всё понятно|ясно"
    r"|как дела|как ты|как жизнь|всё хорошо|всё ок"
+    # Russian tech definitions — static knowledge (no tools needed)
+    r"|что\s+такое\s+\S+"
+    r"|что\s+означает\s+\S+"
+    r"|сколько\s+(?:бит|байт|байтов|мегабайт|мегабайтов|гигабайт|гигабайтов)(?:\s+\w+)*"
+    # Compound Russian greetings
+    r"|привет[,!]?\s+как\s+дела"
+    r"|добрый\s+(?:день|вечер|утро)[,!]?\s+как\s+дела"
    r")[\s!.?]*$",
    re.IGNORECASE,
 )
@@ -314,6 +321,10 @@ _MEDIUM_PATTERNS = re.compile(
    r"|курс (?:доллара|биткоина|евро|рубл)"
    r"|(?:последние |свежие )?новости\b"
    r"|(?:погода|температура)\s+(?:на завтра|на неделю)"
+    # Smart home commands that don't use verb-first pattern
+    r"|(?:свет|лампочк|освещени)\w*\s+(?:включ|выключ|убавь|прибавь)"
+    r"|(?:дома|в доме|по всему дому)\s+(?:свет|лампочк)"
+    r"|(?:режим|сцена)\s+(?:ночной|утренний|вечерний|кинотеатр)"
    r")",
    re.IGNORECASE,
 )
--- a/tests/integration/common.py
+++ b/tests/integration/common.py
@@ -11,7 +11,7 @@ import urllib.request

 # ── config ────────────────────────────────────────────────────────────────────
 DEEPAGENTS   = "http://localhost:8000"
-BIFROST      = "http://localhost:8080"
+LITELLM      = "http://localhost:4000"
 OPENMEMORY   = "http://localhost:8765"
 GRAMMY_HOST  = "localhost"
 GRAMMY_PORT  = 3001
@@ -156,19 +156,6 @@ def fetch_logs(since_s=600):
        return []


-def fetch_bifrost_logs(since_s=120):
-    """Return bifrost container log lines from the last since_s seconds."""
-    try:
-        r = subprocess.run(
-            ["docker", "compose", "-f", COMPOSE_FILE, "logs", "bifrost",
-             f"--since={int(since_s)}s", "--no-log-prefix"],
-            capture_output=True, text=True, timeout=10,
-        )
-        return r.stdout.splitlines()
-    except Exception:
-        return []
-
-
 def parse_run_block(lines, msg_prefix):
    """
    Scan log lines for the LAST '[agent] running: <msg_prefix>' block.
--- a/tests/integration/test_memory.py
+++ b/tests/integration/test_memory.py
@@ -6,7 +6,7 @@ Tests:
  1. Name store   — POST "remember that your name is <RandomName>"
  2. Qdrant point — verifies a new vector was written after store
  3. Name recall  — POST "what is your name?" → reply must contain <RandomName>
-  4. Bifrost      — verifies store/recall requests passed through Bifrost
+  4. LiteLLM      — verifies LiteLLM proxy is reachable (replaced Bifrost)
  5. Timing profile — breakdown of store and recall latencies
  6. Memory benchmark — store 5 personal facts, recall with 10 questions
  7. Dedup test   — same fact stored twice must not grow Qdrant by 2 points
@@ -24,11 +24,11 @@ import time
 import urllib.request

 from common import (
-    DEEPAGENTS, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
+    DEEPAGENTS, LITELLM, QDRANT, COMPOSE_FILE, DEFAULT_CHAT_ID,
    NAMES,
    INFO, PASS, FAIL, WARN,
    report, print_summary, tf,
-    get, post_json, qdrant_count, fetch_logs, fetch_bifrost_logs,
+    get, post_json, qdrant_count, fetch_logs,
    parse_run_block, wait_for,
 )

@@ -155,14 +155,13 @@ if _run_name:
        report(results, "Agent replied to recall message", False, "timeout")
        report(results, f"Reply contains '{random_name}'", False, "no reply")

-    # ── 4. Bifrost pass-through check ─────────────────────────────────────────
-    bifrost_lines = fetch_bifrost_logs(since_s=300)
-    report(results, "Bifrost container has log output (requests forwarded)",
-           len(bifrost_lines) > 0, f"{len(bifrost_lines)} lines in bifrost logs")
-    bifrost_raw = "\n".join(bifrost_lines)
-    report(results, "  Bifrost log shows AsyncOpenAI agent requests",
-           "AsyncOpenAI" in bifrost_raw,
-           f"{'found' if 'AsyncOpenAI' in bifrost_raw else 'NOT found'} in bifrost logs")
+    # ── 4. LiteLLM proxy reachable (replaced Bifrost) ─────────────────────────
+    try:
+        status, _ = get(f"{LITELLM}/health", timeout=5)
+        litellm_ok = status == 200
+    except Exception:
+        litellm_ok = False
+    report(results, "LiteLLM proxy reachable", litellm_ok)

    # ── 5. Timing profile ─────────────────────────────────────────────────────
    print(f"\n[{INFO}] 5. Timing profile")
Author	SHA1	Message	Date
alvis	a363347ae5	Merge pull request 'Fix routing: add Russian tech def patterns to light, strengthen medium smart home' (#13 ) from fix/routing-accuracy into main Reviewed-on: #13	2026-03-24 02:51:17 +00:00
alvis	1d2787766e	Merge pull request 'Remove Bifrost: replace test 4 with LiteLLM health check' (#14 ) from fix/remove-bifrost into main Reviewed-on: #14	2026-03-24 02:48:40 +00:00
alvis	abf792a2ec	Remove Bifrost: replace test 4 with LiteLLM health check - Remove BIFROST constant and fetch_bifrost_logs() from common.py - Add LITELLM constant (localhost:4000) - Replace test_memory.py test 4 (Bifrost pass-through) with LiteLLM health check Fixes #5 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:46:01 +00:00
alvis	537e927146	Fix routing: add Russian tech def patterns to light, strengthen medium smart home - _LIGHT_PATTERNS: add что\s+такое, что\s+означает, сколько бит/байт, compound greetings (привет, как дела) — these fell through to embedding which sometimes misclassified short Russian phrases as medium - _MEDIUM_PATTERNS: add non-verb-first smart home patterns (свет/лампочка as subject, режим/сцена commands) for benchmark queries with different phrasing Fixes #8, #9 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:45:42 +00:00
alvis	186e16284b	Merge pull request 'Fix tier logging: capture actual_tier, fix parse_run_block regex, remove reply_text truncation' (#11 ) from fix/tier-logging into main Reviewed-on: #11	2026-03-24 02:44:35 +00:00
alvis	0b428e4ada	Merge pull request 'Fix benchmark log extraction: first tier match, increase log tail to 300' (#12 ) from fix/benchmark-log-extraction into main Reviewed-on: #12	2026-03-24 02:43:26 +00:00
alvis	98095679be	Fix benchmark log extraction: first tier match, increase log tail to 300 - Remove reversed() from extract_tier_from_logs: first match = routing decision (dry-run complex logs tier=complex early, then overwrites with tier=medium at done) - Increase log tail from 80→300 to handle concurrent log activity Fixes #7, #10 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 02:42:27 +00:00