Integrate Bifrost LLM gateway, add test suite, implement memory pipeline

- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct Ollama access for VRAM flush/prewarm operations
- Switch medium model from qwen3:4b to qwen2.5:1.5b (direct call, no tools) via _DirectModel wrapper; complex keeps create_deep_agent with qwen3:8b
- Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches relevant context (injected into all tiers), _store_memory runs as background task after each reply writing to openmemory/Qdrant
- Add tests/unit/ with 133 tests covering router, channels, vram_manager, agent helpers; move integration test to tests/integration/
- Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b, gemma3:4b) and CPU Ollama providers
- Integration test 28/29 pass (only grammy fails — no TELEGRAM_BOT_TOKEN)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 13:50:12 +00:00
parent ec45d255f0
commit f9618a9bbf
16 changed files with 1195 additions and 36 deletions
--- a/tests/unit/test_router.py
+++ b/tests/unit/test_router.py
@@ -0,0 +1,200 @@
+"""Unit tests for router.py — Router, _parse_tier, _format_history, _LIGHT_PATTERNS."""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from router import Router, _parse_tier, _format_history, _LIGHT_PATTERNS
+
+
+# ── _LIGHT_PATTERNS regex ──────────────────────────────────────────────────────
+
+class TestLightPatterns:
+    """Check which utterances the ``_LIGHT_PATTERNS`` regex accepts and rejects."""
+
+    # Inputs the regex is expected to match (order defines the test order).
+    LIGHT_INPUTS = (
+        # greetings, including case variants
+        "hi", "Hi", "HI",
+        "hello", "hey", "yo", "sup",
+        "good morning", "good evening", "good night", "good afternoon",
+        # farewells
+        "bye", "goodbye", "see you", "cya", "later", "ttyl",
+        # thanks
+        "thanks", "thank you", "thx", "ty",
+        # short acknowledgements
+        "ok", "okay", "k", "cool", "great", "awesome", "perfect",
+        "sounds good", "got it", "nice", "sure",
+        # small talk
+        "how are you", "how are you?", "how are you doing today?",
+        "what's up",
+        # trivial sequence / acronym questions
+        "what day comes after Monday?",
+        "what day follows Friday?",
+        "what comes after summer?",
+        "what does NASA stand for?",
+        "what does AI stand for?",
+        # with trailing punctuation
+        "hi!", "hello.", "thanks!",
+    )
+
+    # Inputs the regex is expected to reject.
+    NON_LIGHT_INPUTS = (
+        "what is the capital of France",
+        "tell me about bitcoin",
+        "what is 2+2",
+        "write me a poem",
+        "search for news about the election",
+        "what did we talk about last time",
+        "what is my name",
+        "/think compare these frameworks",
+        "how do I install Python",
+        "explain machine learning",
+        "",  # empty string doesn't match the pattern
+    )
+
+    @pytest.mark.parametrize("text", LIGHT_INPUTS)
+    def test_matches(self, text):
+        """Each light input, stripped of surrounding whitespace, must match."""
+        hit = _LIGHT_PATTERNS.match(text.strip())
+        assert hit, f"Expected light match for: {text!r}"
+
+    @pytest.mark.parametrize("text", NON_LIGHT_INPUTS)
+    def test_no_match(self, text):
+        """Each non-light input, stripped of surrounding whitespace, must not match."""
+        hit = _LIGHT_PATTERNS.match(text.strip())
+        assert not hit, f"Expected NO light match for: {text!r}"
+
+
+# ── _parse_tier ────────────────────────────────────────────────────────────────
+
+class TestParseTier:
+    """Table-driven checks of ``_parse_tier`` against raw classifier output."""
+
+    # Each entry is (raw model output, tier the parser is expected to return).
+    CASES = (
+        # plain tier names, tolerant of case, trailing newline and punctuation
+        ("light", "light"),
+        ("Light", "light"),
+        ("LIGHT\n", "light"),
+        ("medium", "medium"),
+        ("Medium.", "medium"),
+        ("complex", "complex"),
+        ("Complex!", "complex"),
+        # descriptive words are expected to map to light
+        ("simplefact", "light"),
+        ("trivial question", "light"),
+        ("basic", "light"),
+        ("easy answer", "light"),
+        ("general knowledge", "light"),
+        # anything unrecognised (including empty output) is expected to map to medium
+        ("unknown_category", "medium"),
+        ("", "medium"),
+        ("I don't know", "medium"),
+        # complex only if 'complex' appears in first 60 chars
+        ("this is a complex query requiring search", "complex"),
+        # "complex" is checked before "medium", so complex wins even when
+        # "medium" appears earlier in the text
+        ("medium complexity, not complex", "complex"),
+    )
+
+    @pytest.mark.parametrize(("raw", "expected"), CASES)
+    def test_parse_tier(self, raw, expected):
+        """The parsed tier must equal the expected tier for every table row."""
+        parsed = _parse_tier(raw)
+        assert parsed == expected
+
+
+# ── _format_history ────────────────────────────────────────────────────────────
+
+class TestFormatHistory:
+    """Checks on how ``_format_history`` renders a list of chat messages."""
+
+    def test_empty(self):
+        """An empty history renders as the "(none)" placeholder."""
+        rendered = _format_history([])
+        assert rendered == "(none)"
+
+    def test_single_user_message(self):
+        """A lone user message appears as a "role: content" entry."""
+        rendered = _format_history([{"role": "user", "content": "hello there"}])
+        assert "user: hello there" in rendered
+
+    def test_multiple_turns(self):
+        """Both the user turn and the assistant turn appear in the output."""
+        question = {"role": "user", "content": "What is Python?"}
+        answer = {"role": "assistant", "content": "Python is a programming language."}
+        rendered = _format_history([question, answer])
+        assert "user: What is Python?" in rendered
+        assert "assistant: Python is a programming language." in rendered
+
+    def test_truncates_long_content(self):
+        """Over-long content is cut down rather than passed through whole."""
+        oversized = "x" * 300
+        rendered = _format_history([{"role": "user", "content": oversized}])
+        # _format_history truncates content to 200 chars, so the rendered text
+        # (role prefix included) must stay well below the 300-char input.
+        assert len(rendered) < 250
+
+    def test_missing_keys_handled(self):
+        """A message without a "content" key is rendered without raising."""
+        # No content key: the formatter is expected to fall back to a default.
+        incomplete = [{"role": "user"}]
+        rendered = _format_history(incomplete)
+        assert "user:" in rendered
+
+
+# ── Router.route() ─────────────────────────────────────────────────────────────
+
+class TestRouterRoute:
+    """Exercise ``Router.route`` against mocked models with scripted responses."""
+
+    def _make_router(self, classify_response: str, reply_response: str = "Sure!") -> Router:
+        """Build a Router whose mock model answers first with the classification, then the reply."""
+        # Consumed in order: ainvoke call #1 → classification, call #2 → reply.
+        scripted = [
+            MagicMock(content=classify_response),
+            MagicMock(content=reply_response),
+        ]
+        fake_model = MagicMock()
+        fake_model.ainvoke = AsyncMock(side_effect=scripted)
+        return Router(model=fake_model)
+
+    async def test_force_complex_bypasses_classification(self):
+        """force_complex=True yields ("complex", None) without touching the model."""
+        router = self._make_router("medium")
+        chosen_tier, answer = await router.route("some question", [], force_complex=True)
+        assert chosen_tier == "complex"
+        assert answer is None
+        # The model must never be invoked on the forced path.
+        router.model.ainvoke.assert_not_called()
+
+    async def test_regex_light_skips_llm_classification(self):
+        """A regex-matched message is answered with a single model call (the reply)."""
+        # Every ainvoke returns the same reply; no classification response is scripted.
+        canned_reply = MagicMock()
+        canned_reply.content = "I'm doing great!"
+        fake_model = MagicMock()
+        fake_model.ainvoke = AsyncMock(return_value=canned_reply)
+        router = Router(model=fake_model)
+        chosen_tier, answer = await router.route("how are you", [], force_complex=False)
+        assert chosen_tier == "light"
+        assert answer == "I'm doing great!"
+        # Exactly one model call, so no separate classification step ran.
+        calls_made = router.model.ainvoke.call_count
+        assert calls_made == 1
+
+    async def test_llm_classifies_medium(self):
+        """A "medium" classification is returned as-is, with no reply."""
+        router = self._make_router("medium")
+        chosen_tier, answer = await router.route(
+            "what is the bitcoin price?", [], force_complex=False
+        )
+        assert chosen_tier == "medium"
+        assert answer is None
+
+    async def test_llm_classifies_light_generates_reply(self):
+        """A "light" classification is followed by a generated reply."""
+        router = self._make_router("light", "Paris is the capital of France.")
+        chosen_tier, answer = await router.route(
+            "what is the capital of France?", [], force_complex=False
+        )
+        assert chosen_tier == "light"
+        assert answer == "Paris is the capital of France."
+
+    async def test_llm_classifies_complex_downgraded_to_medium(self):
+        """Without force_complex, a "complex" classification comes back as medium."""
+        router = self._make_router("complex")
+        chosen_tier, answer = await router.route(
+            "compare React and Vue", [], force_complex=False
+        )
+        assert chosen_tier == "medium"
+        assert answer is None
+
+    async def test_llm_error_falls_back_to_medium(self):
+        """An exception raised by the model results in ("medium", None)."""
+        failing_model = MagicMock()
+        failing_model.ainvoke = AsyncMock(side_effect=Exception("connection error"))
+        router = Router(model=failing_model)
+        chosen_tier, answer = await router.route("some question", [], force_complex=False)
+        assert chosen_tier == "medium"
+        assert answer is None
+
+    async def test_light_reply_empty_falls_back_to_medium(self):
+        """If the light reply comes back empty, router returns medium instead."""
+        router = self._make_router("light", "")  # classification light, reply empty
+        chosen_tier, answer = await router.route("what is 2+2", [], force_complex=False)
+        assert chosen_tier == "medium"
+        assert answer is None
+
+    async def test_strips_think_tags_from_classification(self):
+        """A <think>...</think> block in the model output is ignored when parsing the tier."""
+        classification = MagicMock()
+        classification.content = "<think>Hmm let me think...</think>medium"
+        spare_reply = MagicMock()
+        spare_reply.content = "I'm fine!"
+        fake_model = MagicMock()
+        fake_model.ainvoke = AsyncMock(side_effect=[classification, spare_reply])
+        router = Router(model=fake_model)
+        chosen_tier, _ = await router.route("what is the news?", [], force_complex=False)
+        assert chosen_tier == "medium"
+
+    async def test_think_prefix_forces_complex(self):
+        """/think is stripped upstream by agent.py, which then passes force_complex=True."""
+        router = self._make_router("medium")
+        chosen_tier, answer = await router.route("analyse this", [], force_complex=True)
+        assert chosen_tier == "complex"
+        assert answer is None