Integrate Bifrost LLM gateway, add test suite, implement memory pipeline
- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct Ollama access for VRAM flush/prewarm operations
- Switch medium model from qwen3:4b to qwen2.5:1.5b (direct call, no tools) via _DirectModel wrapper; complex keeps create_deep_agent with qwen3:8b
- Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches relevant context (injected into all tiers), _store_memory runs as background task after each reply writing to openmemory/Qdrant
- Add tests/unit/ with 133 tests covering router, channels, vram_manager, agent helpers; move integration test to tests/integration/
- Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b, gemma3:4b) and CPU Ollama providers
- Integration tests: 28/29 pass (only grammy fails — no TELEGRAM_BOT_TOKEN)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
200
tests/unit/test_router.py
Normal file
200
tests/unit/test_router.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""Unit tests for router.py — Router, _parse_tier, _format_history, _LIGHT_PATTERNS."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from router import Router, _parse_tier, _format_history, _LIGHT_PATTERNS
|
||||
|
||||
|
||||
# ── _LIGHT_PATTERNS regex ──────────────────────────────────────────────────────
|
||||
|
||||
class TestLightPatterns:
    """Check which messages the ``_LIGHT_PATTERNS`` regex accepts and rejects."""

    @pytest.mark.parametrize(
        "text",
        [
            "hi", "Hi", "HI",
            "hello", "hey", "yo", "sup",
            "good morning", "good evening", "good night", "good afternoon",
            "bye", "goodbye", "see you", "cya", "later", "ttyl",
            "thanks", "thank you", "thx", "ty",
            "ok", "okay", "k", "cool", "great", "awesome", "perfect",
            "sounds good", "got it", "nice", "sure",
            "how are you", "how are you?", "how are you doing today?",
            "what's up",
            "what day comes after Monday?",
            "what day follows Friday?",
            "what comes after summer?",
            "what does NASA stand for?",
            "what does AI stand for?",
            # same greetings/acknowledgements followed by punctuation
            "hi!", "hello.", "thanks!",
        ],
    )
    def test_matches(self, text):
        """Every listed message must be recognised by the light regex."""
        candidate = text.strip()
        assert _LIGHT_PATTERNS.match(candidate), f"Expected light match for: {text!r}"

    @pytest.mark.parametrize(
        "text",
        [
            "what is the capital of France",
            "tell me about bitcoin",
            "what is 2+2",
            "write me a poem",
            "search for news about the election",
            "what did we talk about last time",
            "what is my name",
            "/think compare these frameworks",
            "how do I install Python",
            "explain machine learning",
            "",  # the empty string is expected not to match
        ],
    )
    def test_no_match(self, text):
        """None of the listed messages may be recognised by the light regex."""
        candidate = text.strip()
        assert not _LIGHT_PATTERNS.match(candidate), f"Expected NO light match for: {text!r}"
|
||||
|
||||
|
||||
# ── _parse_tier ────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestParseTier:
    """Table-driven checks of ``_parse_tier`` on raw classifier output."""

    @pytest.mark.parametrize(
        "raw,expected",
        [
            # tier names, regardless of case / trailing newline / punctuation
            ("light", "light"),
            ("Light", "light"),
            ("LIGHT\n", "light"),
            ("medium", "medium"),
            ("Medium.", "medium"),
            ("complex", "complex"),
            ("Complex!", "complex"),
            # descriptive words are expected to map to light
            ("simplefact", "light"),
            ("trivial question", "light"),
            ("basic", "light"),
            ("easy answer", "light"),
            ("general knowledge", "light"),
            # anything unrecognised is expected to map to medium
            ("unknown_category", "medium"),
            ("", "medium"),
            ("I don't know", "medium"),
            # complex only if 'complex' appears in first 60 chars
            ("this is a complex query requiring search", "complex"),
            # _parse_tier checks "complex" before "medium", so complex wins
            # even if medium appears first
            ("medium complexity, not complex", "complex"),
        ],
    )
    def test_parse_tier(self, raw, expected):
        """``_parse_tier(raw)`` must equal the expected tier for each row."""
        actual = _parse_tier(raw)
        assert actual == expected
|
||||
|
||||
|
||||
# ── _format_history ────────────────────────────────────────────────────────────
|
||||
|
||||
class TestFormatHistory:
    """Checks of ``_format_history`` on empty, normal, oversized and partial input."""

    def test_empty(self):
        """An empty history renders as the literal placeholder ``(none)``."""
        rendered = _format_history([])
        assert rendered == "(none)"

    def test_single_user_message(self):
        """A single user turn shows up as ``role: content``."""
        turns = [{"role": "user", "content": "hello there"}]
        rendered = _format_history(turns)
        assert "user: hello there" in rendered

    def test_multiple_turns(self):
        """Both the user and the assistant turn appear in the output."""
        turns = [
            {"role": "user", "content": "What is Python?"},
            {"role": "assistant", "content": "Python is a programming language."},
        ]
        rendered = _format_history(turns)
        assert "user: What is Python?" in rendered
        assert "assistant: Python is a programming language." in rendered

    def test_truncates_long_content(self):
        """A 300-character message must be shortened in the output."""
        oversized = "x" * 300
        rendered = _format_history([{"role": "user", "content": oversized}])
        # content is truncated to 200 chars in _format_history, so the whole
        # rendering (role prefix included) must stay below 250
        assert len(rendered) < 250

    def test_missing_keys_handled(self):
        """An entry without a ``content`` key must not raise."""
        # Should not raise — uses .get() with defaults
        turns = [{"role": "user"}]  # no content key
        rendered = _format_history(turns)
        assert "user:" in rendered
|
||||
|
||||
|
||||
# ── Router.route() ─────────────────────────────────────────────────────────────
|
||||
|
||||
class TestRouterRoute:
    """Exercise ``Router.route()`` against a mocked model.

    Each test awaits ``router.route(message, history, force_complex=...)`` and
    checks the returned ``(tier, reply)`` pair.

    NOTE(review): the tests are coroutines and carry no explicit marker, so
    they presumably rely on the project's pytest configuration to run async
    tests (e.g. pytest-asyncio in auto mode) — confirm against the pytest
    config, which is not visible from this file.
    """

    def _make_router(self, classify_response: str, reply_response: str = "Sure!") -> Router:
        """Return a Router whose mock model yields a classification, then a reply.

        Args:
            classify_response: ``content`` of the first ``ainvoke`` result.
            reply_response: ``content`` of the second ``ainvoke`` result.

        Returns:
            A ``Router`` built around the mocked model.
        """
        classification = MagicMock()
        classification.content = classify_response
        answer = MagicMock()
        answer.content = reply_response

        fake_model = MagicMock()
        # First ainvoke call → classification; second → reply
        fake_model.ainvoke = AsyncMock(side_effect=[classification, answer])
        return Router(model=fake_model)

    async def test_force_complex_bypasses_classification(self):
        """``force_complex=True`` yields complex without touching the model."""
        router = self._make_router("medium")
        tier, reply = await router.route("some question", [], force_complex=True)
        assert tier == "complex"
        assert reply is None
        # Model should NOT have been called
        router.model.ainvoke.assert_not_called()

    async def test_regex_light_skips_llm_classification(self):
        """A regex-matched message costs exactly one model call (the reply)."""
        # Regex match bypasses classification entirely; the only ainvoke call is the reply.
        answer = MagicMock()
        answer.content = "I'm doing great!"
        fake_model = MagicMock()
        fake_model.ainvoke = AsyncMock(return_value=answer)
        router = Router(model=fake_model)

        tier, reply = await router.route("how are you", [], force_complex=False)

        assert tier == "light"
        assert reply == "I'm doing great!"
        # Exactly one model call — no classification step
        assert router.model.ainvoke.call_count == 1

    async def test_llm_classifies_medium(self):
        """A "medium" classification is returned without a reply."""
        router = self._make_router("medium")
        tier, reply = await router.route("what is the bitcoin price?", [], force_complex=False)
        assert tier == "medium"
        assert reply is None

    async def test_llm_classifies_light_generates_reply(self):
        """A "light" classification is followed by a generated reply."""
        router = self._make_router("light", "Paris is the capital of France.")
        tier, reply = await router.route("what is the capital of France?", [], force_complex=False)
        assert tier == "light"
        assert reply == "Paris is the capital of France."

    async def test_llm_classifies_complex_downgraded_to_medium(self):
        """Without ``force_complex`` a "complex" classification becomes medium."""
        # Without /think prefix, complex classification → downgraded to medium
        router = self._make_router("complex")
        tier, reply = await router.route("compare React and Vue", [], force_complex=False)
        assert tier == "medium"
        assert reply is None

    async def test_llm_error_falls_back_to_medium(self):
        """A model exception results in medium with no reply."""
        fake_model = MagicMock()
        fake_model.ainvoke = AsyncMock(side_effect=Exception("connection error"))
        router = Router(model=fake_model)
        tier, reply = await router.route("some question", [], force_complex=False)
        assert tier == "medium"
        assert reply is None

    async def test_light_reply_empty_falls_back_to_medium(self):
        """If the light reply comes back empty, router returns medium instead."""
        router = self._make_router("light", "")  # empty reply
        tier, reply = await router.route("what is 2+2", [], force_complex=False)
        assert tier == "medium"
        assert reply is None

    async def test_strips_think_tags_from_classification(self):
        """Router strips <think>...</think> from model output before parsing tier."""
        classification = MagicMock()
        classification.content = "<think>Hmm let me think...</think>medium"
        answer = MagicMock()
        answer.content = "I'm fine!"
        fake_model = MagicMock()
        fake_model.ainvoke = AsyncMock(side_effect=[classification, answer])
        router = Router(model=fake_model)

        tier, _ = await router.route("what is the news?", [], force_complex=False)

        assert tier == "medium"

    async def test_think_tag_content_does_not_influence_tier(self):
        """Keywords inside <think>...</think> must not leak into the parsed tier.

        ``test_strips_think_tags_from_classification`` expects "medium", which
        is also what the router returns when classification fails (see
        ``test_llm_error_falls_back_to_medium``), so on its own it cannot tell
        stripped output from unstripped output. Here the think block mentions
        "complex" while the actual answer is "light"; only the answer outside
        the tags may decide the tier.
        """
        router = self._make_router(
            "<think>this looks complex</think>light", "Here is the news."
        )
        tier, reply = await router.route("what is the news?", [], force_complex=False)
        assert tier == "light"
        assert reply == "Here is the news."

    async def test_think_prefix_forces_complex(self):
        """/think prefix is already stripped by agent.py; force_complex=True is passed."""
        router = self._make_router("medium")
        tier, reply = await router.route("analyse this", [], force_complex=True)
        assert tier == "complex"
        assert reply is None
|
||||
Reference in New Issue
Block a user