Files
adolf/tests/unit/test_router.py
Alvis f9618a9bbf Integrate Bifrost LLM gateway, add test suite, implement memory pipeline
- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through
  bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct
  Ollama access for VRAM flush/prewarm operations
- Switch medium model from qwen3:4b to qwen2.5:1.5b (direct call, no tools)
  via _DirectModel wrapper; complex keeps create_deep_agent with qwen3:8b
- Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches
  relevant context (injected into all tiers), _store_memory runs as background
  task after each reply writing to openmemory/Qdrant
- Add tests/unit/ with 133 tests covering router, channels, vram_manager,
  agent helpers; move integration test to tests/integration/
- Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b,
  gemma3:4b) and CPU Ollama providers
- Integration test 28/29 pass (only grammy fails — no TELEGRAM_BOT_TOKEN)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 13:50:12 +00:00

201 lines
8.6 KiB
Python

"""Unit tests for router.py — Router, _parse_tier, _format_history, _LIGHT_PATTERNS."""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from router import Router, _parse_tier, _format_history, _LIGHT_PATTERNS
# ── _LIGHT_PATTERNS regex ──────────────────────────────────────────────────────
class TestLightPatterns:
    """Regex fast path: greetings/acknowledgements match, substantive queries do not."""

    @pytest.mark.parametrize("text", [
        # bare greetings, any casing
        "hi", "Hi", "HI",
        "hello", "hey", "yo", "sup",
        # time-of-day greetings
        "good morning", "good evening", "good night", "good afternoon",
        # farewells
        "bye", "goodbye", "see you", "cya", "later", "ttyl",
        # gratitude
        "thanks", "thank you", "thx", "ty",
        # acknowledgements
        "ok", "okay", "k", "cool", "great", "awesome", "perfect",
        "sounds good", "got it", "nice", "sure",
        # small talk
        "how are you", "how are you?", "how are you doing today?",
        "what's up",
        # trivial sequence / acronym questions
        "what day comes after Monday?",
        "what day follows Friday?",
        "what comes after summer?",
        "what does NASA stand for?",
        "what does AI stand for?",
        # with trailing punctuation
        "hi!", "hello.", "thanks!",
    ])
    def test_matches(self, text):
        """Each phrase above should take the light-tier shortcut."""
        matched = _LIGHT_PATTERNS.match(text.strip())
        assert matched, f"Expected light match for: {text!r}"

    @pytest.mark.parametrize("text", [
        "what is the capital of France",
        "tell me about bitcoin",
        "what is 2+2",
        "write me a poem",
        "search for news about the election",
        "what did we talk about last time",
        "what is my name",
        "/think compare these frameworks",
        "how do I install Python",
        "explain machine learning",
        "",  # empty string doesn't match the pattern
    ])
    def test_no_match(self, text):
        """Substantive queries must fall through to LLM classification."""
        matched = _LIGHT_PATTERNS.match(text.strip())
        assert not matched, f"Expected NO light match for: {text!r}"
# ── _parse_tier ────────────────────────────────────────────────────────────────
class TestParseTier:
    """_parse_tier normalises raw LLM output into light / medium / complex."""

    @pytest.mark.parametrize("raw,expected", [
        # exact tier names survive casing, punctuation and trailing whitespace
        ("light", "light"),
        ("Light", "light"),
        ("LIGHT\n", "light"),
        ("medium", "medium"),
        ("Medium.", "medium"),
        ("complex", "complex"),
        ("Complex!", "complex"),
        # descriptive words → light
        ("simplefact", "light"),
        ("trivial question", "light"),
        ("basic", "light"),
        ("easy answer", "light"),
        ("general knowledge", "light"),
        # unknown → medium
        ("unknown_category", "medium"),
        ("", "medium"),
        ("I don't know", "medium"),
        # complex only if 'complex' appears in first 60 chars
        ("this is a complex query requiring search", "complex"),
        # _parse_tier checks "complex" before "medium", so complex wins even if medium appears first
        ("medium complexity, not complex", "complex"),
    ])
    def test_parse_tier(self, raw, expected):
        tier = _parse_tier(raw)
        assert tier == expected
# ── _format_history ────────────────────────────────────────────────────────────
class TestFormatHistory:
    """_format_history renders a message list into a prompt-friendly transcript."""

    def test_empty(self):
        # An empty history renders the literal placeholder.
        assert _format_history([]) == "(none)"

    def test_single_user_message(self):
        rendered = _format_history([{"role": "user", "content": "hello there"}])
        assert "user: hello there" in rendered

    def test_multiple_turns(self):
        turns = [
            {"role": "user", "content": "What is Python?"},
            {"role": "assistant", "content": "Python is a programming language."},
        ]
        rendered = _format_history(turns)
        assert "user: What is Python?" in rendered
        assert "assistant: Python is a programming language." in rendered

    def test_truncates_long_content(self):
        rendered = _format_history([{"role": "user", "content": "x" * 300}])
        # content is truncated to 200 chars in _format_history
        assert len(rendered) < 250

    def test_missing_keys_handled(self):
        # Should not raise — uses .get() with defaults
        rendered = _format_history([{"role": "user"}])  # no content key
        assert "user:" in rendered
# ── Router.route() ─────────────────────────────────────────────────────────────
class TestRouterRoute:
    """End-to-end behaviour of Router.route() with a fully mocked model.

    NOTE(review): async tests carry no explicit asyncio marker — presumably the
    project runs pytest-asyncio/anyio in auto mode; confirm against pytest config.
    """

    @staticmethod
    def _msg(content):
        """Build a mock model response object carrying the given .content."""
        msg = MagicMock()
        msg.content = content
        return msg

    def _make_router(self, classify_response: str, reply_response: str = "Sure!") -> Router:
        """Return a Router with a mock model that returns given classification and reply."""
        mock_model = MagicMock()
        # First ainvoke call → classification; second → reply
        mock_model.ainvoke = AsyncMock(
            side_effect=[self._msg(classify_response), self._msg(reply_response)]
        )
        return Router(model=mock_model)

    async def test_force_complex_bypasses_classification(self):
        router = self._make_router("medium")
        tier, reply = await router.route("some question", [], force_complex=True)
        assert reply is None
        assert tier == "complex"
        # Model should NOT have been called
        router.model.ainvoke.assert_not_called()

    async def test_regex_light_skips_llm_classification(self):
        # Regex match bypasses classification entirely; the only ainvoke call is the reply.
        mock_model = MagicMock()
        mock_model.ainvoke = AsyncMock(return_value=self._msg("I'm doing great!"))
        router = Router(model=mock_model)
        tier, reply = await router.route("how are you", [], force_complex=False)
        assert tier == "light"
        assert reply == "I'm doing great!"
        # Exactly one model call — no classification step
        assert router.model.ainvoke.call_count == 1

    async def test_llm_classifies_medium(self):
        router = self._make_router("medium")
        tier, reply = await router.route("what is the bitcoin price?", [], force_complex=False)
        assert (tier, reply) == ("medium", None)

    async def test_llm_classifies_light_generates_reply(self):
        router = self._make_router("light", "Paris is the capital of France.")
        tier, reply = await router.route("what is the capital of France?", [], force_complex=False)
        assert tier == "light"
        assert reply == "Paris is the capital of France."

    async def test_llm_classifies_complex_downgraded_to_medium(self):
        # Without /think prefix, complex classification → downgraded to medium
        router = self._make_router("complex")
        tier, reply = await router.route("compare React and Vue", [], force_complex=False)
        assert (tier, reply) == ("medium", None)

    async def test_llm_error_falls_back_to_medium(self):
        failing_model = MagicMock()
        failing_model.ainvoke = AsyncMock(side_effect=Exception("connection error"))
        router = Router(model=failing_model)
        tier, reply = await router.route("some question", [], force_complex=False)
        assert (tier, reply) == ("medium", None)

    async def test_light_reply_empty_falls_back_to_medium(self):
        """If the light reply comes back empty, router returns medium instead."""
        router = self._make_router("light", "")  # empty reply
        tier, reply = await router.route("what is 2+2", [], force_complex=False)
        assert (tier, reply) == ("medium", None)

    async def test_strips_think_tags_from_classification(self):
        """Router strips <think>...</think> from model output before parsing tier."""
        mock_model = MagicMock()
        mock_model.ainvoke = AsyncMock(side_effect=[
            self._msg("<think>Hmm let me think...</think>medium"),
            self._msg("I'm fine!"),
        ])
        router = Router(model=mock_model)
        tier, _ = await router.route("what is the news?", [], force_complex=False)
        assert tier == "medium"

    async def test_think_prefix_forces_complex(self):
        """/think prefix is already stripped by agent.py; force_complex=True is passed."""
        router = self._make_router("medium")
        tier, reply = await router.route("analyse this", [], force_complex=True)
        assert (tier, reply) == ("complex", None)