Integrate Bifrost LLM gateway, add test suite, implement memory pipeline

- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through
  bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct
  Ollama access for VRAM flush/prewarm operations
- Switch medium model from qwen3:4b to qwen2.5:1.5b (direct call, no tools)
  via _DirectModel wrapper; complex keeps create_deep_agent with qwen3:8b
- Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches
  relevant context (injected into all tiers), _store_memory runs as background
  task after each reply writing to openmemory/Qdrant
- Add tests/unit/ with 133 tests covering router, channels, vram_manager,
  agent helpers; move integration test to tests/integration/
- Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b,
  gemma3:4b) and CPU Ollama providers
- Integration tests: 28/29 pass (only grammy fails — no TELEGRAM_BOT_TOKEN)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvis
2026-03-12 13:50:12 +00:00
parent ec45d255f0
commit f9618a9bbf
16 changed files with 1195 additions and 36 deletions

80
tests/unit/conftest.py Normal file
View File

@@ -0,0 +1,80 @@
"""
Stub out all third-party packages that Adolf's source modules import.
This lets the unit tests run without a virtualenv or Docker environment.
Stubs are installed into sys.modules before any test file is collected.
"""
import sys
from unittest.mock import MagicMock
# ── helpers ────────────────────────────────────────────────────────────────────
def _mock(name: str) -> MagicMock:
    """Create a MagicMock stub, register it in ``sys.modules`` under *name*,
    and return it so callers can attach attributes to the fake module."""
    stub = MagicMock(name=name)
    sys.modules[name] = stub
    return stub
# ── pydantic: BaseModel must be a real class so `class Foo(BaseModel)` works ──
class _FakeBaseModel:
    """Minimal stand-in for ``pydantic.BaseModel``.

    Real enough that ``class Foo(_FakeBaseModel)`` definitions and keyword
    construction both work; performs no validation.
    """
    # pydantic v2 exposes model_fields on the class; code may introspect it.
    model_fields: dict = {}

    def __init_subclass__(cls, **kwargs):
        # Accept and ignore pydantic-style class keywords.
        pass

    def __init__(self, **data):
        # Store every keyword argument as a plain instance attribute.
        self.__dict__.update(data)
# Register the fake pydantic module and hang the stand-in BaseModel off it so
# `from pydantic import BaseModel` in the code under test resolves to our class.
_pydantic = _mock("pydantic")
_pydantic.BaseModel = _FakeBaseModel
# ── httpx: used by channels.py, vram_manager.py, agent.py ────────────────────
_mock("httpx")
# ── fastapi ───────────────────────────────────────────────────────────────────
_fastapi = _mock("fastapi")
_mock("fastapi.responses")
# ── langchain stack ───────────────────────────────────────────────────────────
_mock("langchain_openai")
_lc_core = _mock("langchain_core")  # parent package stub; submodules registered below
_lc_msgs = _mock("langchain_core.messages")  # patched further down with real classes
_mock("langchain_core.tools")
# Provide real-ish message classes so router.py can instantiate them
class _FakeMsg:
    """Minimal message base: stores ``content`` and nothing else."""

    def __init__(self, content=""):
        self.content = content
class SystemMessage(_FakeMsg):
    """Stub for ``langchain_core.messages.SystemMessage`` (content only)."""
    pass
class HumanMessage(_FakeMsg):
    """Stub for ``langchain_core.messages.HumanMessage`` (content only)."""
    pass
class AIMessage(_FakeMsg):
    """Stub for ``langchain_core.messages.AIMessage``: content plus tool_calls."""

    def __init__(self, content="", tool_calls=None):
        _FakeMsg.__init__(self, content)
        # Falsy tool_calls (None, empty list) normalise to a fresh empty list.
        self.tool_calls = tool_calls or []
# Expose the stub message classes on the fake langchain_core.messages module so
# `from langchain_core.messages import SystemMessage, ...` works under test.
_lc_msgs.SystemMessage = SystemMessage
_lc_msgs.HumanMessage = HumanMessage
_lc_msgs.AIMessage = AIMessage
# ── remaining third-party packages: plain MagicMock stubs ────────────────────
_mock("langchain_mcp_adapters")
_mock("langchain_mcp_adapters.client")
_mock("langchain_community")
_mock("langchain_community.utilities")
# ── deepagents (agent_factory.py) ─────────────────────────────────────────────
_mock("deepagents")