Integrate Bifrost LLM gateway, add test suite, implement memory pipeline

- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through
  bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct
  Ollama access for VRAM flush/prewarm operations
- Switch the medium tier from qwen3:4b to qwen2.5:1.5b (direct call, no tools)
  via the _DirectModel wrapper; the complex tier keeps create_deep_agent with qwen3:8b
- Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches
  relevant context (injected into all tiers), _store_memory runs as background
  task after each reply writing to openmemory/Qdrant
- Add tests/unit/ with 133 tests covering router, channels, vram_manager,
  agent helpers; move integration test to tests/integration/
- Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b,
  gemma3:4b) and CPU Ollama providers
- Integration tests: 28/29 pass (only grammy fails — TELEGRAM_BOT_TOKEN is not set)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvis
2026-03-12 13:50:12 +00:00
parent ec45d255f0
commit f9618a9bbf
16 changed files with 1195 additions and 36 deletions

125
tests/unit/test_channels.py Normal file
View File

@@ -0,0 +1,125 @@
"""Unit tests for channels.py — register, deliver, pending_replies queue."""
import asyncio
import pytest
from unittest.mock import AsyncMock, patch
import channels
@pytest.fixture(autouse=True)
def reset_channels_state():
    """Give every test a clean slate of module-level channel state.

    Empties ``channels._callbacks`` and ``channels.pending_replies`` before
    the test body runs and again once it has finished.
    """

    def _wipe():
        # Both containers live at module level in ``channels`` and would
        # otherwise carry entries over from one test to the next.
        channels._callbacks.clear()
        channels.pending_replies.clear()

    _wipe()
    yield
    _wipe()
# ── register ───────────────────────────────────────────────────────────────────
class TestRegister:
    """Tests for ``channels.register`` and the ``_callbacks`` registry it fills."""

    def test_register_stores_callback(self):
        """A registered callback is stored under its channel name."""
        handler = AsyncMock()
        channels.register("test_channel", handler)
        assert channels._callbacks["test_channel"] is handler

    def test_register_overwrites_existing(self):
        """Registering the same channel twice keeps only the latest callback."""
        original, replacement = AsyncMock(), AsyncMock()
        channels.register("ch", original)
        channels.register("ch", replacement)
        assert channels._callbacks["ch"] is replacement

    def test_register_multiple_channels(self):
        """Distinct channel names keep their own, independent callbacks."""
        handlers = {"a": AsyncMock(), "b": AsyncMock()}
        for channel_name, handler in handlers.items():
            channels.register(channel_name, handler)
        for channel_name, handler in handlers.items():
            assert channels._callbacks[channel_name] is handler
# ── deliver ────────────────────────────────────────────────────────────────────
class TestDeliver:
    """Tests for ``channels.deliver``: queueing replies and invoking callbacks."""

    @staticmethod
    async def _next_reply(session_id, timeout=1.0):
        """Pop the next queued reply for *session_id*, failing fast if none arrives.

        A bare ``await queue.get()`` blocks forever on an empty queue, so a
        regression in ``deliver`` would hang the whole test run instead of
        failing. Bounding the wait with ``asyncio.wait_for`` turns a missing
        reply into a ``TimeoutError`` (i.e. a normal test failure).
        """
        queue = channels.pending_replies[session_id]
        return await asyncio.wait_for(queue.get(), timeout=timeout)

    async def test_deliver_enqueues_reply(self):
        """A delivered reply lands in the session's pending queue."""
        channels.register("cli", AsyncMock())
        await channels.deliver("cli-alvis", "cli", "hello world")
        assert not channels.pending_replies["cli-alvis"].empty()
        assert await self._next_reply("cli-alvis") == "hello world"

    async def test_deliver_calls_channel_callback(self):
        """The callback registered for the channel is awaited exactly once."""
        cb = AsyncMock()
        channels.register("telegram", cb)
        await channels.deliver("tg-123", "telegram", "reply text")
        cb.assert_awaited_once_with("tg-123", "reply text")

    async def test_deliver_unknown_channel_still_enqueues(self):
        """No registered callback for channel → reply still goes to the queue."""
        await channels.deliver("cli-bob", "nonexistent", "fallback reply")
        assert await self._next_reply("cli-bob") == "fallback reply"

    async def test_deliver_unknown_channel_does_not_raise(self):
        """Missing callback must not raise an exception."""
        await channels.deliver("cli-x", "ghost_channel", "msg")

    async def test_deliver_creates_queue_if_absent(self):
        """The first deliver for a session creates its pending-replies entry."""
        channels.register("cli", AsyncMock())
        assert "cli-new" not in channels.pending_replies
        await channels.deliver("cli-new", "cli", "hi")
        assert "cli-new" in channels.pending_replies

    async def test_deliver_reuses_existing_queue(self):
        """Second deliver to the same session appends to the same queue."""
        channels.register("cli", AsyncMock())
        await channels.deliver("cli-alvis", "cli", "first")
        await channels.deliver("cli-alvis", "cli", "second")
        # Replies must come back in delivery order.
        assert await self._next_reply("cli-alvis") == "first"
        assert await self._next_reply("cli-alvis") == "second"

    async def test_deliver_telegram_sends_to_callback(self):
        """A plain coroutine function works as a callback and receives the reply."""
        sent = []

        async def fake_tg(session_id, text):
            sent.append((session_id, text))

        channels.register("telegram", fake_tg)
        await channels.deliver("tg-999", "telegram", "test message")
        assert sent == [("tg-999", "test message")]
# ── register_defaults ──────────────────────────────────────────────────────────
class TestRegisterDefaults:
    """Tests for the callbacks installed by ``channels.register_defaults``."""

    def test_registers_telegram_and_cli(self):
        """Both default channel names are present after registration."""
        channels.register_defaults()
        for channel_name in ("telegram", "cli"):
            assert channel_name in channels._callbacks

    async def test_cli_callback_is_noop(self):
        """CLI send callback does nothing (replies are handled via SSE queue)."""
        channels.register_defaults()
        cli_send = channels._callbacks["cli"]
        # Must complete without raising and hand back nothing.
        assert await cli_send("cli-alvis", "some reply") is None

    async def test_telegram_callback_chunks_long_messages(self):
        """Telegram callback splits messages > 4000 chars into chunks."""
        channels.register_defaults()
        telegram_send = channels._callbacks["telegram"]
        oversized = "x" * 9000  # > 4000 chars → should produce 3 chunks

        # Stand-in HTTP client that also works as its own async context manager.
        fake_client = AsyncMock()
        fake_client.__aenter__ = AsyncMock(return_value=fake_client)
        fake_client.__aexit__ = AsyncMock(return_value=False)
        fake_client.post = AsyncMock()

        with patch("channels.httpx.AsyncClient", return_value=fake_client):
            await telegram_send("tg-123", oversized)

        # 9000 chars / 4000 per chunk = 3 POST calls
        assert fake_client.post.await_count == 3