- Add Bifrost (maximhq/bifrost) as LLM gateway: all inference routes through bifrost:8080/v1 with retry logic and observability; VRAMManager keeps direct Ollama access for VRAM flush/prewarm operations - Switch medium model from qwen3:4b to qwen2.5:1.5b (direct call, no tools) via _DirectModel wrapper; complex keeps create_deep_agent with qwen3:8b - Implement out-of-agent memory pipeline: _retrieve_memories pre-fetches relevant context (injected into all tiers), _store_memory runs as background task after each reply writing to openmemory/Qdrant - Add tests/unit/ with 133 tests covering router, channels, vram_manager, agent helpers; move integration test to tests/integration/ - Add bifrost-config.json with GPU Ollama (qwen2.5:0.5b/1.5b, qwen3:4b/8b, gemma3:4b) and CPU Ollama providers - Integration test 28/29 pass (only grammy fails — no TELEGRAM_BOT_TOKEN) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
28 lines
898 B
Python
28 lines
898 B
Python
from deepagents import create_deep_agent
|
|
|
|
|
|
class _DirectModel:
|
|
"""Thin wrapper: single LLM call, no tools, same ainvoke interface as a graph."""
|
|
|
|
def __init__(self, model):
|
|
self._model = model
|
|
|
|
async def ainvoke(self, input_dict: dict) -> dict:
|
|
messages = input_dict["messages"]
|
|
response = await self._model.ainvoke(messages)
|
|
return {"messages": list(messages) + [response]}
|
|
|
|
|
|
def build_medium_agent(model, agent_tools: list, system_prompt: str):
    """Build the medium-tier agent: a single direct LLM call, no tool loop.

    ``agent_tools`` and ``system_prompt`` are unused here; they are accepted
    only for signature parity with the other tier builders. Skipping the
    agentic machinery keeps this tier fast (~3s response).
    """
    wrapper = _DirectModel(model)
    return wrapper
|
|
|
|
|
|
def build_complex_agent(model, agent_tools: list, system_prompt: str):
    """Build the complex-tier agent via ``create_deep_agent``.

    The returned agent runs a direct agentic loop — it calls its own tools
    (e.g. web_search/fetch_url) rather than delegating through subagents.
    """
    config = {
        "model": model,
        "tools": agent_tools,
        "system_prompt": system_prompt,
    }
    return create_deep_agent(**config)
|