- Pre-routing URL fetch: any message containing URLs has their content fetched asynchronously (httpx.AsyncClient) by _fetch_urls_from_message() before routing
- URL context and memories gathered concurrently with asyncio.gather
- Light tier upgraded to medium when URL content is present
- url_context injected into system prompt for medium and complex agents
- Complex agent retains web_search/fetch_url tools + receives pre-fetched content
- Medium model restored to qwen3:4b (was temporarily qwen2.5:1.5b)
- Unit tests added for _extract_urls
- ARCHITECTURE.md: added Tool Handling, Crawl4AI Integration, Memory Pipeline sections
- CLAUDE.md: updated request flow and Crawl4AI integration docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
77 lines
2.1 KiB
YAML
77 lines
2.1 KiB
YAML
# Docker Compose stack with five services: bifrost, deepagents, openmemory,
# grammy and crawl4ai. Services address each other by service name on the
# Compose network; anything running on the Docker host is reached through
# host.docker.internal.
services:
  # LLM gateway. deepagents talks to it at http://bifrost:8080/v1.
  bifrost:
    # NOTE(review): no tag is given, so Docker resolves this to :latest.
    # Consider pinning a version for reproducible deployments.
    image: maximhq/bifrost
    container_name: bifrost
    ports:
      - "8080:8080"
    volumes:
      # Gateway config, mounted read-only inside APP_DIR.
      - ./bifrost-config.json:/app/data/config.json:ro
    environment:
      - APP_DIR=/app/data
      - APP_PORT=8080
      - LOG_LEVEL=info
    extra_hosts:
      # host-gateway resolves to the Docker host's IP, so the container can
      # reach services listening on the host.
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Agent service, built from the Dockerfile in this directory.
  deepagents:
    build: .
    container_name: deepagents
    ports:
      - "8000:8000"
    environment:
      - PYTHONUNBUFFERED=1
      # Bifrost gateway — all LLM inference goes through here
      - BIFROST_URL=http://bifrost:8080/v1
      # Direct Ollama GPU URL — used only by VRAMManager for flush/prewarm
      - OLLAMA_BASE_URL=http://host.docker.internal:11436
      - DEEPAGENTS_MODEL=qwen3:4b
      - DEEPAGENTS_COMPLEX_MODEL=qwen3:8b
      - DEEPAGENTS_ROUTER_MODEL=qwen2.5:1.5b
      - SEARXNG_URL=http://host.docker.internal:11437
      - GRAMMY_URL=http://grammy:3001
      - CRAWL4AI_URL=http://crawl4ai:11235
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # Short-form depends_on only orders container start-up; it does not wait
    # for the dependencies to be ready to serve requests.
    depends_on:
      - openmemory
      - grammy
      - crawl4ai
      - bifrost
    restart: unless-stopped

  # Memory service, built from ./openmemory. It uses two Ollama endpoints on
  # the Docker host: one on GPU for extraction, one on CPU for embeddings.
  openmemory:
    build: ./openmemory
    container_name: openmemory
    ports:
      - "8765:8765"
    environment:
      # Extraction LLM runs on GPU — qwen2.5:1.5b for speed (~3s)
      - OLLAMA_GPU_URL=http://host.docker.internal:11436
      - OLLAMA_EXTRACTION_MODEL=qwen2.5:1.5b
      # Embedding (nomic-embed-text) runs on CPU — fast enough for search (50-150ms)
      - OLLAMA_CPU_URL=http://host.docker.internal:11435
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Telegram-facing service, built from ./grammy. It is given the
  # deepagents base URL via DEEPAGENTS_URL.
  grammy:
    build: ./grammy
    container_name: grammy
    ports:
      - "3001:3001"
    environment:
      # Interpolated by Compose from the shell environment or an .env file.
      # NOTE(review): an unset variable is substituted as an empty string
      # (Compose only warns), so the container starts without a token.
      - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN}
      - DEEPAGENTS_URL=http://deepagents:8000
    restart: unless-stopped

  # Crawl4AI service; deepagents reaches it at http://crawl4ai:11235.
  crawl4ai:
    # NOTE(review): :latest is a moving tag; consider pinning a version.
    image: unclecode/crawl4ai:latest
    container_name: crawl4ai
    ports:
      - "11235:11235"
    environment:
      - CRAWL4AI_LOG_LEVEL=WARNING
    # Size of the container's /dev/shm.
    # NOTE(review): presumably raised for a headless browser — confirm.
    shm_size: "1g"
    restart: unless-stopped