Infrastructure: - docker-compose.yml: replace bifrost container with LiteLLM proxy (host.docker.internal:4000); complex model → deepseek-r1:free via OpenRouter; add Matrix URL env var; mount logs volume - bifrost-config.json: add auth_config + postgres config_store (archived) Routing: - router.py: full semantic 3-tier classifier rewrite — nomic-embed-text centroids for light/medium/complex; regex pre-classifiers for all tiers; Russian utterance sets expanded - agent.py: wire LiteLLM URL; add dry_run support; add Matrix channel Channels: - channels.py: add Matrix adapter (_matrix_send via mx- session prefix) Rules / docs: - agent-pipeline.md: remove /think prefix requirement; document automatic complex tier classification - llm-inference.md: update BIFROST_URL → LITELLM_URL references; add remote model note for complex tier - ARCHITECTURE.md: deleted (superseded by README.md) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
95 lines
2.6 KiB
YAML
---
# docker-compose stack: deepagents orchestrator plus its supporting services
# (memory, Telegram bridge, CLI, route checker, crawler). All LLM inference is
# proxied through LiteLLM on the host; Ollama is reached via host-gateway.
services:

  # Main agent runtime — talks to every other service and to host-side LLMs.
  deepagents:
    build: .
    container_name: deepagents
    ports:
      - "8000:8000"
    environment:
      - PYTHONUNBUFFERED=1
      # LiteLLM proxy — all LLM inference goes through here
      - LITELLM_URL=http://host.docker.internal:4000/v1
      # NOTE(review): key was hard-coded in VCS. Now read from the environment
      # (.env), with the previous literal kept as the fallback so existing
      # deployments keep working. Rotate the key and drop the fallback.
      - LITELLM_API_KEY=${LITELLM_API_KEY:-sk-fjQC1BxAiGFSMs}
      # Direct Ollama GPU URL — used only by VRAMManager for flush/prewarm
      - OLLAMA_BASE_URL=http://host.docker.internal:11436
      - DEEPAGENTS_MODEL=qwen3:4b
      - DEEPAGENTS_COMPLEX_MODEL=deepseek/deepseek-r1:free
      - DEEPAGENTS_ROUTER_MODEL=qwen2.5:1.5b
      - SEARXNG_URL=http://host.docker.internal:11437
      - GRAMMY_URL=http://grammy:3001
      - MATRIX_URL=http://host.docker.internal:3002
      - CRAWL4AI_URL=http://crawl4ai:11235
      - ROUTECHECK_URL=http://routecheck:8090
      - ROUTECHECK_TOKEN=${ROUTECHECK_TOKEN}
    volumes:
      - ./logs:/app/logs
    extra_hosts:
      - "host.docker.internal:host-gateway"
    depends_on:
      - openmemory
      - grammy
      - crawl4ai
      - routecheck
    restart: unless-stopped

  # Long-term memory service: LLM extraction on GPU, embeddings on CPU.
  openmemory:
    build: ./openmemory
    container_name: openmemory
    ports:
      - "8765:8765"
    environment:
      # Extraction LLM runs on GPU — qwen2.5:1.5b for speed (~3s)
      - OLLAMA_GPU_URL=http://host.docker.internal:11436
      - OLLAMA_EXTRACTION_MODEL=qwen2.5:1.5b
      # Embedding (nomic-embed-text) runs on CPU — fast enough for search (50-150ms)
      - OLLAMA_CPU_URL=http://host.docker.internal:11435
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Telegram bot bridge (grammY) — forwards chat traffic to deepagents.
  # No depends_on here: deepagents already depends on grammy, and a reverse
  # dependency would create a cycle.
  grammy:
    build: ./grammy
    container_name: grammy
    ports:
      - "3001:3001"
    environment:
      - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN}
      - DEEPAGENTS_URL=http://deepagents:8000
    restart: unless-stopped

  # Interactive CLI client — opt-in via the "tools" profile, not started by
  # default.
  cli:
    build:
      context: .
      dockerfile: Dockerfile.cli
    container_name: cli
    environment:
      - DEEPAGENTS_URL=http://deepagents:8000
    depends_on:
      - deepagents
    stdin_open: true
    tty: true
    profiles:
      - tools

  # Routing check service (Yandex routing API), reached through a host-side
  # HTTPS proxy.
  routecheck:
    build: ./routecheck
    container_name: routecheck
    ports:
      - "8090:8090"
    environment:
      - YANDEX_ROUTING_KEY=${YANDEX_ROUTING_KEY}
      - INTERNAL_TOKEN=${ROUTECHECK_TOKEN}
      - HTTPS_PROXY=http://host.docker.internal:56928
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Headless crawler service; needs enlarged /dev/shm for Chromium.
  crawl4ai:
    image: unclecode/crawl4ai:latest
    container_name: crawl4ai
    ports:
      - "11235:11235"
    environment:
      - CRAWL4AI_LOG_LEVEL=WARNING
    shm_size: "1g"
    restart: unless-stopped