Files
adolf/docker-compose.yml
Alvis 1f5e272600 Switch from Bifrost to LiteLLM; add Matrix channel; update rules
Infrastructure:
- docker-compose.yml: replace bifrost container with LiteLLM proxy
  (host.docker.internal:4000); complex model → deepseek-r1:free via
  OpenRouter; add Matrix URL env var; mount logs volume
- bifrost-config.json: add auth_config + postgres config_store (archived)

Routing:
- router.py: full semantic 3-tier classifier rewrite — nomic-embed-text
  centroids for light/medium/complex; regex pre-classifiers for all tiers;
  Russian utterance sets expanded
- agent.py: wire LiteLLM URL; add dry_run support; add Matrix channel

Channels:
- channels.py: add Matrix adapter (_matrix_send via mx- session prefix)

Rules / docs:
- agent-pipeline.md: remove /think prefix requirement; document automatic
  complex tier classification
- llm-inference.md: update BIFROST_URL → LITELLM_URL references; add
  remote model note for complex tier
- ARCHITECTURE.md: deleted (superseded by README.md)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:14:13 +00:00

95 lines
2.6 KiB
YAML

services:
  # Main agent API. grammy and cli reach it at http://deepagents:8000.
  deepagents:
    build: .
    container_name: deepagents
    ports:
      - "8000:8000"
    environment:
      - PYTHONUNBUFFERED=1
      # LiteLLM proxy — all LLM inference goes through here
      - LITELLM_URL=http://host.docker.internal:4000/v1
      # Secret: supplied from the shell environment or .env, like the other
      # tokens in this file, so the key is never committed to the repository.
      - LITELLM_API_KEY=${LITELLM_API_KEY}
      # Direct Ollama GPU URL — used only by VRAMManager for flush/prewarm
      - OLLAMA_BASE_URL=http://host.docker.internal:11436
      - DEEPAGENTS_MODEL=qwen3:4b
      - DEEPAGENTS_COMPLEX_MODEL=deepseek/deepseek-r1:free
      - DEEPAGENTS_ROUTER_MODEL=qwen2.5:1.5b
      - SEARXNG_URL=http://host.docker.internal:11437
      - GRAMMY_URL=http://grammy:3001
      - MATRIX_URL=http://host.docker.internal:3002
      - CRAWL4AI_URL=http://crawl4ai:11235
      - ROUTECHECK_URL=http://routecheck:8090
      # Same value routecheck receives as INTERNAL_TOKEN.
      - ROUTECHECK_TOKEN=${ROUTECHECK_TOKEN}
    volumes:
      # Bind-mount the host ./logs directory to /app/logs in the container.
      - ./logs:/app/logs
    extra_hosts:
      # Makes host.docker.internal resolve to the Docker host gateway.
      - "host.docker.internal:host-gateway"
    depends_on:
      - openmemory
      - grammy
      - crawl4ai
      - routecheck
    restart: unless-stopped

  # Memory service; uses two host Ollama endpoints (GPU and CPU).
  openmemory:
    build: ./openmemory
    container_name: openmemory
    ports:
      - "8765:8765"
    environment:
      # Extraction LLM runs on GPU — qwen2.5:1.5b for speed (~3s)
      - OLLAMA_GPU_URL=http://host.docker.internal:11436
      - OLLAMA_EXTRACTION_MODEL=qwen2.5:1.5b
      # Embedding (nomic-embed-text) runs on CPU — fast enough for search (50-150ms)
      - OLLAMA_CPU_URL=http://host.docker.internal:11435
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Service at grammy:3001 (deepagents' GRAMMY_URL); takes a Telegram bot
  # token. NOTE(review): presumably the Telegram channel bridge — confirm.
  grammy:
    build: ./grammy
    container_name: grammy
    ports:
      - "3001:3001"
    environment:
      - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN}
      - DEEPAGENTS_URL=http://deepagents:8000
    restart: unless-stopped

  # Interactive client for deepagents. Assigned to the "tools" profile, so it
  # is only started when that profile is enabled.
  cli:
    build:
      context: .
      dockerfile: Dockerfile.cli
    container_name: cli
    environment:
      - DEEPAGENTS_URL=http://deepagents:8000
    depends_on:
      - deepagents
    # Keep stdin open and allocate a TTY for interactive use.
    stdin_open: true
    tty: true
    profiles:
      - tools

  # Service at routecheck:8090 (deepagents' ROUTECHECK_URL).
  routecheck:
    build: ./routecheck
    container_name: routecheck
    ports:
      - "8090:8090"
    environment:
      - YANDEX_ROUTING_KEY=${YANDEX_ROUTING_KEY}
      # Receives the same ROUTECHECK_TOKEN value that deepagents is given.
      - INTERNAL_TOKEN=${ROUTECHECK_TOKEN}
      # HTTPS proxy setting pointing at port 56928 on the Docker host.
      - HTTPS_PROXY=http://host.docker.internal:56928
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped

  # Service at crawl4ai:11235 (deepagents' CRAWL4AI_URL).
  crawl4ai:
    # NOTE(review): ":latest" is unpinned; consider pinning a version tag so
    # rebuilds are reproducible.
    image: unclecode/crawl4ai:latest
    container_name: crawl4ai
    ports:
      - "11235:11235"
    environment:
      - CRAWL4AI_LOG_LEVEL=WARNING
    # NOTE(review): presumably enlarged /dev/shm for a headless browser — confirm.
    shm_size: "1g"
    restart: unless-stopped