# adolf/docker-compose.yml

services:
  deepagents:
    # Image is built from the directory containing this compose file.
    container_name: deepagents
    build: .
    restart: unless-stopped
    # Start-order dependency only: both services are started before this one.
    depends_on:
      - openmemory
      - grammy
    # Publish container port 8000 on host port 8000.
    ports:
      - "8000:8000"
    # Make host.docker.internal resolve to the Docker host gateway so the
    # host-side endpoints referenced below are reachable from the container.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      PYTHONUNBUFFERED: "1"
      # Endpoints reached on the Docker host.
      OLLAMA_BASE_URL: "http://host.docker.internal:11436"
      SEARXNG_URL: "http://host.docker.internal:11437"
      # Model tags handed to the application.
      DEEPAGENTS_MODEL: "qwen3:4b"
      DEEPAGENTS_COMPLEX_MODEL: "qwen3:8b"
      DEEPAGENTS_ROUTER_MODEL: "qwen2.5:1.5b"

  openmemory:
    # Image is built from the ./openmemory subdirectory.
    container_name: openmemory
    build: ./openmemory
    restart: unless-stopped
    # Publish container port 8765 on host port 8765.
    ports:
      - "8765:8765"
    # Make host.docker.internal resolve to the Docker host gateway so both
    # Ollama endpoints below are reachable from the container.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      # GPU-backed Ollama: the extraction LLM (qwen2.5:1.5b) runs here after
      # the reply has been sent — extraction takes roughly 2-5s.
      OLLAMA_GPU_URL: "http://host.docker.internal:11436"
      # CPU-backed Ollama: embeddings (nomic-embed-text) run here — fast
      # enough for search (50-150ms).
      OLLAMA_CPU_URL: "http://host.docker.internal:11435"

  grammy:
    # Image is built from the ./grammy subdirectory.
    build: ./grammy
    container_name: grammy
    # Publish container port 3001 on host port 3001.
    ports:
      - "3001:3001"
    environment:
      # Required secret, interpolated by Compose from the shell environment or
      # an .env file. The `:?` form makes Compose abort with this message when
      # the variable is unset or empty, instead of silently passing an empty
      # token into the container.
      - "TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN must be set}"
      # Points at the deepagents service by its compose service name; port
      # 8000 matches the container port that service publishes in this file.
      - DEEPAGENTS_URL=http://deepagents:8000
    restart: unless-stopped