# Source: AgapHost/openai/docker-compose.yml
# Snapshot: 2026-03-05 11:22:34 +00:00 (70 lines, 1.7 KiB, YAML)
# Local AI stack: GPU + CPU Ollama, Open WebUI front-end, SearXNG search,
# Qdrant vector store. Host data lives under /mnt/ssd.
services:
  # GPU-backed Ollama instance (primary model server).
  ollama:
    image: ollama/ollama
    container_name: ollama
    ports:
      - "11436:11434"
    volumes:
      - /mnt/ssd/ai/ollama:/root/.ollama
      # NOTE(review): this host path is also mounted by the open-webui service
      # below; ollama itself does not read /app/backend/data — this looks like
      # a copy-paste leftover. Confirm and remove.
      - /mnt/ssd/ai/open-webui:/app/backend/data
    restart: always
    environment:
      # Allow qwen3:8b + qwen2.5:1.5b to coexist in VRAM (~6.7-7.7 GB on 8 GB GPU)
      - OLLAMA_MAX_LOADED_MODELS=2
      # One GPU inference at a time — prevents compute contention between models
      - OLLAMA_NUM_PARALLEL=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  # CPU-only Ollama instance (no GPU reservation requested) on a separate port
  # and with its own model store.
  ollama-cpu:
    image: ollama/ollama
    container_name: ollama-cpu
    ports:
      - "11435:11434"
    volumes:
      - /mnt/ssd/ai/ollama-cpu:/root/.ollama
    restart: always

  # Web front-end; its data dir is the same host path mounted into ollama above.
  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    ports:
      - "3125:8080"
    volumes:
      - /mnt/ssd/ai/open-webui:/app/backend/data
    restart: always
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      # SECURITY FIX: an Anthropic API key was previously hard-coded here in
      # plaintext. It is now read from the host environment or a .env file
      # next to this compose file (Compose variable interpolation). The key
      # that was committed must be treated as leaked — rotate it in the
      # Anthropic console.
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}

  # Metasearch engine used by Open WebUI for web search.
  searxng:
    image: docker.io/searxng/searxng:latest
    container_name: searxng
    volumes:
      - /mnt/ssd/ai/searxng/config/:/etc/searxng/
      - /mnt/ssd/ai/searxng/data/:/var/cache/searxng/
    restart: always
    ports:
      - "11437:8080"

  # Vector database (6333 = REST, 6334 = gRPC); :z relabels for SELinux hosts.
  qdrant:
    image: qdrant/qdrant
    container_name: qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    restart: always
    volumes:
      - /mnt/ssd/dbs/qdrant:/qdrant/storage:z