# Local AI stack: GPU + CPU Ollama instances, Open WebUI frontend,
# SearXNG search backend, and Qdrant vector store.
services:
  # GPU-backed Ollama instance (main inference endpoint).
  ollama:
    image: ollama/ollama
    container_name: ollama
    ports:
      - "11436:11434"
    volumes:
      - /mnt/ssd/ai/ollama:/root/.ollama
      # NOTE(review): this is open-webui's data directory mounted into the
      # ollama container — looks like a copy-paste from the open-webui service
      # below. Confirm ollama actually needs it; otherwise remove this line.
      - /mnt/ssd/ai/open-webui:/app/backend/data
    restart: always
    environment:
      # Allow qwen3:8b + qwen2.5:1.5b to coexist in VRAM (~6.7-7.7 GB on 8 GB GPU)
      - OLLAMA_MAX_LOADED_MODELS=2
      # One GPU inference at a time — prevents compute contention between models
      - OLLAMA_NUM_PARALLEL=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  # CPU-only Ollama instance: no GPU reservation, separate model store and port.
  ollama-cpu:
    image: ollama/ollama
    container_name: ollama-cpu
    ports:
      - "11435:11434"
    volumes:
      - /mnt/ssd/ai/ollama-cpu:/root/.ollama
    restart: always

  # Web frontend; persists its data under /mnt/ssd/ai/open-webui.
  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    ports:
      - "3125:8080"
    volumes:
      - /mnt/ssd/ai/open-webui:/app/backend/data
    restart: always
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      # SECURITY: a live Anthropic API key was previously hard-coded here and
      # committed to version control — that key is compromised and must be
      # rotated immediately. Supply the replacement via the host environment
      # (or an env_file / secrets mechanism); Compose interpolates it below.
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}

  # Metasearch engine used by Open WebUI for web search.
  searxng:
    image: docker.io/searxng/searxng:latest
    container_name: searxng
    volumes:
      - /mnt/ssd/ai/searxng/config/:/etc/searxng/
      - /mnt/ssd/ai/searxng/data/:/var/cache/searxng/
    restart: always
    ports:
      - "11437:8080"

  # Vector database (HTTP on 6333, gRPC on 6334).
  qdrant:
    image: qdrant/qdrant
    container_name: qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    restart: always
    volumes:
      # :z relabels the volume for SELinux shared access.
      - /mnt/ssd/dbs/qdrant:/qdrant/storage:z