refactor(infra): drop ai profile; ollama + litellm move to Agap
Ollama and LiteLLM are shared Agap services (agap_git/openai/docker-compose.yml); oO never starts them.

This commit removes the ai profile, the litellm config, and the --profile ai runbook. It points ml-serving at https://llm.alogins.net by default (LITELLM_URL) and adds the host.docker.internal:host-gateway extra host so the container can reach Agap ollama on the host (OLLAMA_URL defaults to http://host.docker.internal:11434).

It also updates the tip-generator model alias to qwen2.5:1.5b to match the model actually pulled on Agap ollama (the 7b model is ~4.7 GB and would exceed the VRAM budget).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -65,8 +65,10 @@ services:
|
||||
profiles: [full]
|
||||
env_file: ../../.env.local
|
||||
environment:
|
||||
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
|
||||
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
|
||||
LITELLM_URL: ${LITELLM_URL:-https://llm.alogins.net}
|
||||
OLLAMA_URL: ${OLLAMA_URL:-http://host.docker.internal:11434}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
ports:
|
||||
- "127.0.0.1:8000:8000"
|
||||
healthcheck:
|
||||
@@ -159,25 +161,6 @@ services:
|
||||
airflow-init:
|
||||
condition: service_completed_successfully
|
||||
|
||||
# ── ai profile — Ollama + LiteLLM ────────────────────────────────────────
|
||||
# Start: docker compose --profile ai up
|
||||
# LiteLLM proxy: http://localhost:4000 (master key from LITELLM_MASTER_KEY)
|
||||
# Ollama API: http://localhost:11434
|
||||
# In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
profiles: [ai]
|
||||
volumes:
|
||||
- /mnt/ssd/dbs/oo/ollama:/root/.ollama
|
||||
ports:
|
||||
- "127.0.0.1:11434:11434"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "--fail", "http://localhost:11434"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── events profile — NATS JetStream ─────────────────────────────────────
|
||||
# Start: docker compose --profile events up
|
||||
# NATS monitoring: http://localhost:8222
|
||||
@@ -198,26 +181,6 @@ services:
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
litellm:
|
||||
image: ghcr.io/berriai/litellm:main-latest
|
||||
profiles: [ai]
|
||||
command: ["--config", "/app/litellm_config.yaml", "--port", "4000"]
|
||||
environment:
|
||||
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev}
|
||||
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
|
||||
volumes:
|
||||
- ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro
|
||||
ports:
|
||||
- "127.0.0.1:4000:4000"
|
||||
depends_on:
|
||||
ollama:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "--fail", "http://localhost:4000/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
mlflow:
|
||||
image: ghcr.io/mlflow/mlflow:v2.14.3
|
||||
profiles: [mlops]
|
||||
|
||||
Reference in New Issue
Block a user