refactor(infra): drop ai profile; ollama + litellm move to Agap
Ollama and LiteLLM are shared Agap services (agap_git/openai/docker-compose.yml); oO never starts them. Removes the ai profile, the litellm config, and the --profile ai runbook; points ml-serving at https://llm.alogins.net by default and adds host.docker.internal host-gateway so the container can hit Agap ollama on the host. Also updates the tip-generator model alias to qwen2.5:1.5b to match the model actually pulled on Agap ollama (7b is ~4.7 GB and would blow VRAM budget). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -10,10 +10,12 @@ API_BASE_URL=http://localhost:3078
|
|||||||
WEB_BASE_URL=http://localhost:3000
|
WEB_BASE_URL=http://localhost:3000
|
||||||
ML_SERVING_URL=http://localhost:8000
|
ML_SERVING_URL=http://localhost:8000
|
||||||
|
|
||||||
# AI stack — Ollama + LiteLLM (docker compose --profile ai)
|
# AI stack — shared Agap services (ollama + litellm + langfuse). Not run from oO.
|
||||||
LITELLM_URL=http://localhost:4000
|
# Prod: https://llm.alogins.net | Dev: http://host.docker.internal:4000 from containers,
|
||||||
|
# http://localhost:4000 from host. Ollama: http://host.docker.internal:11434 from containers, http://localhost:11434 from host.
|
||||||
|
LITELLM_URL=https://llm.alogins.net
|
||||||
LITELLM_MASTER_KEY=sk-oo-dev
|
LITELLM_MASTER_KEY=sk-oo-dev
|
||||||
OLLAMA_URL=http://localhost:11434
|
OLLAMA_URL=http://host.docker.internal:11434
|
||||||
|
|
||||||
# Google OAuth — https://console.cloud.google.com/
|
# Google OAuth — https://console.cloud.google.com/
|
||||||
GOOGLE_CLIENT_ID=
|
GOOGLE_CLIENT_ID=
|
||||||
|
|||||||
@@ -82,13 +82,13 @@ oO generates tips with an LLM and ranks them with a bandit. All LLM calls route
|
|||||||
|
|
||||||
| Alias | Model | Used by |
|
| Alias | Model | Used by |
|
||||||
|-------|-------|---------|
|
|-------|-------|---------|
|
||||||
| `tip-generator` | qwen2.5:7b (default) | `ml/serving` tip generation |
|
| `tip-generator` | qwen2.5:1.5b (default) | `ml/serving` tip generation |
|
||||||
| `embedder` | nomic-embed-text | task clustering, dedup |
|
| `embedder` | nomic-embed-text | task clustering, dedup |
|
||||||
| `judge` | claude-haiku-4-5 (cloud, eval only) | offline sim |
|
| `judge` | claude-haiku-4-5 (cloud, eval only) | offline sim |
|
||||||
|
|
||||||
Env vars: `LITELLM_URL` (default `http://localhost:4000`), `OLLAMA_URL` (default `http://localhost:11434`).
|
Env vars: `LITELLM_URL` (default `https://llm.alogins.net`, the prod endpoint), `OLLAMA_URL` (Ollama on the Agap host; default `http://host.docker.internal:11434`, as seen from containers).
|
||||||
|
|
||||||
Start with: `docker compose --profile ai up` (adds Ollama + LiteLLM locally). In prod both are shared Agap services.
|
Ollama and LiteLLM are **shared Agap services**, not oO services — they live in `agap_git/openai/docker-compose.yml` along with langfuse (observability). oO never starts them; ml-serving just calls the alias.
|
||||||
|
|
||||||
**LLM tip generation pipeline:**
|
**LLM tip generation pipeline:**
|
||||||
1. `ml/features/context.py` assembles user signals → structured prompt context
|
1. `ml/features/context.py` assembles user signals → structured prompt context
|
||||||
|
|||||||
@@ -65,8 +65,10 @@ services:
|
|||||||
profiles: [full]
|
profiles: [full]
|
||||||
env_file: ../../.env.local
|
env_file: ../../.env.local
|
||||||
environment:
|
environment:
|
||||||
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
|
LITELLM_URL: ${LITELLM_URL:-https://llm.alogins.net}
|
||||||
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
|
OLLAMA_URL: ${OLLAMA_URL:-http://host.docker.internal:11434}
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:8000:8000"
|
- "127.0.0.1:8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -159,25 +161,6 @@ services:
|
|||||||
airflow-init:
|
airflow-init:
|
||||||
condition: service_completed_successfully
|
condition: service_completed_successfully
|
||||||
|
|
||||||
# ── ai profile — Ollama + LiteLLM ────────────────────────────────────────
|
|
||||||
# Start: docker compose --profile ai up
|
|
||||||
# LiteLLM proxy: http://localhost:4000 (master key from LITELLM_MASTER_KEY)
|
|
||||||
# Ollama API: http://localhost:11434
|
|
||||||
# In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local
|
|
||||||
|
|
||||||
ollama:
|
|
||||||
image: ollama/ollama:latest
|
|
||||||
profiles: [ai]
|
|
||||||
volumes:
|
|
||||||
- /mnt/ssd/dbs/oo/ollama:/root/.ollama
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:11434:11434"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "--fail", "http://localhost:11434"]
|
|
||||||
interval: 15s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
|
|
||||||
# ── events profile — NATS JetStream ─────────────────────────────────────
|
# ── events profile — NATS JetStream ─────────────────────────────────────
|
||||||
# Start: docker compose --profile events up
|
# Start: docker compose --profile events up
|
||||||
# NATS monitoring: http://localhost:8222
|
# NATS monitoring: http://localhost:8222
|
||||||
@@ -198,26 +181,6 @@ services:
|
|||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
|
||||||
litellm:
|
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
|
||||||
profiles: [ai]
|
|
||||||
command: ["--config", "/app/litellm_config.yaml", "--port", "4000"]
|
|
||||||
environment:
|
|
||||||
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev}
|
|
||||||
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
|
|
||||||
volumes:
|
|
||||||
- ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:4000:4000"
|
|
||||||
depends_on:
|
|
||||||
ollama:
|
|
||||||
condition: service_healthy
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "--fail", "http://localhost:4000/health"]
|
|
||||||
interval: 15s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
|
|
||||||
mlflow:
|
mlflow:
|
||||||
image: ghcr.io/mlflow/mlflow:v2.14.3
|
image: ghcr.io/mlflow/mlflow:v2.14.3
|
||||||
profiles: [mlops]
|
profiles: [mlops]
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
model_list:
|
|
||||||
- model_name: tip-generator
|
|
||||||
litellm_params:
|
|
||||||
model: ollama/qwen2.5:7b
|
|
||||||
api_base: "${OLLAMA_URL}"
|
|
||||||
|
|
||||||
- model_name: embedder
|
|
||||||
litellm_params:
|
|
||||||
model: ollama/nomic-embed-text
|
|
||||||
api_base: "${OLLAMA_URL}"
|
|
||||||
|
|
||||||
- model_name: judge
|
|
||||||
litellm_params:
|
|
||||||
model: claude-haiku-4-5-20251001
|
|
||||||
|
|
||||||
general_settings:
|
|
||||||
master_key: "${LITELLM_MASTER_KEY}"
|
|
||||||
Reference in New Issue
Block a user