diff --git a/.env.example b/.env.example index 0047b53..9af2780 100644 --- a/.env.example +++ b/.env.example @@ -10,10 +10,12 @@ API_BASE_URL=http://localhost:3078 WEB_BASE_URL=http://localhost:3000 ML_SERVING_URL=http://localhost:8000 -# AI stack — Ollama + LiteLLM (docker compose --profile ai) -LITELLM_URL=http://localhost:4000 +# AI stack — shared Agap services (ollama + litellm + langfuse). Not run from oO. +# Prod: https://llm.alogins.net | Dev: http://host.docker.internal:4000 from containers, +# http://localhost:4000 from host. Ollama: http://host.docker.internal:11434 / :11434. +LITELLM_URL=https://llm.alogins.net LITELLM_MASTER_KEY=sk-oo-dev -OLLAMA_URL=http://localhost:11434 +OLLAMA_URL=http://host.docker.internal:11434 # Google OAuth — https://console.cloud.google.com/ GOOGLE_CLIENT_ID= diff --git a/CLAUDE.md b/CLAUDE.md index e03c06d..f3ac970 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -82,13 +82,13 @@ oO generates tips with an LLM and ranks them with a bandit. All LLM calls route | Alias | Model | Used by | |-------|-------|---------| -| `tip-generator` | qwen2.5:7b (default) | `ml/serving` tip generation | +| `tip-generator` | qwen2.5:1.5b (default) | `ml/serving` tip generation | | `embedder` | nomic-embed-text | task clustering, dedup | | `judge` | claude-haiku-4-5 (cloud, eval only) | offline sim | -Env vars: `LITELLM_URL` (default `http://localhost:4000`), `OLLAMA_URL` (default `http://localhost:11434`). +Env vars: `LITELLM_URL` (prod `https://llm.alogins.net`), `OLLAMA_URL` (Agap host, `http://host.docker.internal:11434` from containers). -Start with: `docker compose --profile ai up` (adds Ollama + LiteLLM locally). In prod both are shared Agap services. +Ollama and LiteLLM are **shared Agap services**, not oO services — they live in `agap_git/openai/docker-compose.yml` along with langfuse (observability). oO never starts them; ml-serving just calls the alias. **LLM tip generation pipeline:** 1. `ml/features/context.py` assembles user signals → structured prompt context diff --git a/infra/docker/docker-compose.yml b/infra/docker/docker-compose.yml index 5eec49c..1902dea 100644 --- a/infra/docker/docker-compose.yml +++ b/infra/docker/docker-compose.yml @@ -65,8 +65,10 @@ services: profiles: [full] env_file: ../../.env.local environment: - LITELLM_URL: ${LITELLM_URL:-http://litellm:4000} - OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434} + LITELLM_URL: ${LITELLM_URL:-https://llm.alogins.net} + OLLAMA_URL: ${OLLAMA_URL:-http://host.docker.internal:11434} + extra_hosts: + - "host.docker.internal:host-gateway" ports: - "127.0.0.1:8000:8000" healthcheck: @@ -159,25 +161,6 @@ services: airflow-init: condition: service_completed_successfully - # ── ai profile — Ollama + LiteLLM ──────────────────────────────────────── - # Start: docker compose --profile ai up - # LiteLLM proxy: http://localhost:4000 (master key from LITELLM_MASTER_KEY) - # Ollama API: http://localhost:11434 - # In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local - - ollama: - image: ollama/ollama:latest - profiles: [ai] - volumes: - - /mnt/ssd/dbs/oo/ollama:/root/.ollama - ports: - - "127.0.0.1:11434:11434" - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:11434"] - interval: 15s - timeout: 5s - retries: 5 - # ── events profile — NATS JetStream ───────────────────────────────────── # Start: docker compose --profile events up # NATS monitoring: http://localhost:8222 @@ -198,26 +181,6 @@ services: timeout: 5s retries: 5 - litellm: - image: ghcr.io/berriai/litellm:main-latest - profiles: [ai] - command: ["--config", "/app/litellm_config.yaml", "--port", "4000"] - environment: - LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev} - OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434} - volumes: - - ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro - ports: - - "127.0.0.1:4000:4000" - depends_on: - ollama: - condition: service_healthy - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:4000/health"] - interval: 15s - timeout: 5s - retries: 5 - mlflow: image: ghcr.io/mlflow/mlflow:v2.14.3 profiles: [mlops] diff --git a/infra/litellm/litellm_config.yaml b/infra/litellm/litellm_config.yaml deleted file mode 100644 index b14f697..0000000 --- a/infra/litellm/litellm_config.yaml +++ /dev/null @@ -1,17 +0,0 @@ -model_list: - - model_name: tip-generator - litellm_params: - model: ollama/qwen2.5:7b - api_base: "${OLLAMA_URL}" - - - model_name: embedder - litellm_params: - model: ollama/nomic-embed-text - api_base: "${OLLAMA_URL}" - - - model_name: judge - litellm_params: - model: claude-haiku-4-5-20251001 - -general_settings: - master_key: "${LITELLM_MASTER_KEY}"