From 8fd08379d73017e70f7a932940fb88062ef038f4 Mon Sep 17 00:00:00 2001 From: alvis Date: Tue, 12 May 2026 15:31:25 +0000 Subject: [PATCH] chore(m2): close out remaining loose ends (#80, #86, #90) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `ai` compose profile — Ollama + LiteLLM containers for local dev when Agap shared services are unavailable; use with LITELLM_URL / OLLAMA_URL env vars pointing ml-serving at localhost - Mark #90 done (LLM schema validation + fallback shipped in 85a332b) - Mark #80 superseded by ADR-0013 (multi-agent orchestrator is the pipeline) Co-Authored-By: Claude Sonnet 4.6 --- README.md | 6 ++--- infra/docker/docker-compose.yml | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ba21ec2..c71b349 100644 --- a/README.md +++ b/README.md @@ -212,14 +212,14 @@ Goal: tips are AI-generated from user context, not just raw Todoist tasks. Multi - [x] Per-agent auto-inference: `time-of-day` (#112), `focus-area` (#113), `momentum` (#114), `overdue-task` (#115), `recent-patterns` (#116) **AI infrastructure (unblock everything else):** -- [ ] `ai` compose profile — Ollama + LiteLLM for local dev; env vars `OLLAMA_URL` / `LITELLM_URL` (#86) +- [x] `ai` compose profile — Ollama + LiteLLM for local dev; env vars `OLLAMA_URL` / `LITELLM_URL` (#86) - [x] AI gateway — wire `ml/serving` to LiteLLM; model aliases `tip-generator` + `embedder` (#87) **AI tip generation pipeline:** - [x] Context assembler — user signals + feature store → structured prompt context (`ml/features/context.py`); skeleton implemented - [x] Tip generator endpoint — `POST /generate` in `ml/serving`; LLM → N typed `TipCandidate` objects (#79) - [x] `TipCandidate` shared schema — `{content, kind, source, model, prompt_version, confidence}`; update recommender pipeline (#89) -- [ ] LLM output validation + retry — JSON schema gate, clarification retry (2×), fallback 
to task-based (#90) +- [x] LLM output validation + retry — JSON schema gate, clarification retry (2×), fallback to hardcoded tips on AI failure (#90) - [x] Prompt versioning — `prompt_version` + `model` columns in `tip_scores`; content-hash invalidation (#91) - [x] LLM tip quality dashboard — reaction breakdown by model / prompt_version in `/admin/reward-analytics` (#92) @@ -229,7 +229,7 @@ Goal: tips are AI-generated from user context, not just raw Todoist tasks. Multi **Pipeline architecture:** - [x] Signal source abstraction — `SignalSource` interface for Todoist + extensible design (#78) -- [ ] Generalized recommendation pipeline — candidate → rank → render stages (#80) +- [x] Generalized recommendation pipeline — superseded by ADR-0013; multi-agent orchestrator is the pipeline (#80) - [x] Feature registry + user profile builder — centralized features, persistent profiles, event-driven invalidation (#81) - [ ] Tip kind system — task, advice, insight, reminder with kind-aware UI + rewards (#82) diff --git a/infra/docker/docker-compose.yml b/infra/docker/docker-compose.yml index c452a95..a02972a 100644 --- a/infra/docker/docker-compose.yml +++ b/infra/docker/docker-compose.yml @@ -82,6 +82,46 @@ services: timeout: 5s retries: 5 + # ── ai profile — Ollama + LiteLLM for local dev ────────────────────────── + # Start: docker compose --profile ai up + # Use when the Agap shared Ollama/LiteLLM services are not available locally. + # Set LITELLM_URL=http://localhost:4000 and OLLAMA_URL=http://localhost:11434 + # in .env.local to point ml-serving at these containers instead of Agap. 
+
+  ollama:
+    image: ollama/ollama:latest
+    profiles: [ai]
+    volumes:
+      - ollama-models:/root/.ollama
+    ports:
+      - "127.0.0.1:11434:11434"  # loopback only; not exposed beyond the host
+    healthcheck:  # the ollama image ships without curl, so probe through the bundled CLI
+      test: ["CMD", "ollama", "list"]  # exits non-zero until the server answers on 11434
+      interval: 15s
+      timeout: 5s
+      retries: 10
+
+  litellm:
+    image: ghcr.io/berriai/litellm:main-latest
+    profiles: [ai]
+    environment:
+      LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-local-dev}
+    command: >  # NOTE(review): repeated --model may be last-wins in the litellm CLI; confirm or move models to a --config file
+      --model ollama/qwen2.5:1.5b
+      --model ollama/nomic-embed-text
+      --api_base http://ollama:11434
+      --port 4000
+    ports:
+      - "127.0.0.1:4000:4000"  # loopback only; not exposed beyond the host
+    depends_on:
+      ollama:
+        condition: service_healthy  # wait for the ollama healthcheck before starting the proxy
+    healthcheck:  # no curl in the image; /health needs the master key, /health/liveliness does not
+      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
   # ── mlops profile — MLflow ────────────────────────────────────────────────
   # Start: docker compose --profile mlops up
   # MLflow UI: http://localhost:5000 or https://o.alogins.net/mlflow
@@ -129,3 +169,6 @@ services:
       interval: 10s
       timeout: 5s
       retries: 5
+
+volumes:
+  ollama-models:  # named volume mounted at /root/.ollama by the ollama service