feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline

Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82

infra:
- docker-compose `ai` profile: Ollama + LiteLLM services
- infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases
- .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL

ml/serving:
- POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[]
- JSON retry loop (2 retries with correction prompt on malformed response)
- _parse_llm_json strips markdown fences

ml/features:
- context.py: build_context() assembles user signals → PromptContext
  (sorts overdue/high-priority tasks first for LLM prompt quality)

shared-types:
- TipKind, TipSource, TipCandidate types
- Tip gains kind + rationale fields

services/api:
- recommender: 3-stage pipeline (assemble → score → serve)
  Stage 1: Todoist tasks + LLM candidates fetched in parallel
  Stage 2: epsilon-greedy (egreedy) bandit scores the merged candidate pool
  Stage 3: serve + log with prompt_version, llm_model, tip_kind
- tip_scores: prompt_version, llm_model, tip_kind columns + migrations
- config: LITELLM_URL added
- integrations: surface token_status in /integrations response

tests:
- ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants)
- ml/features/test_context.py: 9 tests (sorting, edge cases)
- services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays)
- services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback)
- shared-types: TipCandidate, rationale, full TipFeedback action set

docs:
- ADR-0008: LiteLLM AI gateway decision
- overview.md: M2 pipeline description updated
- ml/README.md: serving + features roles updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 14:09:02 +00:00
parent 85367aeaa0
commit ffdf70733f
22 changed files with 1017 additions and 45 deletions

View File

@@ -14,9 +14,9 @@ services:
volumes:
- /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
ports:
- "127.0.0.1:3078:3078"
- "127.0.0.1:3001:3001"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3078/health"]
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3001/health"]
interval: 10s
timeout: 5s
retries: 5
@@ -49,7 +49,7 @@ services:
PORT: "3080"
HOSTNAME: "0.0.0.0"
NEXT_PUBLIC_API_URL: ""
INTERNAL_API_URL: "http://api:3078"
INTERNAL_API_URL: "http://api:3001"
ports:
- "127.0.0.1:3080:3080"
depends_on:
@@ -63,6 +63,10 @@ services:
context: ../..
dockerfile: infra/docker/Dockerfile.ml
profiles: [full]
env_file: ../../.env.local
environment:
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
ports:
- "127.0.0.1:8000:8000"
healthcheck:
@@ -155,6 +159,45 @@ services:
airflow-init:
condition: service_completed_successfully
# ── ai profile — Ollama + LiteLLM ────────────────────────────────────────
# Start: docker compose --profile ai up
# LiteLLM proxy: http://localhost:4000 (master key from LITELLM_MASTER_KEY)
# Ollama API: http://localhost:11434
# In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local
# Ollama model server; only started with the `ai` compose profile.
ollama:
  image: ollama/ollama:latest
  profiles: [ai]
  volumes:
    # Host directory bind-mounted onto /root/.ollama inside the container.
    - /mnt/ssd/dbs/oo/ollama:/root/.ollama
  ports:
    # Published on the loopback interface only.
    - "127.0.0.1:11434:11434"
  healthcheck:
    # The ollama/ollama image does not ship curl, so a curl-based probe can
    # never succeed and the litellm service (which waits for service_healthy)
    # would never start. `ollama list` uses the bundled CLI and exits non-zero
    # while the server is unreachable.
    test: ["CMD", "ollama", "list"]
    interval: 15s
    timeout: 5s
    retries: 5
# LiteLLM proxy; only started with the `ai` compose profile and only after the
# ollama service reports healthy.
litellm:
  image: ghcr.io/berriai/litellm:main-latest
  profiles: [ai]
  command: ["--config", "/app/litellm_config.yaml", "--port", "4000"]
  environment:
    # Falls back to a fixed development key when LITELLM_MASTER_KEY is unset.
    LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev}
    OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
    # NOTE(review): no Anthropic credentials are passed to this container, yet
    # the mounted config defines a Claude-backed alias; confirm how that key
    # is meant to be supplied.
  volumes:
    # Proxy configuration is mounted read-only.
    - ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro
  ports:
    # Published on the loopback interface only.
    - "127.0.0.1:4000:4000"
  depends_on:
    ollama:
      condition: service_healthy
  healthcheck:
    # /health requires the master key and performs real calls to every
    # configured model; /health/liveliness is the unauthenticated liveness
    # endpoint. The probe uses the image's Python interpreter so it does not
    # depend on curl being installed.
    test:
      - CMD
      - python3
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"
    interval: 15s
    timeout: 5s
    retries: 5
mlflow:
image: ghcr.io/mlflow/mlflow:2.14.3
profiles: [mlops]

View File

@@ -0,0 +1,17 @@
# LiteLLM proxy configuration: maps stable alias names to concrete models.
# Environment variables are referenced with LiteLLM's `os.environ/<NAME>`
# syntax; shell-style `${NAME}` is not expanded by LiteLLM and would be passed
# through as a literal string.
model_list:
  - model_name: tip-generator
    litellm_params:
      model: ollama/qwen2.5:7b
      api_base: os.environ/OLLAMA_URL
  - model_name: embedder
    litellm_params:
      model: ollama/nomic-embed-text
      api_base: os.environ/OLLAMA_URL
  - model_name: judge
    litellm_params:
      # NOTE(review): no api_key is configured for this alias; presumably the
      # provider key is expected from the proxy's environment. Confirm.
      model: claude-haiku-4-5-20251001

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY