feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline

Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82 infra: - docker-compose `ai` profile: Ollama + LiteLLM services - infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases - .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL ml/serving: - POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[] - JSON retry loop (2 retries with correction prompt on malformed response) - _parse_llm_json strips markdown fences ml/features: - context.py: build_context() assembles user signals → PromptContext (sorts overdue/high-priority tasks first for LLM prompt quality) shared-types: - TipKind, TipSource, TipCandidate types - Tip gains kind + rationale fields services/api: - recommender: 3-stage pipeline (assemble → score → serve) Stage 1: Todoist tasks + LLM candidates fetched in parallel Stage 2: egreedy bandit scores merged candidate pool Stage 3: serve + log with prompt_version, llm_model, tip_kind - tip_scores: prompt_version, llm_model, tip_kind columns + migrations - config: LITELLM_URL added - integrations: surface token_status in /integrations response tests: - ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants) - ml/features/test_context.py: 9 tests (sorting, edge cases) - services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays) - services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback) - shared-types: TipCandidate, rationale, full TipFeedback action set docs: - ADR-0008: LiteLLM AI gateway decision - overview.md: M2 pipeline description updated - ml/README.md: serving + features roles updated Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 14:09:02 +00:00
parent 85367aeaa0
commit ffdf70733f
22 changed files with 1017 additions and 45 deletions
--- a/infra/docker/docker-compose.yml
+++ b/infra/docker/docker-compose.yml
@@ -14,9 +14,9 @@ services:
    volumes:
      - /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
    ports:
-      - "127.0.0.1:3078:3078"
+      - "127.0.0.1:3001:3001"
    healthcheck:
-      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3078/health"]
+      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3001/health"]
      interval: 10s
      timeout: 5s
      retries: 5
@@ -49,7 +49,7 @@ services:
      PORT: "3080"
      HOSTNAME: "0.0.0.0"
      NEXT_PUBLIC_API_URL: ""
-      INTERNAL_API_URL: "http://api:3078"
+      INTERNAL_API_URL: "http://api:3001"
    ports:
      - "127.0.0.1:3080:3080"
    depends_on:
@@ -63,6 +63,10 @@ services:
      context: ../..
      dockerfile: infra/docker/Dockerfile.ml
    profiles: [full]
+    env_file: ../../.env.local
+    environment:
+      LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
+      OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
    ports:
      - "127.0.0.1:8000:8000"
    healthcheck:
@@ -155,6 +159,45 @@ services:
      airflow-init:
        condition: service_completed_successfully

+  # ── ai profile — Ollama + LiteLLM ────────────────────────────────────────
+  # Start: docker compose --profile ai up
+  # LiteLLM proxy: http://localhost:4000  (master key from LITELLM_MASTER_KEY)
+  # Ollama API:    http://localhost:11434
+  # In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local
+
+  ollama:
+    image: ollama/ollama:latest
+    profiles: [ai]
+    volumes:
+      - /mnt/ssd/dbs/oo/ollama:/root/.ollama
+    ports:
+      - "127.0.0.1:11434:11434"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:11434"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  litellm:
+    image: ghcr.io/berriai/litellm:main-latest
+    profiles: [ai]
+    command: ["--config", "/app/litellm_config.yaml", "--port", "4000"]
+    environment:
+      LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev}
+      OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
+    volumes:
+      - ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro
+    ports:
+      - "127.0.0.1:4000:4000"
+    depends_on:
+      ollama:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:4000/health"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
  mlflow:
    image: ghcr.io/mlflow/mlflow:2.14.3
    profiles: [mlops]
--- a/infra/litellm/litellm_config.yaml
+++ b/infra/litellm/litellm_config.yaml
@@ -0,0 +1,17 @@
+model_list:
+  - model_name: tip-generator
+    litellm_params:
+      model: ollama/qwen2.5:7b
+      api_base: "${OLLAMA_URL}"
+
+  - model_name: embedder
+    litellm_params:
+      model: ollama/nomic-embed-text
+      api_base: "${OLLAMA_URL}"
+
+  - model_name: judge
+    litellm_params:
+      model: claude-haiku-4-5-20251001
+
+general_settings:
+  master_key: "${LITELLM_MASTER_KEY}"