feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline
Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82

infra:
- docker-compose `ai` profile: Ollama + LiteLLM services
- infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases
- .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL

ml/serving:
- POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[]
- JSON retry loop (2 retries with correction prompt on malformed response)
- _parse_llm_json strips markdown fences

ml/features:
- context.py: build_context() assembles user signals → PromptContext (sorts overdue/high-priority tasks first for LLM prompt quality)

shared-types:
- TipKind, TipSource, TipCandidate types
- Tip gains kind + rationale fields

services/api:
- recommender: 3-stage pipeline (assemble → score → serve)
  Stage 1: Todoist tasks + LLM candidates fetched in parallel
  Stage 2: egreedy bandit scores merged candidate pool
  Stage 3: serve + log with prompt_version, llm_model, tip_kind
- tip_scores: prompt_version, llm_model, tip_kind columns + migrations
- config: LITELLM_URL added
- integrations: surface token_status in /integrations response

tests:
- ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants)
- ml/features/test_context.py: 9 tests (sorting, edge cases)
- services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays)
- services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback)
- shared-types: TipCandidate, rationale, full TipFeedback action set

docs:
- ADR-0008: LiteLLM AI gateway decision
- overview.md: M2 pipeline description updated
- ml/README.md: serving + features roles updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,9 +14,9 @@ services:
|
||||
volumes:
|
||||
- /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
|
||||
ports:
|
||||
- "127.0.0.1:3078:3078"
|
||||
- "127.0.0.1:3001:3001"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3078/health"]
|
||||
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3001/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
@@ -49,7 +49,7 @@ services:
|
||||
PORT: "3080"
|
||||
HOSTNAME: "0.0.0.0"
|
||||
NEXT_PUBLIC_API_URL: ""
|
||||
INTERNAL_API_URL: "http://api:3078"
|
||||
INTERNAL_API_URL: "http://api:3001"
|
||||
ports:
|
||||
- "127.0.0.1:3080:3080"
|
||||
depends_on:
|
||||
@@ -63,6 +63,10 @@ services:
|
||||
context: ../..
|
||||
dockerfile: infra/docker/Dockerfile.ml
|
||||
profiles: [full]
|
||||
env_file: ../../.env.local
|
||||
environment:
|
||||
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
|
||||
OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
|
||||
ports:
|
||||
- "127.0.0.1:8000:8000"
|
||||
healthcheck:
|
||||
@@ -155,6 +159,45 @@ services:
|
||||
airflow-init:
|
||||
condition: service_completed_successfully
|
||||
|
||||
# ── ai profile — Ollama + LiteLLM ────────────────────────────────────────
|
||||
# Start: docker compose --profile ai up
|
||||
# LiteLLM proxy: http://localhost:4000 (master key from LITELLM_MASTER_KEY)
|
||||
# Ollama API: http://localhost:11434
|
||||
# In prod both are shared Agap services; set LITELLM_URL + OLLAMA_URL in .env.local
|
||||
|
||||
# Ollama model server, started only with `--profile ai`.
# Model data lives on the host under /mnt/ssd/dbs/oo/ollama, and the API is
# published on loopback only (127.0.0.1:11434).
ollama:
  image: ollama/ollama:latest
  profiles: [ai]
  volumes:
    - /mnt/ssd/dbs/oo/ollama:/root/.ollama
  ports:
    - "127.0.0.1:11434:11434"
  healthcheck:
    # Probe with the bundled `ollama` CLI instead of curl: the image is not
    # guaranteed to include curl, and a missing binary would leave this
    # service permanently unhealthy, blocking every service that waits on
    # `condition: service_healthy`.
    test: ["CMD", "ollama", "list"]
    interval: 15s
    timeout: 5s
    retries: 5
|
||||
|
||||
# LiteLLM proxy, started only with `--profile ai`.
# Loads the read-only config mounted at /app/litellm_config.yaml, listens on
# port 4000 (published on loopback only) and waits for Ollama to be healthy.
# NOTE(review): the `judge` alias in the mounted config targets a Claude model;
# no Anthropic credential is passed in `environment` here — confirm it is
# supplied some other way.
litellm:
  image: ghcr.io/berriai/litellm:main-latest
  profiles: [ai]
  command: ["--config", "/app/litellm_config.yaml", "--port", "4000"]
  environment:
    LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-oo-dev}
    OLLAMA_URL: ${OLLAMA_URL:-http://ollama:11434}
  volumes:
    - ../../infra/litellm/litellm_config.yaml:/app/litellm_config.yaml:ro
  ports:
    - "127.0.0.1:4000:4000"
  depends_on:
    ollama:
      condition: service_healthy
  healthcheck:
    # /health requires the master key and calls every configured model, so an
    # anonymous probe is rejected once a master key is set. /health/liveliness
    # is the unauthenticated "process is up" endpoint. python3 is used because
    # the proxy image is Python-based, whereas curl may not be installed.
    test:
      - "CMD"
      - "python3"
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness', timeout=4)"
    interval: 15s
    timeout: 5s
    retries: 5
|
||||
|
||||
mlflow:
|
||||
image: ghcr.io/mlflow/mlflow:2.14.3
|
||||
profiles: [mlops]
|
||||
|
||||
17
infra/litellm/litellm_config.yaml
Normal file
17
infra/litellm/litellm_config.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# LiteLLM proxy configuration: three model aliases plus proxy-wide settings.
# Environment variables are referenced with LiteLLM's `os.environ/<NAME>`
# syntax; shell-style `${NAME}` placeholders are not expanded by LiteLLM and
# would be used as literal strings.
model_list:
  # Text-generation alias served by Ollama.
  - model_name: tip-generator
    litellm_params:
      model: ollama/qwen2.5:7b
      api_base: os.environ/OLLAMA_URL

  # Embedding alias served by Ollama.
  - model_name: embedder
    litellm_params:
      model: ollama/nomic-embed-text
      api_base: os.environ/OLLAMA_URL

  # Judge alias backed by a hosted Claude model.
  # NOTE(review): no api_key is configured here — presumably the provider key
  # is expected in the proxy's environment; confirm.
  - model_name: judge
    litellm_params:
      model: claude-haiku-4-5-20251001

general_settings:
  # Key clients must present to the proxy; resolved from the environment.
  master_key: os.environ/LITELLM_MASTER_KEY
|
||||
Reference in New Issue
Block a user