feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline

Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82

infra:
- docker-compose `ai` profile: Ollama + LiteLLM services
- infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases
- .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL

ml/serving:
- POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[]
- JSON retry loop (2 retries with correction prompt on malformed response)
- _parse_llm_json strips markdown fences

ml/features:
- context.py: build_context() assembles user signals → PromptContext
  (sorts overdue/high-priority tasks first for LLM prompt quality)

shared-types:
- TipKind, TipSource, TipCandidate types
- Tip gains kind + rationale fields

services/api:
- recommender: 3-stage pipeline (assemble → score → serve)
  Stage 1: Todoist tasks + LLM candidates fetched in parallel
  Stage 2: egreedy bandit scores merged candidate pool
  Stage 3: serve + log with prompt_version, llm_model, tip_kind
- tip_scores: prompt_version, llm_model, tip_kind columns + migrations
- config: LITELLM_URL added
- integrations: surface token_status in /integrations response

tests:
- ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants)
- ml/features/test_context.py: 9 tests (sorting, edge cases)
- services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays)
- services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback)
- shared-types: TipCandidate, rationale, full TipFeedback action set

docs:
- ADR-0008: LiteLLM AI gateway decision
- overview.md: M2 pipeline description updated
- ml/README.md: serving + features roles updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 14:09:02 +00:00
parent 85367aeaa0
commit ffdf70733f
22 changed files with 1017 additions and 45 deletions

View File

@@ -0,0 +1,39 @@
/**
* Pure-function unit tests for recommender logic — no DB, no HTTP.
* These can import directly from the module without any mocking.
*/
import { describe, it, expect } from 'vitest';
import { inferReward, dueAgeDays } from '../recommender.js';
describe('inferReward', () => {
it('dismiss → -1', () => expect(inferReward('dismiss', null)).toBe(-1.0));
it('snooze → +0.1', () => expect(inferReward('snooze', null)).toBe(0.1));
it('helpful → +0.5', () => expect(inferReward('helpful', null)).toBe(0.5));
it('not_helpful → -0.5', () => expect(inferReward('not_helpful', null)).toBe(-0.5));
it('done with null dwell → +0.5', () => expect(inferReward('done', null)).toBe(0.5));
it('done < 15s (reflex) → -0.3', () => expect(inferReward('done', 5_000)).toBe(-0.3));
it('done 15s2min (magic) → +1.0', () => expect(inferReward('done', 60_000)).toBe(1.0));
it('done 210min (good) → +0.6', () => expect(inferReward('done', 300_000)).toBe(0.6));
it('done > 10min (eventual) → +0.3', () => expect(inferReward('done', 700_000)).toBe(0.3));
it('done exactly 15s (boundary) → magic zone', () => expect(inferReward('done', 15_000)).toBe(1.0));
it('done exactly 2min (boundary) → good zone', () => expect(inferReward('done', 120_000)).toBe(0.6));
});
describe('dueAgeDays', () => {
it('null due → 0', () => expect(dueAgeDays(null)).toBe(0));
it('empty object → 0', () => expect(dueAgeDays({})).toBe(0));
it('future date → 0 (clamped)', () => {
const future = new Date(Date.now() + 86_400_000).toISOString();
expect(dueAgeDays({ datetime: future })).toBe(0);
});
it('past date → positive age', () => {
const twoDaysAgo = new Date(Date.now() - 2 * 86_400_000).toISOString();
const age = dueAgeDays({ datetime: twoDaysAgo });
expect(age).toBeGreaterThan(1.9);
expect(age).toBeLessThan(2.1);
});
it('date-only field used when datetime absent', () => {
const yesterday = new Date(Date.now() - 86_400_000).toISOString().slice(0, 10);
expect(dueAgeDays({ date: yesterday })).toBeGreaterThan(0);
});
});