feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline

Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82

infra:
- docker-compose `ai` profile: Ollama + LiteLLM services
- infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases
- .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL

ml/serving:
- POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[]
- JSON retry loop (2 retries with correction prompt on malformed response)
- _parse_llm_json strips markdown fences

ml/features:
- context.py: build_context() assembles user signals → PromptContext
  (sorts overdue/high-priority tasks first for LLM prompt quality)

shared-types:
- TipKind, TipSource, TipCandidate types
- Tip gains kind + rationale fields

services/api:
- recommender: 3-stage pipeline (assemble → score → serve)
  Stage 1: Todoist tasks + LLM candidates fetched in parallel
  Stage 2: egreedy bandit scores merged candidate pool
  Stage 3: serve + log with prompt_version, llm_model, tip_kind
- tip_scores: prompt_version, llm_model, tip_kind columns + migrations
- config: LITELLM_URL added
- integrations: surface token_status in /integrations response

tests:
- ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants)
- ml/features/test_context.py: 9 tests (sorting, edge cases)
- services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays)
- services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback)
- shared-types: TipCandidate, rationale, full TipFeedback action set

docs:
- ADR-0008: LiteLLM AI gateway decision
- overview.md: M2 pipeline description updated
- ml/README.md: serving + features roles updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 14:09:02 +00:00
parent 85367aeaa0
commit ffdf70733f
22 changed files with 1017 additions and 45 deletions

View File

@@ -0,0 +1,190 @@
/**
* Integration tests for POST /recommend and tip_scores DB writes.
* Uses a real in-memory SQLite DB. recommender is imported dynamically
* inside beforeAll (same pattern as admin.test.ts) to avoid TDZ issues.
* Uses http.request (not fetch) as the test client so that globalThis.fetch
* mocking doesn't interfere with the test runner itself.
*/
import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
import express from 'express';
import * as http from 'http';
import { makeTestDb } from '../../test/db.js';
import { users, integrationTokens, tipScores } from '../../db/schema.js';
const testDb = makeTestDb();
vi.mock('../../db/index.js', () => ({ db: testDb }));
vi.mock('../../middleware/session.js', () => ({
sessionMiddleware: (_req: express.Request, _res: express.Response, next: express.NextFunction) => next(),
requireAuth: (req: express.Request, _res: express.Response, next: express.NextFunction) => {
(req as any).userId = 'user-1';
next();
},
}));
vi.mock('../../events/bus.js', () => ({ bus: { publish: vi.fn() } }));
/** Minimal http.request wrapper → { status, body } */
function post(url: string): Promise<{ status: number; body: any }> {
return new Promise((resolve, reject) => {
const u = new URL(url);
const req = http.request(
{ hostname: u.hostname, port: Number(u.port), path: u.pathname, method: 'POST',
headers: { 'Content-Type': 'application/json' } },
(res) => {
let data = '';
res.on('data', (c) => { data += c; });
res.on('end', () => {
try { resolve({ status: res.statusCode ?? 0, body: data ? JSON.parse(data) : null }); }
catch { resolve({ status: res.statusCode ?? 0, body: data }); }
});
},
);
req.on('error', reject);
req.end();
});
}
describe('POST /recommend integration', () => {
let server: http.Server;
let baseUrl: string;
let savedFetch: typeof globalThis.fetch;
let clearCache: () => void;
beforeAll(async () => {
await testDb.insert(users).values({
id: 'user-1', email: 'u@test.com', role: 'user',
consentGiven: 1, createdAt: new Date().toISOString(),
});
await testDb.insert(integrationTokens).values({
id: 'tok-1', userId: 'user-1', provider: 'todoist',
accessToken: 'fake-token', connectedAt: new Date().toISOString(),
});
const mod = await import('../recommender.js');
const { recommenderRouter } = mod;
clearCache = (mod as any)._clearTaskCacheForTests;
const app = express();
app.use(express.json());
app.use('/api', recommenderRouter);
server = app.listen(0);
const addr = server.address() as { port: number };
baseUrl = `http://localhost:${addr.port}`;
savedFetch = globalThis.fetch;
});
afterEach(() => {
globalThis.fetch = savedFetch;
clearCache?.();
});
it('returns 204 when Todoist + LLM both return empty', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: true, status: 200,
json: async () => ({ results: [] }),
} as any);
const { status } = await post(`${baseUrl}/api/recommend`);
expect(status).toBe(204);
});
it('serves todoist tip and writes correct tip_scores columns', async () => {
globalThis.fetch = vi.fn().mockImplementation((url: string) => {
if (String(url).includes('todoist.com')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({
results: [{ id: 'task-1', content: 'Write tests', priority: 3, due: { date: '2026-04-10' } }],
}),
} as any);
}
if (String(url).includes('/generate')) {
return Promise.resolve({ ok: false, status: 503, json: async () => ({}) } as any);
}
if (String(url).includes('/score')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({ tip_id: 'todoist:task-1', score: 0.8 }),
} as any);
}
return Promise.resolve({ ok: false, status: 500, json: async () => ({}) } as any);
});
const { status, body } = await post(`${baseUrl}/api/recommend`);
expect(status).toBe(200);
expect(body.tip.source).toBe('todoist');
expect(body.tip.kind).toBe('task');
const rows = await testDb.select().from(tipScores);
const row = rows[rows.length - 1];
expect(row.tipKind).toBe('task');
expect(row.promptVersion).toBeNull();
expect(row.llmModel).toBeNull();
});
it('writes prompt_version + llm_model when LLM tip is served', async () => {
globalThis.fetch = vi.fn().mockImplementation((url: string) => {
if (String(url).includes('todoist.com')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({ results: [] }),
} as any);
}
if (String(url).includes('/generate')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({
candidates: [{ id: 'adv-1', content: 'Take a break.', rationale: 'You deserve it.' }],
model: 'tip-generator',
}),
} as any);
}
if (String(url).includes('/score')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({ tip_id: 'llm:adv-1', score: 0.9 }),
} as any);
}
return Promise.resolve({ ok: false, status: 500, json: async () => ({}) } as any);
});
const { status, body } = await post(`${baseUrl}/api/recommend`);
expect(status).toBe(200);
expect(body.tip.source).toBe('llm');
expect(body.tip.kind).toBe('advice');
expect(body.tip.rationale).toBe('You deserve it.');
const rows = await testDb.select().from(tipScores);
const row = rows[rows.length - 1];
expect(row.promptVersion).toBe('v1');
expect(row.llmModel).toBe('tip-generator');
expect(row.tipKind).toBe('advice');
});
it('falls back to todoist tip when /generate returns non-200', async () => {
globalThis.fetch = vi.fn().mockImplementation((url: string) => {
if (String(url).includes('todoist.com')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({
results: [{ id: 'fallback-1', content: 'Do stuff', priority: 2, due: null }],
}),
} as any);
}
if (String(url).includes('/generate')) {
return Promise.resolve({ ok: false, status: 502, json: async () => ({}) } as any);
}
if (String(url).includes('/score')) {
return Promise.resolve({
ok: true, status: 200,
json: async () => ({ tip_id: 'todoist:fallback-1', score: 0.5 }),
} as any);
}
return Promise.resolve({ ok: false, status: 500, json: async () => ({}) } as any);
});
const { status, body } = await post(`${baseUrl}/api/recommend`);
expect([200, 204]).toContain(status);
if (status === 200) {
expect(body.tip.source).toBe('todoist');
}
});
});