feat: M2 AI tips — LiteLLM gateway, context assembler, end-to-end generation pipeline

Issues closed: #86, #87, #88, #89, #90, #91, #79, #80, #82

infra:
- docker-compose `ai` profile: Ollama + LiteLLM services
- infra/litellm/litellm_config.yaml: tip-generator / embedder / judge aliases
- .env.example: LITELLM_URL, LITELLM_MASTER_KEY, OLLAMA_URL

ml/serving:
- POST /generate: calls LiteLLM tip-generator alias, returns TipCandidate[]
- JSON retry loop (2 retries with correction prompt on malformed response)
- _parse_llm_json strips markdown fences

ml/features:
- context.py: build_context() assembles user signals → PromptContext
  (sorts overdue/high-priority tasks first for LLM prompt quality)

shared-types:
- TipKind, TipSource, TipCandidate types
- Tip gains kind + rationale fields

services/api:
- recommender: 3-stage pipeline (assemble → score → serve)
  Stage 1: Todoist tasks + LLM candidates fetched in parallel
  Stage 2: egreedy bandit scores merged candidate pool
  Stage 3: serve + log with prompt_version, llm_model, tip_kind
- tip_scores: prompt_version, llm_model, tip_kind columns + migrations
- config: LITELLM_URL added
- integrations: surface token_status in /integrations response

tests:
- ml/serving/tests/test_generate.py: 13 tests (retry, 502/503, fence variants)
- ml/features/test_context.py: 9 tests (sorting, edge cases)
- services/api recommender.unit.test.ts: 16 pure-function tests (inferReward, dueAgeDays)
- services/api recommender.test.ts: 4 integration tests (tip_scores columns, LLM fallback)
- shared-types: TipCandidate, rationale, full TipFeedback action set

docs:
- ADR-0008: LiteLLM AI gateway decision
- overview.md: M2 pipeline description updated
- ml/README.md: serving + features roles updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 14:09:02 +00:00
parent 85367aeaa0
commit ffdf70733f
22 changed files with 1017 additions and 45 deletions

View File

@@ -32,6 +32,7 @@ export const config = {
WEB_BASE_URL: optional('WEB_BASE_URL', 'http://localhost:3000'),
ML_SERVING_URL: optional('ML_SERVING_URL', 'http://localhost:8000'),
LITELLM_URL: optional('LITELLM_URL', 'http://localhost:4000'),
VAPID_PUBLIC_KEY: optional('VAPID_PUBLIC_KEY', ''),
VAPID_PRIVATE_KEY: optional('VAPID_PRIVATE_KEY', ''),

View File

@@ -142,6 +142,10 @@ export function runMigrations() {
`ALTER TABLE push_subscriptions ADD COLUMN created_at TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE tip_feedback ADD COLUMN dwell_ms INTEGER`,
`ALTER TABLE tip_feedback ADD COLUMN reward_milli INTEGER`,
`ALTER TABLE integration_tokens ADD COLUMN token_status TEXT NOT NULL DEFAULT 'active'`,
`ALTER TABLE tip_scores ADD COLUMN prompt_version TEXT`,
`ALTER TABLE tip_scores ADD COLUMN llm_model TEXT`,
`ALTER TABLE tip_scores ADD COLUMN tip_kind TEXT`,
]) {
try { sqlite.exec(stmt); } catch { /* column already exists */ }
}

View File

@@ -20,6 +20,7 @@ export const integrationTokens = sqliteTable('integration_tokens', {
accessToken: text('access_token').notNull(),
refreshToken: text('refresh_token'),
expiresAt: text('expires_at'),
tokenStatus: text('token_status').notNull().default('active'), // 'active' | 'needs_reconnect'
connectedAt: text('connected_at').notNull(),
});
@@ -81,6 +82,9 @@ export const tipScores = sqliteTable('tip_scores', {
candidateCount: integer('candidate_count'),
latencyMs: integer('latency_ms'),
servedAt: text('served_at').notNull(),
promptVersion: text('prompt_version'), // e.g. 'v1' — tracks which prompt template generated this tip
llmModel: text('llm_model'), // e.g. 'tip-generator/qwen2.5:7b' — null for bandit-only tips
tipKind: text('tip_kind'), // 'task' | 'advice' | 'insight' | 'reminder'
});
// ── Simulation runs ──────────────────────────────────────────────────────────

View File

@@ -0,0 +1,190 @@
/**
* Integration tests for POST /recommend and tip_scores DB writes.
* Uses a real in-memory SQLite DB. recommender is imported dynamically
* inside beforeAll (same pattern as admin.test.ts) to avoid TDZ issues.
* Uses http.request (not fetch) as the test client so that globalThis.fetch
* mocking doesn't interfere with the test runner itself.
*/
import { describe, it, expect, vi, beforeAll, afterAll, afterEach } from 'vitest';
import express from 'express';
import * as http from 'http';
import { makeTestDb } from '../../test/db.js';
import { users, integrationTokens, tipScores } from '../../db/schema.js';
// Database handle shared by the mocked db module below and by the assertions
// in this file (rows are inserted and read back through the same instance).
const testDb = makeTestDb();
// Swap the application's db module for the test database.
vi.mock('../../db/index.js', () => ({ db: testDb }));
// Replace the session middleware: sessionMiddleware becomes a pass-through and
// requireAuth stamps every request as 'user-1' instead of checking a session.
vi.mock('../../middleware/session.js', () => ({
sessionMiddleware: (_req: express.Request, _res: express.Response, next: express.NextFunction) => next(),
requireAuth: (req: express.Request, _res: express.Response, next: express.NextFunction) => {
(req as any).userId = 'user-1';
next();
},
}));
// Event bus stub: publish is a mock function, so served-tip events go nowhere.
vi.mock('../../events/bus.js', () => ({ bus: { publish: vi.fn() } }));
/**
 * Minimal `http.request` wrapper used as the test client.
 *
 * Sends an empty-bodied POST to `url` (path and query string included) and
 * resolves with the response status plus its body: the parsed JSON when the
 * payload is valid JSON, the raw text when it is not, and `null` when the
 * response has no body. Rejects on request or response stream errors.
 *
 * @param url Absolute http URL to POST to.
 * @returns The numeric status (0 if the response carried none) and the body.
 */
function post(url: string): Promise<{ status: number; body: any }> {
  return new Promise((resolve, reject) => {
    const target = new URL(url);
    const req = http.request(
      {
        hostname: target.hostname,
        port: Number(target.port),
        // Keep the query string; sending only the pathname silently drops it.
        path: `${target.pathname}${target.search}`,
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
      },
      (res) => {
        // Decode as a UTF-8 stream so a multi-byte character split across two
        // chunks is reassembled instead of being turned into U+FFFD twice.
        res.setEncoding('utf8');
        let raw = '';
        res.on('data', (chunk: string) => {
          raw += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          const status = res.statusCode ?? 0;
          let body: unknown = null;
          if (raw) {
            try {
              body = JSON.parse(raw);
            } catch {
              // Not JSON: hand the raw text back so the caller can inspect it.
              body = raw;
            }
          }
          resolve({ status, body });
        });
      },
    );
    req.on('error', reject);
    req.end();
  });
}
/**
 * End-to-end checks for POST /api/recommend against a real Express app and the
 * in-memory test DB. Outbound calls (Todoist, ml/serving /generate and /score)
 * are intercepted by replacing globalThis.fetch per test.
 */
describe('POST /recommend integration', () => {
  let server: http.Server;
  let baseUrl: string;
  let savedFetch: typeof globalThis.fetch;
  let clearCache: () => void;

  /** Stand-in for a fetch Response exposing only `ok`, `status` and `json()`. */
  const fakeResponse = (status: number, payload: unknown = {}) =>
    ({ ok: status >= 200 && status < 300, status, json: async () => payload }) as Response;

  /**
   * Installs a fetch stub that routes by URL substring: Todoist API, then
   * ml/serving /generate, then ml/serving /score. Anything else gets a 500.
   */
  const stubFetch = (replies: { todoist: Response; generate: Response; score: Response }): void => {
    globalThis.fetch = vi.fn().mockImplementation((input: unknown) => {
      const url = String(input);
      if (url.includes('todoist.com')) return Promise.resolve(replies.todoist);
      if (url.includes('/generate')) return Promise.resolve(replies.generate);
      if (url.includes('/score')) return Promise.resolve(replies.score);
      return Promise.resolve(fakeResponse(500));
    });
  };

  beforeAll(async () => {
    await testDb.insert(users).values({
      id: 'user-1',
      email: 'u@test.com',
      role: 'user',
      consentGiven: 1,
      createdAt: new Date().toISOString(),
    });
    await testDb.insert(integrationTokens).values({
      id: 'tok-1',
      userId: 'user-1',
      provider: 'todoist',
      accessToken: 'fake-token',
      connectedAt: new Date().toISOString(),
    });

    // Imported dynamically (see file header) so the mocked modules are in place first.
    const mod = await import('../recommender.js');
    // _clearTaskCacheForTests is a regular export; no cast is needed to reach it.
    clearCache = mod._clearTaskCacheForTests;

    const app = express();
    app.use(express.json());
    app.use('/api', mod.recommenderRouter);
    server = app.listen(0); // port 0: let the OS pick a free port
    const addr = server.address() as { port: number };
    baseUrl = `http://localhost:${addr.port}`;
    savedFetch = globalThis.fetch;
  });

  afterAll(() => {
    // Release the listening socket so the worker is not left with an open handle.
    server?.close();
  });

  afterEach(() => {
    globalThis.fetch = savedFetch;
    // The recommender caches Todoist tasks per user; reset so tests stay independent.
    clearCache?.();
  });

  it('returns 204 when Todoist + LLM both return empty', async () => {
    // Every outbound call gets the same empty payload.
    globalThis.fetch = vi.fn().mockResolvedValue(fakeResponse(200, { results: [] }));

    const { status } = await post(`${baseUrl}/api/recommend`);

    expect(status).toBe(204);
  });

  it('serves todoist tip and writes correct tip_scores columns', async () => {
    stubFetch({
      todoist: fakeResponse(200, {
        results: [{ id: 'task-1', content: 'Write tests', priority: 3, due: { date: '2026-04-10' } }],
      }),
      generate: fakeResponse(503),
      score: fakeResponse(200, { tip_id: 'todoist:task-1', score: 0.8 }),
    });

    const { status, body } = await post(`${baseUrl}/api/recommend`);

    expect(status).toBe(200);
    expect(body.tip.source).toBe('todoist');
    expect(body.tip.kind).toBe('task');

    const rows = await testDb.select().from(tipScores);
    const row = rows[rows.length - 1];
    expect(row.tipKind).toBe('task');
    expect(row.promptVersion).toBeNull();
    expect(row.llmModel).toBeNull();
  });

  it('writes prompt_version + llm_model when LLM tip is served', async () => {
    stubFetch({
      todoist: fakeResponse(200, { results: [] }),
      generate: fakeResponse(200, {
        candidates: [{ id: 'adv-1', content: 'Take a break.', rationale: 'You deserve it.' }],
        model: 'tip-generator',
      }),
      score: fakeResponse(200, { tip_id: 'llm:adv-1', score: 0.9 }),
    });

    const { status, body } = await post(`${baseUrl}/api/recommend`);

    expect(status).toBe(200);
    expect(body.tip.source).toBe('llm');
    expect(body.tip.kind).toBe('advice');
    expect(body.tip.rationale).toBe('You deserve it.');

    const rows = await testDb.select().from(tipScores);
    const row = rows[rows.length - 1];
    expect(row.promptVersion).toBe('v1');
    expect(row.llmModel).toBe('tip-generator');
    expect(row.tipKind).toBe('advice');
  });

  it('falls back to todoist tip when /generate returns non-200', async () => {
    stubFetch({
      todoist: fakeResponse(200, {
        results: [{ id: 'fallback-1', content: 'Do stuff', priority: 2, due: null }],
      }),
      generate: fakeResponse(502),
      score: fakeResponse(200, { tip_id: 'todoist:fallback-1', score: 0.5 }),
    });

    const { status, body } = await post(`${baseUrl}/api/recommend`);

    // A Todoist candidate exists, so the fallback must actually serve it;
    // accepting 204 here would let a broken fallback pass unnoticed.
    expect(status).toBe(200);
    expect(body.tip.source).toBe('todoist');
  });
});

View File

@@ -0,0 +1,39 @@
/**
* Pure-function unit tests for recommender logic — no DB, no HTTP.
* These can import directly from the module without any mocking.
*/
import { describe, it, expect } from 'vitest';
import { inferReward, dueAgeDays } from '../recommender.js';
// Reward mapping for feedback actions. `done` is bucketed by dwell time (ms);
// the other actions are exercised here with no dwell measurement.
describe('inferReward', () => {
  it('dismiss → -1', () => expect(inferReward('dismiss', null)).toBe(-1.0));
  it('snooze → +0.1', () => expect(inferReward('snooze', null)).toBe(0.1));
  it('helpful → +0.5', () => expect(inferReward('helpful', null)).toBe(0.5));
  it('not_helpful → -0.5', () => expect(inferReward('not_helpful', null)).toBe(-0.5));

  // `done` buckets by dwell time.
  it('done with null dwell → +0.5', () => expect(inferReward('done', null)).toBe(0.5));
  it('done < 15s (reflex) → -0.3', () => expect(inferReward('done', 5_000)).toBe(-0.3));
  it('done 15s–2min (magic) → +1.0', () => expect(inferReward('done', 60_000)).toBe(1.0));
  it('done 2–10min (good) → +0.6', () => expect(inferReward('done', 300_000)).toBe(0.6));
  it('done > 10min (eventual) → +0.3', () => expect(inferReward('done', 700_000)).toBe(0.3));

  // Bucket edges: each lower bound belongs to the bucket it opens.
  it('done exactly 15s (boundary) → magic zone', () => expect(inferReward('done', 15_000)).toBe(1.0));
  it('done exactly 2min (boundary) → good zone', () => expect(inferReward('done', 120_000)).toBe(0.6));
});
// Age in days of a Todoist due value relative to "now"; never negative.
describe('dueAgeDays', () => {
  const DAY_MS = 86_400_000;
  /** ISO timestamp offset from the current instant by `offsetMs`. */
  const isoFromNow = (offsetMs: number): string => new Date(Date.now() + offsetMs).toISOString();

  it('null due → 0', () => {
    expect(dueAgeDays(null)).toBe(0);
  });

  it('empty object → 0', () => {
    expect(dueAgeDays({})).toBe(0);
  });

  it('future date → 0 (clamped)', () => {
    const tomorrow = isoFromNow(DAY_MS);
    expect(dueAgeDays({ datetime: tomorrow })).toBe(0);
  });

  it('past date → positive age', () => {
    const measured = dueAgeDays({ datetime: isoFromNow(-2 * DAY_MS) });
    // Allow a little slack for the time elapsed between building the input and the call.
    expect(measured).toBeGreaterThan(1.9);
    expect(measured).toBeLessThan(2.1);
  });

  it('date-only field used when datetime absent', () => {
    // Keep only the YYYY-MM-DD part of yesterday's timestamp.
    const dateOnly = isoFromNow(-DAY_MS).slice(0, 10);
    expect(dueAgeDays({ date: dateOnly })).toBeGreaterThan(0);
  });
});

View File

@@ -24,7 +24,7 @@ router.get('/', requireAuth, async (req: AuthenticatedRequest, res: Response) =>
const integrations = tokens.map((t) => ({
provider: t.provider,
status: 'connected',
status: t.tokenStatus === 'needs_reconnect' ? 'needs_reconnect' : 'connected',
connectedAt: t.connectedAt,
}));
@@ -97,6 +97,7 @@ router.get('/todoist/callback', async (req: Request, res: Response) => {
userId: pending.userId,
provider: 'todoist',
accessToken: access_token,
tokenStatus: 'active',
connectedAt: now,
});

View File

@@ -6,23 +6,15 @@ import { eq, and, desc } from 'drizzle-orm';
import { requireAuth, AuthenticatedRequest } from '../middleware/session.js';
import { config } from '../config.js';
import { bus } from '../events/bus.js';
import type { Tip } from '@oo/shared-types';
import type { TipCandidate } from '@oo/shared-types';
const router: ExpressRouter = Router();
const CACHE_TTL_MS = 30_000;
const PROMPT_VERSION = 'v1';
interface TaskFeatures {
is_overdue: boolean;
task_age_days: number;
priority: number;
}
interface CachedTask extends Tip {
features: TaskFeatures;
}
const taskCache = new Map<string, { tasks: CachedTask[]; fetchedAt: number }>();
const taskCache = new Map<string, { tasks: TipCandidate[]; fetchedAt: number }>();
export const _clearTaskCacheForTests = () => taskCache.clear();
// ---------------------------------------------------------------------------
// Shadow-policy registry
@@ -49,7 +41,7 @@ export function setPolicyActive(name: string, active: boolean): boolean {
// Todoist helpers
// ---------------------------------------------------------------------------
function dueAgeDays(due: { date?: string; datetime?: string } | null | undefined): number {
export function dueAgeDays(due: { date?: string; datetime?: string } | null | undefined): number {
if (!due) return 0;
const dateStr = due.datetime ?? due.date;
if (!dateStr) return 0;
@@ -57,7 +49,7 @@ function dueAgeDays(due: { date?: string; datetime?: string } | null | undefined
return Math.max(0, (Date.now() - dueMs) / (1000 * 60 * 60 * 24));
}
async function fetchTodoistTasks(userId: string, accessToken: string): Promise<CachedTask[]> {
async function fetchTodoistTasks(userId: string, accessToken: string): Promise<TipCandidate[]> {
const cached = taskCache.get(userId);
if (cached && Date.now() - cached.fetchedAt < CACHE_TTL_MS) return cached.tasks;
@@ -73,6 +65,10 @@ async function fetchTodoistTasks(userId: string, accessToken: string): Promise<C
provider: 'todoist',
detectedAt: new Date().toISOString(),
});
await db
.update(integrationTokens)
.set({ tokenStatus: 'needs_reconnect' })
.where(and(eq(integrationTokens.userId, userId), eq(integrationTokens.provider, 'todoist')));
}
return cached?.tasks ?? [];
}
@@ -87,13 +83,14 @@ async function fetchTodoistTasks(userId: string, accessToken: string): Promise<C
};
const now = new Date();
const tasks: CachedTask[] = (body.results ?? []).map((t) => {
const tasks: TipCandidate[] = (body.results ?? []).map((t) => {
const ageDays = dueAgeDays(t.due);
const isOverdue = ageDays > 0;
return {
id: `todoist:${t.id}`,
content: t.content,
source: 'todoist' as const,
kind: 'task' as const,
sourceId: t.id,
createdAt: now.toISOString(),
features: {
@@ -111,10 +108,14 @@ async function fetchTodoistTasks(userId: string, accessToken: string): Promise<C
return tasks;
}
// ---------------------------------------------------------------------------
// Stage 2: score candidates via ml/serving bandit
// ---------------------------------------------------------------------------
/** Call ml/serving for scored selection; returns { tip_id, score } or null on failure */
async function remotePolicy(
userId: string,
tasks: CachedTask[],
tasks: TipCandidate[],
): Promise<{ tipId: string; score: number; policy: string } | null> {
const hour = new Date().getHours();
const dayOfWeek = new Date().getDay();
@@ -147,13 +148,64 @@ async function remotePolicy(
}
}
function randomPolicy(candidates: CachedTask[]): CachedTask | null {
/**
 * Uniform random selection over the candidate pool.
 *
 * @param candidates Pool to choose from.
 * @returns One candidate picked with equal probability, or null for an empty pool.
 */
function randomPolicy(candidates: TipCandidate[]): TipCandidate | null {
  const poolSize = candidates.length;
  if (poolSize === 0) {
    return null;
  }
  const pickedIndex = Math.floor(Math.random() * poolSize);
  return candidates[pickedIndex];
}
// ---------------------------------------------------------------------------
// Stage 1b: fetch LLM candidates from ml/serving /generate
// ---------------------------------------------------------------------------
/** Raw candidate shape expected from ml/serving `POST /generate`. */
interface LlmCandidate {
  id: string;
  content: string;
  rationale?: string;
}

/**
 * Converts one raw `/generate` entry into a TipCandidate.
 *
 * Returns null when the entry cannot be served: not an object, no usable id,
 * or no non-blank text content. A non-string rationale is dropped rather than
 * rejecting the whole entry.
 */
function toLlmTip(raw: unknown, createdAt: string): TipCandidate | null {
  if (typeof raw !== 'object' || raw === null) return null;
  const entry = raw as Partial<Record<keyof LlmCandidate, unknown>>;

  // Tolerate numeric ids from the generator; they are stringified into the tip id anyway.
  const id = typeof entry.id === 'number' ? String(entry.id) : entry.id;
  if (typeof id !== 'string' || id === '') return null;

  const { content, rationale } = entry;
  if (typeof content !== 'string' || content.trim() === '') return null;

  return {
    id: `llm:${id}`,
    content,
    source: 'llm' as const,
    kind: 'advice' as const,
    rationale: typeof rationale === 'string' ? rationale : undefined,
    createdAt,
    // LLM advice has no task attributes; use neutral features for scoring.
    features: { is_overdue: false, task_age_days: 0, priority: 1 },
  };
}

/**
 * Stage 1b: asks ml/serving `/generate` for LLM-written advice tips.
 *
 * Best-effort by design: a non-2xx response, a timeout, a network error or an
 * unexpected payload all yield an empty list so the caller can still serve
 * other candidates. Entries lacking an id or text are skipped, so a malformed
 * generation never becomes a tip with id `llm:undefined` or empty content.
 *
 * @param userId       User the tips are generated for.
 * @param todoistTasks Current task candidates; the first 10 are sent as prompt context.
 * @param hour         Hour of day, forwarded as `hour_of_day`.
 * @param dayOfWeek    Day of week, forwarded as `day_of_week`.
 * @returns Zero or more candidates with source 'llm' and kind 'advice'.
 */
async function fetchLlmCandidates(
  userId: string,
  todoistTasks: TipCandidate[],
  hour: number,
  dayOfWeek: number,
): Promise<TipCandidate[]> {
  try {
    const tasks = todoistTasks.slice(0, 10).map((t) => ({
      content: t.content,
      priority: t.features.priority,
      is_overdue: t.features.is_overdue,
      task_age_days: t.features.task_age_days,
    }));

    const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        user_id: userId,
        context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
        n: 3,
      }),
      // Abort the request after 15 s so a slow generation cannot hang the caller.
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) return [];

    const data: unknown = await res.json();
    const rawCandidates = (data as { candidates?: unknown } | null)?.candidates;
    if (!Array.isArray(rawCandidates)) return [];

    const now = new Date().toISOString();
    return rawCandidates.flatMap((raw) => {
      const tip = toLlmTip(raw, now);
      return tip ? [tip] : [];
    });
  } catch {
    // Deliberate best-effort: LLM generation is optional, so any failure means "no LLM tips".
    return [];
  }
}
// ---------------------------------------------------------------------------
// POST /api/recommend
// Pipeline: [Stage 1] assemble candidates → [Stage 2] score → [Stage 3] serve
// ---------------------------------------------------------------------------
router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Response) => {
const [token] = await db
@@ -167,34 +219,42 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
return;
}
const tasks = await fetchTodoistTasks(req.userId!, token.accessToken);
if (!tasks.length) {
const hour = new Date().getHours();
const dayOfWeek = new Date().getDay();
// Stage 1: assemble candidates — Todoist tasks + LLM-generated advice (parallel)
const [todoistTasks, llmCandidates] = await Promise.all([
fetchTodoistTasks(req.userId!, token.accessToken),
fetchLlmCandidates(req.userId!, taskCache.get(req.userId!)?.tasks ?? [], hour, dayOfWeek),
]);
const allCandidates: TipCandidate[] = [...todoistTasks, ...llmCandidates];
if (!allCandidates.length) {
res.status(204).end();
return;
}
const hour = new Date().getHours();
const dayOfWeek = new Date().getDay();
const t0 = Date.now();
// RemotePolicy with RandomPolicy fallback
const scored = await remotePolicy(req.userId!, tasks);
// Stage 2: score — egreedy bandit with random fallback
const scored = await remotePolicy(req.userId!, allCandidates);
const latencyMs = Date.now() - t0;
const tip = scored
? (tasks.find((t) => t.id === scored.tipId) ?? randomPolicy(tasks))
: randomPolicy(tasks);
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
: randomPolicy(allCandidates);
if (!tip) {
res.status(204).end();
return;
}
// Stage 3: serve + log
const policy = scored ? scored.policy : 'random';
const isLlmTip = tip.source === 'llm';
const servedAt = new Date().toISOString();
await db.insert(tipViews).values({ id: nanoid(), userId: req.userId!, tipId: tip.id, servedAt });
// Log recommendation explainability
await db.insert(tipScores).values({
id: nanoid(),
userId: req.userId!,
@@ -208,9 +268,12 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
hour_of_day: hour,
day_of_week: dayOfWeek,
}),
candidateCount: tasks.length,
candidateCount: allCandidates.length,
latencyMs,
servedAt,
promptVersion: isLlmTip ? PROMPT_VERSION : null,
llmModel: isLlmTip ? 'tip-generator' : null,
tipKind: tip.kind ?? null,
});
bus.publish('signals.tip.served', {
@@ -224,7 +287,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
for (const [name, s] of shadowPolicies) {
if (!s.active) continue;
if (name.startsWith('random')) {
const shadowTip = randomPolicy(tasks);
const shadowTip = randomPolicy(allCandidates);
bus.publish('signals.tip.served', {
userId: req.userId!,
tipId: shadowTip?.id ?? 'none',
@@ -249,7 +312,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
// done 2–10 min → +0.6 (good: user engaged, acted in same session)
// done > 10 min → +0.3 (eventually done; tip may have helped, unclear)
// ---------------------------------------------------------------------------
function inferReward(action: string, dwellMs: number | null): number {
export function inferReward(action: string, dwellMs: number | null): number {
if (action === 'dismiss') return -1.0;
if (action === 'snooze') return 0.1;
if (action === 'helpful') return 0.5;
@@ -269,7 +332,7 @@ async function sendRewardWithRetry(
userId: string,
tipId: string,
reward: number,
features: TaskFeatures,
features: TipCandidate['features'],
): Promise<void> {
const body = JSON.stringify({
user_id: userId,
@@ -347,7 +410,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
createdAt: now.toISOString(),
});
const task = taskCache.get(req.userId!)?.tasks.find((t) => t.id === tipId);
const task: TipCandidate | undefined = taskCache.get(req.userId!)?.tasks.find((t) => t.id === tipId);
taskCache.delete(req.userId!);

View File

@@ -32,6 +32,7 @@ export function makeTestDb() {
refresh_token TEXT,
expires_at TEXT,
connected_at TEXT NOT NULL,
token_status TEXT NOT NULL DEFAULT 'active',
UNIQUE(user_id, provider)
);
@@ -88,7 +89,10 @@ export function makeTestDb() {
features_json TEXT,
candidate_count INTEGER,
latency_ms INTEGER,
served_at TEXT NOT NULL
served_at TEXT NOT NULL,
prompt_version TEXT,
llm_model TEXT,
tip_kind TEXT
);
CREATE TABLE IF NOT EXISTS saved_queries (

View File

@@ -4,6 +4,13 @@ export default defineConfig({
test: {
globals: true,
environment: 'node',
env: {
SESSION_SECRET: 'test-secret',
GOOGLE_CLIENT_ID: 'test-google-id',
GOOGLE_CLIENT_SECRET: 'test-google-secret',
TODOIST_CLIENT_ID: 'test-todoist-id',
TODOIST_CLIENT_SECRET: 'test-todoist-secret',
},
coverage: {
provider: 'v8',
reporter: ['text', 'lcov'],