feat(ml): prompt registry + per-request variant selection
Replaces the hardcoded "v1" label with a real prompt registry:
ml/serving/prompts.py — keyed by version: v1 (baseline),
v2-mentor (calm/specific persona),
v3-few-shot (v1 persona + curated examples)
ml/serving/main.py — POST /generate accepts an optional prompt_version,
returns 422 for an unknown version, and echoes the
version actually used back in the response
services/api/src/config.ts — TIP_PROMPT_VERSION: empty (ml/serving's default),
a single version, or a comma-separated list
(one picked uniformly at random per request)
services/api/src/routes/recommender.ts
— pickPromptVersion() drives selection; the
response's prompt_version (not a stale TS
constant) is what lands in tip_scores so the
#92 reward-analytics dashboard shows real
per-variant reaction rates
Closes #84.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -43,4 +43,12 @@ export const config = {
|
||||
|
||||
/** How often to proactively sync Todoist tasks in the background (ms) */
|
||||
TODOIST_SYNC_INTERVAL_MS: parseInt(optional('TODOIST_SYNC_INTERVAL_MS', String(15 * 60 * 1000)), 10),
|
||||
|
||||
/**
|
||||
* Tip prompt version selection. Single value (e.g. "v2-mentor") pins one
|
||||
* variant; comma-separated list (e.g. "v1,v2-mentor,v3-few-shot") rotates
|
||||
* uniformly per request so #92's reward-analytics dashboard accumulates
|
||||
* comparable buckets. Empty → ml/serving's own default ("v1").
|
||||
*/
|
||||
TIP_PROMPT_VERSION: optional('TIP_PROMPT_VERSION', ''),
|
||||
};
|
||||
|
||||
@@ -134,6 +134,7 @@ describe('POST /recommend integration', () => {
|
||||
json: async () => ({
|
||||
candidates: [{ id: 'adv-1', content: 'Take a break.', rationale: 'You deserve it.' }],
|
||||
model: 'tip-generator',
|
||||
prompt_version: 'v1',
|
||||
}),
|
||||
} as any);
|
||||
}
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
* Pure-function unit tests for recommender logic — no DB, no HTTP.
|
||||
* These can import directly from the module without any mocking.
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { inferReward, dueAgeDays } from '../recommender.js';
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import { inferReward, dueAgeDays, pickPromptVersion } from '../recommender.js';
|
||||
import { config } from '../../config.js';
|
||||
|
||||
describe('inferReward', () => {
|
||||
it('dismiss → -1', () => expect(inferReward('dismiss', null)).toBe(-1.0));
|
||||
@@ -37,3 +38,45 @@ describe('dueAgeDays', () => {
|
||||
expect(dueAgeDays({ date: yesterday })).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pickPromptVersion', () => {
  // TIP_PROMPT_VERSION is an env-driven config field, so every test writes the
  // value it needs straight onto the shared config object; the hooks below
  // snapshot the value before each test and put it back afterwards.
  const writableConfig = config as { TIP_PROMPT_VERSION: string };
  const useVersions = (value: string): void => {
    writableConfig.TIP_PROMPT_VERSION = value;
  };

  let saved: string;

  beforeEach(() => {
    saved = config.TIP_PROMPT_VERSION;
  });

  afterEach(() => {
    useVersions(saved);
  });

  it('empty config → null (let ml/serving pick its default)', () => {
    useVersions('');
    expect(pickPromptVersion()).toBeNull();
  });

  it('whitespace-only config → null', () => {
    useVersions(' ');
    expect(pickPromptVersion()).toBeNull();
  });

  it('single value → that value', () => {
    useVersions('v2-mentor');
    expect(pickPromptVersion()).toBe('v2-mentor');
  });

  it('comma-separated → uniformly samples from the set', () => {
    useVersions('v1,v2-mentor,v3-few-shot');
    const observed = new Set<string>();
    // Coverage check only (not a distribution test). A given bucket is missed
    // in 100 draws with probability (2/3)^100, so by the union bound the chance
    // of missing any of the three is at most 3·(2/3)^100 ≈ 7e-18 — not flaky.
    let remaining = 100;
    while (remaining > 0) {
      const choice = pickPromptVersion();
      expect(choice).not.toBeNull();
      observed.add(choice!);
      remaining -= 1;
    }
    expect(observed).toEqual(new Set(['v1', 'v2-mentor', 'v3-few-shot']));
  });

  it('trims whitespace around comma-separated entries', () => {
    useVersions(' v1 , v2-mentor ');
    const allowed = ['v1', 'v2-mentor'];
    for (let attempt = 0; attempt < 20; attempt += 1) {
      const choice = pickPromptVersion()!;
      expect(allowed).toContain(choice);
    }
  });
});
|
||||
|
||||
@@ -13,7 +13,19 @@ import { SignalAggregator } from '../signals/aggregator.js';
|
||||
|
||||
const router: ExpressRouter = Router();
|
||||
|
||||
const PROMPT_VERSION = 'v1';
|
||||
/**
 * Chooses the prompt version to request for a single call.
 *
 * `config.TIP_PROMPT_VERSION` is read on every invocation and interpreted as:
 * - empty or whitespace-only → `null`, leaving ml/serving to use its own default;
 * - a single version → that version, every time;
 * - a comma-separated list → one entry drawn uniformly at random, so each
 *   variant accumulates a comparable bucket on the #92 dashboard.
 *
 * Entries are trimmed and blank entries are discarded. Exported for testing.
 *
 * @returns The selected version, or `null` when nothing usable is configured.
 */
export function pickPromptVersion(): string | null {
  const configured = config.TIP_PROMPT_VERSION.trim();
  if (configured.length === 0) {
    return null;
  }

  // Collect the non-blank, trimmed entries of the comma-separated list.
  const candidates: string[] = [];
  for (const entry of configured.split(',')) {
    const name = entry.trim();
    if (name) {
      candidates.push(name);
    }
  }
  if (candidates.length === 0) {
    return null;
  }

  // Math.random() is in [0, 1), so the floored index is always in range; the
  // `?? null` only satisfies the checker's possibly-undefined indexed access.
  const index = Math.floor(Math.random() * candidates.length);
  return candidates[index] ?? null;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Signal aggregator — register sources here as new integrations are added
|
||||
@@ -117,12 +129,19 @@ interface LlmCandidate {
|
||||
rationale?: string;
|
||||
}
|
||||
|
||||
/**
 * Result of one fetchLlmCandidates call: the generated tips plus the
 * metadata the generator reported about how it produced them.
 */
interface LlmGenerateResult {
|
||||
/** Tips mapped from the generator response; empty when the call fails or is not ok. */
candidates: TipCandidate[];
|
||||
/** `prompt_version` echoed in the generator response, or null when absent or on failure. */
promptVersion: string | null;
|
||||
/** `model` reported in the generator response, or null when absent or on failure. */
model: string | null;
|
||||
}
|
||||
|
||||
async function fetchLlmCandidates(
|
||||
userId: string,
|
||||
signals: Signal[],
|
||||
hour: number,
|
||||
dayOfWeek: number,
|
||||
): Promise<TipCandidate[]> {
|
||||
promptVersion: string | null,
|
||||
): Promise<LlmGenerateResult> {
|
||||
try {
|
||||
const tasks = signals.slice(0, 10).map((s) => ({
|
||||
content: s.content,
|
||||
@@ -137,13 +156,18 @@ async function fetchLlmCandidates(
|
||||
user_id: userId,
|
||||
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
|
||||
n: 3,
|
||||
...(promptVersion ? { prompt_version: promptVersion } : {}),
|
||||
}),
|
||||
signal: AbortSignal.timeout(15_000),
|
||||
});
|
||||
if (!res.ok) return [];
|
||||
const data = (await res.json()) as { candidates: LlmCandidate[]; model?: string };
|
||||
if (!res.ok) return { candidates: [], promptVersion: null, model: null };
|
||||
const data = (await res.json()) as {
|
||||
candidates: LlmCandidate[];
|
||||
model?: string;
|
||||
prompt_version?: string;
|
||||
};
|
||||
const now = new Date().toISOString();
|
||||
return data.candidates.map((c) => ({
|
||||
const candidates: TipCandidate[] = data.candidates.map((c) => ({
|
||||
id: `llm:${c.id}`,
|
||||
content: c.content,
|
||||
source: 'llm' as const,
|
||||
@@ -152,8 +176,13 @@ async function fetchLlmCandidates(
|
||||
createdAt: now,
|
||||
features: { is_overdue: false, task_age_days: 0, priority: 1 },
|
||||
}));
|
||||
return {
|
||||
candidates,
|
||||
promptVersion: data.prompt_version ?? null,
|
||||
model: data.model ?? null,
|
||||
};
|
||||
} catch {
|
||||
return [];
|
||||
return { candidates: [], promptVersion: null, model: null };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,9 +210,16 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
const signals = await aggregator.fetchAll(req.userId!);
|
||||
|
||||
const signalCandidates = signals.map(signalToCandidate);
|
||||
const llmCandidates = await fetchLlmCandidates(req.userId!, signals, hour, dayOfWeek);
|
||||
const requestedPromptVersion = pickPromptVersion();
|
||||
const llmResult = await fetchLlmCandidates(
|
||||
req.userId!,
|
||||
signals,
|
||||
hour,
|
||||
dayOfWeek,
|
||||
requestedPromptVersion,
|
||||
);
|
||||
|
||||
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmCandidates];
|
||||
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
|
||||
if (!allCandidates.length) {
|
||||
res.status(204).end();
|
||||
return;
|
||||
@@ -227,8 +263,10 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
candidateCount: allCandidates.length,
|
||||
latencyMs,
|
||||
servedAt,
|
||||
promptVersion: isLlmTip ? PROMPT_VERSION : null,
|
||||
llmModel: isLlmTip ? 'tip-generator' : null,
|
||||
// Trust the version/model the generator reports; falls back to whatever
|
||||
// we asked for so the bucket isn't mislabeled if /generate omits it.
|
||||
promptVersion: isLlmTip ? (llmResult.promptVersion ?? requestedPromptVersion ?? null) : null,
|
||||
llmModel: isLlmTip ? (llmResult.model ?? 'tip-generator') : null,
|
||||
tipKind: tip.kind ?? null,
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user