feat(ml): prompt registry + per-request variant selection

Replaces the hardcoded "v1" label with a real prompt registry:

  ml/serving/prompts.py       — keyed by version: v1 (baseline),
                                v2-mentor (calm/specific persona),
                                v3-few-shot (v1 persona + curated examples)
  ml/serving/main.py          — POST /generate accepts optional prompt_version,
                                422 on unknown, echoes the version actually used
                                back in the response
  services/api/src/config.ts  — TIP_PROMPT_VERSION: empty (ml/serving default), a
                                single version, or a comma-separated list (one
                                entry chosen uniformly at random per request)
  services/api/src/routes/recommender.ts
                              — pickPromptVersion() drives selection; the
                                response's prompt_version (not a stale TS
                                constant) is what lands in tip_scores so the
                                #92 reward-analytics dashboard shows real
                                per-variant reaction rates

Closes #84.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-24 15:44:04 +00:00
parent aa4bdd8f09
commit 430804e9a5
9 changed files with 294 additions and 44 deletions

View File

@@ -13,7 +13,19 @@ import { SignalAggregator } from '../signals/aggregator.js';
const router: ExpressRouter = Router();
const PROMPT_VERSION = 'v1';
/**
 * Choose the prompt version to request for a single recommendation call.
 *
 * `config.TIP_PROMPT_VERSION` is read as a comma-separated list; each entry is
 * trimmed and blank entries are dropped.
 *   - nothing left (empty / whitespace / only commas): returns `null`, meaning
 *     no version is requested and ml/serving uses its own default;
 *   - one entry: that entry is always returned;
 *   - several entries: one is chosen at random, each list position being
 *     equally likely, so the #92 dashboard accumulates comparable buckets per
 *     variant.
 *
 * Exported for testing.
 *
 * @returns the selected version name, or `null` when none is configured.
 */
export function pickPromptVersion(): string | null {
  const configured = config.TIP_PROMPT_VERSION.trim();
  if (configured === '') return null;

  // Collect the non-blank, trimmed entries in their configured order.
  const pool: string[] = [];
  for (const piece of configured.split(',')) {
    const name = piece.trim();
    if (name) pool.push(name);
  }
  if (pool.length === 0) return null;

  // Math.random() is in [0, 1), so the slot is always a valid index; the `??`
  // only satisfies noUncheckedIndexedAccess.
  const slot = Math.floor(Math.random() * pool.length);
  return pool[slot] ?? null;
}
// ---------------------------------------------------------------------------
// Signal aggregator — register sources here as new integrations are added
@@ -117,12 +129,19 @@ interface LlmCandidate {
rationale?: string;
}
/**
 * What `fetchLlmCandidates` resolves to: the generated candidates plus the
 * metadata the generator reported about how they were produced.
 */
interface LlmGenerateResult {
// Candidates mapped from the generator response; empty when the response is
// not OK or the call throws.
candidates: TipCandidate[];
// `prompt_version` from the generator response; null when the response omits
// it or the call failed.
promptVersion: string | null;
// `model` from the generator response; null when the response omits it or the
// call failed.
model: string | null;
}
async function fetchLlmCandidates(
userId: string,
signals: Signal[],
hour: number,
dayOfWeek: number,
): Promise<TipCandidate[]> {
promptVersion: string | null,
): Promise<LlmGenerateResult> {
try {
const tasks = signals.slice(0, 10).map((s) => ({
content: s.content,
@@ -137,13 +156,18 @@ async function fetchLlmCandidates(
user_id: userId,
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
n: 3,
...(promptVersion ? { prompt_version: promptVersion } : {}),
}),
signal: AbortSignal.timeout(15_000),
});
if (!res.ok) return [];
const data = (await res.json()) as { candidates: LlmCandidate[]; model?: string };
if (!res.ok) return { candidates: [], promptVersion: null, model: null };
const data = (await res.json()) as {
candidates: LlmCandidate[];
model?: string;
prompt_version?: string;
};
const now = new Date().toISOString();
return data.candidates.map((c) => ({
const candidates: TipCandidate[] = data.candidates.map((c) => ({
id: `llm:${c.id}`,
content: c.content,
source: 'llm' as const,
@@ -152,8 +176,13 @@ async function fetchLlmCandidates(
createdAt: now,
features: { is_overdue: false, task_age_days: 0, priority: 1 },
}));
return {
candidates,
promptVersion: data.prompt_version ?? null,
model: data.model ?? null,
};
} catch {
return [];
return { candidates: [], promptVersion: null, model: null };
}
}
@@ -181,9 +210,16 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
const signals = await aggregator.fetchAll(req.userId!);
const signalCandidates = signals.map(signalToCandidate);
const llmCandidates = await fetchLlmCandidates(req.userId!, signals, hour, dayOfWeek);
const requestedPromptVersion = pickPromptVersion();
const llmResult = await fetchLlmCandidates(
req.userId!,
signals,
hour,
dayOfWeek,
requestedPromptVersion,
);
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmCandidates];
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
if (!allCandidates.length) {
res.status(204).end();
return;
@@ -227,8 +263,10 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
candidateCount: allCandidates.length,
latencyMs,
servedAt,
promptVersion: isLlmTip ? PROMPT_VERSION : null,
llmModel: isLlmTip ? 'tip-generator' : null,
// Trust the version/model the generator reports. If the response omits the
// version, fall back to the one we requested so the bucket isn't mislabeled;
// if it omits the model, fall back to the generic 'tip-generator' label.
promptVersion: isLlmTip ? (llmResult.promptVersion ?? requestedPromptVersion ?? null) : null,
llmModel: isLlmTip ? (llmResult.model ?? 'tip-generator') : null,
tipKind: tip.kind ?? null,
});