Replaces the hardcoded "v1" label with a real prompt registry:
ml/serving/prompts.py — keyed by version: v1 (baseline),
v2-mentor (calm/specific persona),
v3-few-shot (v1 persona + curated examples)
ml/serving/main.py — POST /generate accepts an optional prompt_version,
returns 422 for an unknown version, and echoes the version
actually used back in the response
services/api/src/config.ts — TIP_PROMPT_VERSION: empty (defer to the ml/serving
default), a single version, or a comma-separated list (one entry
picked uniformly at random per request)
services/api/src/routes/recommender.ts
— pickPromptVersion() drives selection; the
response's prompt_version (not a stale TS
constant) is what lands in tip_scores so the
#92 reward-analytics dashboard shows real
per-variant reaction rates
Closes #84.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
83 lines
3.8 KiB
TypeScript
83 lines
3.8 KiB
TypeScript
/**
 * Pure-function unit tests for recommender logic — no DB, no HTTP.
 * These can import directly from the module without any mocking.
 */
|
||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||
import { inferReward, dueAgeDays, pickPromptVersion } from '../recommender.js';
|
||
import { config } from '../../config.js';
|
||
|
||
/**
 * Table-driven checks for `inferReward(action, dwell)`.
 *
 * Each row is [test title, action, dwell argument, expected reward]. The dwell
 * values line up with the second-based labels in the titles (e.g. 15_000 for
 * "15s"), so they read as milliseconds. The last two rows pin the bucket
 * boundaries: exactly 15s already counts as "magic", exactly 2min already
 * counts as "good".
 */
describe('inferReward', () => {
  const cases = [
    ['dismiss → -1', 'dismiss', null, -1.0],
    ['snooze → +0.1', 'snooze', null, 0.1],
    ['helpful → +0.5', 'helpful', null, 0.5],
    ['not_helpful → -0.5', 'not_helpful', null, -0.5],
    ['done with null dwell → +0.5', 'done', null, 0.5],
    ['done < 15s (reflex) → -0.3', 'done', 5_000, -0.3],
    ['done 15s–2min (magic) → +1.0', 'done', 60_000, 1.0],
    ['done 2–10min (good) → +0.6', 'done', 300_000, 0.6],
    ['done > 10min (eventual) → +0.3', 'done', 700_000, 0.3],
    ['done exactly 15s (boundary) → magic zone', 'done', 15_000, 1.0],
    ['done exactly 2min (boundary) → good zone', 'done', 120_000, 0.6],
  ] as const;

  // Register one test per row, in table order.
  for (const [title, action, dwell, expected] of cases) {
    it(title, () => {
      expect(inferReward(action, dwell)).toBe(expected);
    });
  }
});
|
||
|
||
/**
 * Checks for `dueAgeDays(due)`: missing or empty due info and future dates
 * yield 0, past dates yield a positive age in days, and a `date` field is
 * used when `datetime` is absent.
 */
describe('dueAgeDays', () => {
  const DAY_MS = 86_400_000;

  /** ISO-8601 timestamp `offsetMs` away from now (negative values lie in the past). */
  const isoFromNow = (offsetMs: number): string => new Date(Date.now() + offsetMs).toISOString();

  it('null due → 0', () => {
    expect(dueAgeDays(null)).toBe(0);
  });

  it('empty object → 0', () => {
    expect(dueAgeDays({})).toBe(0);
  });

  it('future date → 0 (clamped)', () => {
    const tomorrow = isoFromNow(DAY_MS);
    expect(dueAgeDays({ datetime: tomorrow })).toBe(0);
  });

  it('past date → positive age', () => {
    const ageInDays = dueAgeDays({ datetime: isoFromNow(-2 * DAY_MS) });
    // Loose bounds around 2 days absorb the time elapsed while the test runs.
    expect(ageInDays).toBeGreaterThan(1.9);
    expect(ageInDays).toBeLessThan(2.1);
  });

  it('date-only field used when datetime absent', () => {
    // Keep only the leading YYYY-MM-DD part of the ISO timestamp.
    const yesterdayDate = isoFromNow(-DAY_MS).slice(0, 10);
    expect(dueAgeDays({ date: yesterdayDate })).toBeGreaterThan(0);
  });
});
|
||
|
||
/**
 * Checks for `pickPromptVersion()`, which reads `config.TIP_PROMPT_VERSION`:
 * blank → null, a single value → that value, a comma-separated list → one of
 * the (trimmed) entries.
 */
describe('pickPromptVersion', () => {
  /** Overwrite the config field; the cast makes it assignable from the test. */
  const setPromptVersion = (value: string): void => {
    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = value;
  };

  // Snapshot the env-driven value before each test and put it back afterwards.
  let saved: string;
  beforeEach(() => {
    saved = config.TIP_PROMPT_VERSION;
  });
  afterEach(() => {
    setPromptVersion(saved);
  });

  it('empty config → null (let ml/serving pick its default)', () => {
    setPromptVersion('');
    expect(pickPromptVersion()).toBeNull();
  });

  it('whitespace-only config → null', () => {
    setPromptVersion(' ');
    expect(pickPromptVersion()).toBeNull();
  });

  it('single value → that value', () => {
    setPromptVersion('v2-mentor');
    expect(pickPromptVersion()).toBe('v2-mentor');
  });

  it('comma-separated → uniformly samples from the set', () => {
    setPromptVersion('v1,v2-mentor,v3-few-shot');
    // Under uniform selection a given variant is absent from all 100 draws with
    // probability (2/3)^100 ≈ 0, so expecting to see every variant is reliable.
    const draws = Array.from({ length: 100 }, () => pickPromptVersion());
    const seen = new Set<string>();
    draws.forEach((version) => {
      expect(version).not.toBeNull();
      seen.add(version!);
    });
    expect(seen).toEqual(new Set(['v1', 'v2-mentor', 'v3-few-shot']));
  });

  it('trims whitespace around comma-separated entries', () => {
    setPromptVersion(' v1 , v2-mentor ');
    const allowed = ['v1', 'v2-mentor'];
    // An untrimmed entry such as ' v1 ' would not match any allowed value.
    Array.from({ length: 20 }, () => pickPromptVersion()!).forEach((version) => {
      expect(allowed).toContain(version);
    });
  });
});
|