Files
oO/services/api/src/routes/__tests__/recommender.unit.test.ts
alvis 430804e9a5 feat(ml): prompt registry + per-request variant selection
Replaces the hardcoded "v1" label with a real prompt registry:

  ml/serving/prompts.py       — keyed by version: v1 (baseline),
                                v2-mentor (calm/specific persona),
                                v3-few-shot (v1 persona + curated examples)
  ml/serving/main.py          — POST /generate accepts optional prompt_version,
                                422 on unknown, echoes the version actually used
                                back in the response
  services/api/src/config.ts  — TIP_PROMPT_VERSION: empty / single / comma-list
                                (uniform random per request)
  services/api/src/routes/recommender.ts
                              — pickPromptVersion() drives selection; the
                                response's prompt_version (not a stale TS
                                constant) is what lands in tip_scores so the
                                #92 reward-analytics dashboard shows real
                                per-variant reaction rates

Closes #84.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-24 15:44:04 +00:00

83 lines
3.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Pure-function unit tests for recommender logic — no DB, no HTTP.
* These can import directly from the module without any mocking.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { inferReward, dueAgeDays, pickPromptVersion } from '../recommender.js';
import { config } from '../../config.js';
/**
 * Pins the reward returned by `inferReward(reaction, dwell)` for each
 * reaction. Judging by the fixtures (15s ↔ 15_000), dwell is in milliseconds.
 *
 * For `done`, the reward depends on the dwell bucket; `null` dwell falls back
 * to +0.5. The two boundary cases show each bucket's lower edge is inclusive:
 * exactly 15s is already "magic", exactly 2min is already "good".
 */
describe('inferReward', () => {
  // Reactions exercised with a null dwell time.
  it('dismiss → -1', () => expect(inferReward('dismiss', null)).toBe(-1.0));
  it('snooze → +0.1', () => expect(inferReward('snooze', null)).toBe(0.1));
  it('helpful → +0.5', () => expect(inferReward('helpful', null)).toBe(0.5));
  it('not_helpful → -0.5', () => expect(inferReward('not_helpful', null)).toBe(-0.5));

  // `done`: reward is bucketed by dwell time.
  it('done with null dwell → +0.5', () => expect(inferReward('done', null)).toBe(0.5));
  it('done < 15s (reflex) → -0.3', () => expect(inferReward('done', 5_000)).toBe(-0.3));
  it('done 15s–2min (magic) → +1.0', () => expect(inferReward('done', 60_000)).toBe(1.0));
  it('done 2–10min (good) → +0.6', () => expect(inferReward('done', 300_000)).toBe(0.6));
  it('done > 10min (eventual) → +0.3', () => expect(inferReward('done', 700_000)).toBe(0.3));

  // Bucket edges: the lower bound of each bucket is inclusive.
  it('done exactly 15s (boundary) → magic zone', () => expect(inferReward('done', 15_000)).toBe(1.0));
  it('done exactly 2min (boundary) → good zone', () => expect(inferReward('done', 120_000)).toBe(0.6));
});
/**
 * Checks `dueAgeDays(due)`: how many days overdue a due value is.
 * Missing or future due values yield 0; past ones yield a positive age.
 */
describe('dueAgeDays', () => {
  const DAY_MS = 86_400_000;

  it('null due → 0', () => {
    expect(dueAgeDays(null)).toBe(0);
  });

  it('empty object → 0', () => {
    expect(dueAgeDays({})).toBe(0);
  });

  it('future date → 0 (clamped)', () => {
    const tomorrowIso = new Date(Date.now() + DAY_MS).toISOString();
    const result = dueAgeDays({ datetime: tomorrowIso });
    expect(result).toBe(0);
  });

  it('past date → positive age', () => {
    const dueIso = new Date(Date.now() - 2 * DAY_MS).toISOString();
    const ageInDays = dueAgeDays({ datetime: dueIso });
    // A tolerance window absorbs the time elapsed between building the fixture and the call.
    expect(ageInDays).toBeGreaterThan(1.9);
    expect(ageInDays).toBeLessThan(2.1);
  });

  it('date-only field used when datetime absent', () => {
    // Keep only the YYYY-MM-DD prefix of the ISO timestamp.
    const isoYesterday = new Date(Date.now() - DAY_MS).toISOString();
    const dateOnly = isoYesterday.slice(0, 10);
    expect(dueAgeDays({ date: dateOnly })).toBeGreaterThan(0);
  });
});
/**
 * Checks `pickPromptVersion()` against the shapes of `config.TIP_PROMPT_VERSION`
 * exercised below: empty, whitespace-only, a single value, and a comma-separated list.
 */
describe('pickPromptVersion', () => {
  // Writable view of the shared config object; each test overwrites the field
  // and the hooks below put the original value back afterwards.
  const writableConfig = config as { TIP_PROMPT_VERSION: string };
  const setPromptVersion = (value: string): void => {
    writableConfig.TIP_PROMPT_VERSION = value;
  };

  let savedVersion: string;

  beforeEach(() => {
    savedVersion = config.TIP_PROMPT_VERSION;
  });

  afterEach(() => {
    setPromptVersion(savedVersion);
  });

  it('empty config → null (let ml/serving pick its default)', () => {
    setPromptVersion('');
    expect(pickPromptVersion()).toBeNull();
  });

  it('whitespace-only config → null', () => {
    setPromptVersion(' ');
    expect(pickPromptVersion()).toBeNull();
  });

  it('single value → that value', () => {
    setPromptVersion('v2-mentor');
    expect(pickPromptVersion()).toBe('v2-mentor');
  });

  it('comma-separated → uniformly samples from the set', () => {
    setPromptVersion('v1,v2-mentor,v3-few-shot');
    const observed = new Set<string>();
    // 100 draws: a given variant is missed with probability (2/3)^100, which is
    // effectively zero, so expecting all three variants is not flaky.
    let remaining = 100;
    while (remaining-- > 0) {
      const choice = pickPromptVersion();
      expect(choice).not.toBeNull();
      observed.add(choice!);
    }
    expect(observed).toEqual(new Set(['v1', 'v2-mentor', 'v3-few-shot']));
  });

  it('trims whitespace around comma-separated entries', () => {
    setPromptVersion(' v1 , v2-mentor ');
    const allowed = ['v1', 'v2-mentor'];
    for (let attempt = 0; attempt < 20; attempt += 1) {
      const choice = pickPromptVersion()!;
      expect(allowed).toContain(choice);
    }
  });
});