feat(ml): prompt registry + per-request variant selection

Replaces the hardcoded "v1" label with a real prompt registry:

- ml/serving/prompts.py — keyed by version: v1 (baseline), v2-mentor (calm/specific persona), v3-few-shot (v1 persona + curated examples)
- ml/serving/main.py — POST /generate accepts optional prompt_version, 422 on unknown, echoes the version actually used back in the response
- services/api/src/config.ts — TIP_PROMPT_VERSION: empty / single / comma-list (uniform random per request)
- services/api/src/routes/recommender.ts — pickPromptVersion() drives selection; the response's prompt_version (not a stale TS constant) is what lands in tip_scores so the #92 reward-analytics dashboard shows real per-variant reaction rates

Closes #84.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-24 15:44:04 +00:00
parent aa4bdd8f09
commit 430804e9a5
9 changed files with 294 additions and 44 deletions
--- a/services/api/src/routes/tests/recommender.unit.test.ts
+++ b/services/api/src/routes/tests/recommender.unit.test.ts
@@ -2,8 +2,9 @@
 * Pure-function unit tests for recommender logic — no DB, no HTTP.
 * These can import directly from the module without any mocking.
 */
-import { describe, it, expect } from 'vitest';
-import { inferReward, dueAgeDays } from '../recommender.js';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { inferReward, dueAgeDays, pickPromptVersion } from '../recommender.js';
+import { config } from '../../config.js';

 describe('inferReward', () => {
  it('dismiss → -1', () => expect(inferReward('dismiss', null)).toBe(-1.0));
@@ -37,3 +38,45 @@ describe('dueAgeDays', () => {
    expect(dueAgeDays({ date: yesterday })).toBeGreaterThan(0);
  });
 });
+
+describe('pickPromptVersion', () => {
+  // Save + restore the original env-driven config field across tests.
+  let original: string;
+  beforeEach(() => { original = config.TIP_PROMPT_VERSION; });
+  afterEach(() => { (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = original; });
+
+  it('empty config → null (let ml/serving pick its default)', () => {
+    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = '';
+    expect(pickPromptVersion()).toBeNull();
+  });
+
+  it('whitespace-only config → null', () => {
+    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = '  ';
+    expect(pickPromptVersion()).toBeNull();
+  });
+
+  it('single value → that value', () => {
+    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = 'v2-mentor';
+    expect(pickPromptVersion()).toBe('v2-mentor');
+  });
+
+  it('comma-separated → uniformly samples from the set', () => {
+    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = 'v1,v2-mentor,v3-few-shot';
+    const seen = new Set<string>();
+    // With 100 trials, the chance of missing at least one of the 3 buckets is at most 3·(2/3)^100 ≈ 7e-18 — test is reliable.
+    for (let i = 0; i < 100; i++) {
+      const picked = pickPromptVersion();
+      expect(picked).not.toBeNull();
+      seen.add(picked!);
+    }
+    expect(seen).toEqual(new Set(['v1', 'v2-mentor', 'v3-few-shot']));
+  });
+
+  it('trims whitespace around comma-separated entries', () => {
+    (config as { TIP_PROMPT_VERSION: string }).TIP_PROMPT_VERSION = ' v1 , v2-mentor ';
+    for (let i = 0; i < 20; i++) {
+      const picked = pickPromptVersion()!;
+      expect(['v1', 'v2-mentor']).toContain(picked);
+    }
+  });
+});