feat(bandit): promote egreedy-v2 (D=12, profile features) as active policy (#99)

Offline sim gate passed — egreedy-v2 mean reward −0.629 vs egreedy-v1 −0.642
(5 users × 20 rounds, rule judge, seed 42). v2 wins 3/5 personas.

- recommender.ts: switch remotePolicy() to /score/egreedy/v2
- recommender.ts: switch sendRewardWithRetry() to /reward/egreedy/v2 with
  profile_features payload so the ridge update uses the full D=12 vector
- recommender.ts: re-fetch profile at feedback time (TTL-cached, near-instant)
- ADR-0012: status Accepted → Promoted, promotion record appended

Shadow entry egreedy-v2-shadow kept in registry (active: false) for rollback.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 03:08:28 +00:00
parent cba3f1a184
commit 7281af83a4
2 changed files with 29 additions and 8 deletions
--- a/services/api/src/routes/recommender.ts
+++ b/services/api/src/routes/recommender.ts
@@ -47,7 +47,8 @@ export const _clearCandidateCacheForTests = () => {
 // Shadow-policy registry
 // ---------------------------------------------------------------------------
 const shadowPolicies = new Map<string, { active: boolean }>([
-  // egreedy-v2 (D=12, profile features) — disabled until sim gate per ADR-0012
+  // egreedy-v2 promoted to active policy (ADR-0012). Shadow entry kept for
+  // rollback toggle; leave disabled in normal operation.
  ['egreedy-v2-shadow', { active: false }],
 ]);

@@ -101,9 +102,9 @@ async function remotePolicy(
    profile_features: profile,
  };

-  // Active policy: egreedy-v1 (selected over linucb-v1 after offline sim — ADR-0007)
+  // Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012)
  try {
-    const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy`, {
+    const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
@@ -111,7 +112,7 @@ async function remotePolicy(
    });
    if (!res.ok) return null;
    const data = (await res.json()) as { tip_id: string; score: number };
-    return { tipId: data.tip_id, score: data.score, policy: 'egreedy-v1' };
+    return { tipId: data.tip_id, score: data.score, policy: 'egreedy-v2' };
  } catch {
    return null;
  }
@@ -371,6 +372,7 @@ async function sendRewardWithRetry(
  tipId: string,
  reward: number,
  features: TipCandidate['features'],
+  profile: Profile,
 ): Promise<void> {
  const body = JSON.stringify({
    user_id: userId,
@@ -378,11 +380,12 @@ async function sendRewardWithRetry(
    reward,
    features,
    day_of_week: new Date().getDay(),
+    profile_features: profile,
  });

  for (let attempt = 1; attempt <= 3; attempt++) {
    try {
-      const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy`, {
+      const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body,
@@ -463,7 +466,9 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
  });

  if (candidate) {
-    sendRewardWithRetry(req.userId!, tipId, reward, candidate.features);
+    // Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
+    const profile = await getProfile(req.userId!);
+    sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile);
  }

  // Delegate action to the owning signal source (e.g. mark done in Todoist)