diff --git a/docs/adr/0012-egreedy-v2-profile-features.md b/docs/adr/0012-egreedy-v2-profile-features.md index 27d0958..cf69e49 100644 --- a/docs/adr/0012-egreedy-v2-profile-features.md +++ b/docs/adr/0012-egreedy-v2-profile-features.md @@ -1,7 +1,7 @@ # ADR-0012 — ε-greedy v2: profile features in the bandit (D=7→12) -**Status:** Accepted -**Date:** 2026-04-25 +**Status:** Promoted +**Date:** 2026-04-25 (accepted) / 2026-04-26 (promoted) **Issue:** #99 ## Context @@ -106,3 +106,19 @@ projecting theta without the corresponding `A` matrix cannot be done correctly. the D=12 target in the issue spec and complicates the sim comparison. Deferred. **In-place v1 promotion without shadow** — violates ADR-0002. + +## Promotion record (2026-04-26) + +Offline sim (`runner.py --policies egreedy-v1 egreedy-v2 --judge rule --n-users 5 --n-rounds 20 --seed 42`): + +| policy | total reward | mean reward | pulls | +|--------|-------------|-------------|-------| +| egreedy-v1 | −64.20 | −0.6420 | 100 | +| egreedy-v2 | −62.90 | −0.6290 | 100 | + +**Gate passed** (v2 mean ≥ v1 mean). Per-persona: v2 wins deadline-driven, evening-relaxed, low-priority-first; v1 wins consistent-responder, overdue-ignorer. + +Changes applied: +- `recommender.ts` `remotePolicy()`: `/score/egreedy` → `/score/egreedy/v2` +- `recommender.ts` `sendRewardWithRetry()`: `/reward/egreedy` → `/reward/egreedy/v2`, added `profile_features` to payload +- Shadow entry `egreedy-v2-shadow` left in registry (`active: false`) for rollback. diff --git a/services/api/src/routes/recommender.ts b/services/api/src/routes/recommender.ts index 30bfc05..819b97c 100644 --- a/services/api/src/routes/recommender.ts +++ b/services/api/src/routes/recommender.ts @@ -47,7 +47,8 @@ export const _clearCandidateCacheForTests = () => { // Shadow-policy registry // --------------------------------------------------------------------------- const shadowPolicies = new Map([ - // egreedy-v2 (D=12, profile features) — disabled until sim gate per ADR-0012 + // egreedy-v2 promoted to active policy (ADR-0012). Shadow entry kept for + // rollback toggle; leave disabled in normal operation. ['egreedy-v2-shadow', { active: false }], ]); @@ -101,9 +102,9 @@ async function remotePolicy( profile_features: profile, }; - // Active policy: egreedy-v1 (selected over linucb-v1 after offline sim — ADR-0007) + // Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012) try { - const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy`, { + const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), @@ -111,7 +112,7 @@ async function remotePolicy( }); if (!res.ok) return null; const data = (await res.json()) as { tip_id: string; score: number }; - return { tipId: data.tip_id, score: data.score, policy: 'egreedy-v1' }; + return { tipId: data.tip_id, score: data.score, policy: 'egreedy-v2' }; } catch { return null; } @@ -371,6 +372,7 @@ async function sendRewardWithRetry( tipId: string, reward: number, features: TipCandidate['features'], + profile: Profile, ): Promise { const body = JSON.stringify({ user_id: userId, @@ -378,11 +380,12 @@ async function sendRewardWithRetry( reward, features, day_of_week: new Date().getDay(), + profile_features: profile, }); for (let attempt = 1; attempt <= 3; attempt++) { try { - const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy`, { + const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body, @@ -463,7 +466,9 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest, }); if (candidate) { - sendRewardWithRetry(req.userId!, tipId, reward, candidate.features); + // Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant. + const profile = await getProfile(req.userId!); + sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile); } // Delegate action to the owning signal source (e.g. mark done in Todoist)