feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework
- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
replaces linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3, done with unknown dwell=+0.5
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@ import { type Router as ExpressRouter, Router, Response } from 'express';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { db } from '../db/index.js';
|
||||
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
|
||||
import { eq, and } from 'drizzle-orm';
|
||||
import { eq, and, desc } from 'drizzle-orm';
|
||||
import { requireAuth, AuthenticatedRequest } from '../middleware/session.js';
|
||||
import { config } from '../config.js';
|
||||
import { bus } from '../events/bus.js';
|
||||
@@ -105,7 +105,7 @@ async function fetchTodoistTasks(userId: string, accessToken: string): Promise<C
|
||||
async function remotePolicy(
|
||||
userId: string,
|
||||
tasks: CachedTask[],
|
||||
): Promise<{ tipId: string; score: number } | null> {
|
||||
): Promise<{ tipId: string; score: number; policy: string } | null> {
|
||||
const hour = new Date().getHours();
|
||||
const dayOfWeek = new Date().getDay();
|
||||
|
||||
@@ -121,8 +121,9 @@ async function remotePolicy(
|
||||
context: { hour_of_day: hour, day_of_week: dayOfWeek },
|
||||
};
|
||||
|
||||
// Active policy: egreedy-v1 (selected over linucb-v1 after offline sim — ADR-0007)
|
||||
try {
|
||||
const res = await fetch(`${config.ML_SERVING_URL}/score`, {
|
||||
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
@@ -130,7 +131,7 @@ async function remotePolicy(
|
||||
});
|
||||
if (!res.ok) return null;
|
||||
const data = (await res.json()) as { tip_id: string; score: number };
|
||||
return { tipId: data.tip_id, score: data.score };
|
||||
return { tipId: data.tip_id, score: data.score, policy: 'egreedy-v1' };
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@@ -178,7 +179,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
return;
|
||||
}
|
||||
|
||||
const policy = scored ? 'linucb-v1' : 'random';
|
||||
const policy = scored ? scored.policy : 'random';
|
||||
const servedAt = new Date().toISOString();
|
||||
|
||||
await db.insert(tipViews).values({ id: nanoid(), userId: req.userId!, tipId: tip.id, servedAt });
|
||||
@@ -226,55 +227,85 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
res.json({ tip });
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// Reward inference from action + dwell time
//
// Feedback is now 3 signals only: done / snooze / dismiss.
// "Helpfulness" is inferred from how long the user took to act on a tip:
//   dismiss              → -1.0  (clear rejection)
//   snooze               → +0.1  (tip noticed, timing off — mild positive)
//   done, unknown dwell  → +0.5  (null, negative or NaN dwell — neutral positive)
//   done < 15 s          → -0.3  (almost certainly a stale task, not magic)
//   done 15 s – 2 min    → +1.0  (magic zone: user saw tip and acted)
//   done 2 – 10 min      → +0.6  (good: user engaged, acted in same session)
//   done >= 10 min       → +0.3  (eventually done; tip may have helped, unclear)
// ---------------------------------------------------------------------------

/**
 * Maps a feedback action and the time the tip was on screen to a scalar reward.
 *
 * Any action other than `dismiss` or `snooze` is scored with the `done`
 * dwell-time table, so callers are expected to validate `action` first.
 *
 * @param action - Feedback action: `done`, `snooze` or `dismiss`.
 * @param dwellMs - Milliseconds between serving the tip and the feedback, or
 *   `null` when no view record was found.
 * @returns The inferred reward, in the range [-1.0, 1.0].
 */
function inferReward(action: string, dwellMs: number | null): number {
  if (action === 'dismiss') return -1.0;
  if (action === 'snooze') return 0.1;

  // done — use dwell time.
  // NaN (e.g. a timestamp that failed to parse) fails every `<` comparison
  // below and would otherwise be scored as "> 10 min"; treat it as unknown
  // dwell, the same as null or a negative value.
  if (dwellMs === null || Number.isNaN(dwellMs) || dwellMs < 0) {
    return 0.5; // unknown dwell: neutral positive
  }
  if (dwellMs < 15_000) return -0.3; // stale / reflex
  if (dwellMs < 120_000) return 1.0; // magic zone
  if (dwellMs < 600_000) return 0.6; // good
  return 0.3; // eventually
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// POST /api/tip/:id/feedback
|
||||
// ---------------------------------------------------------------------------
|
||||
router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest, res: Response) => {
|
||||
const { action } = req.body as { action: string };
|
||||
const tipId = String(req.params.id);
|
||||
const now = new Date();
|
||||
|
||||
const validActions = ['done', 'dismiss', 'snooze', 'helpful', 'not_helpful'];
|
||||
const validActions = ['done', 'dismiss', 'snooze'];
|
||||
if (!validActions.includes(action)) {
|
||||
res.status(400).json({ error: 'Invalid action' });
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute dwell time from the most recent tipViews record for this user+tip
|
||||
let dwellMs: number | null = null;
|
||||
const [lastView] = await db
|
||||
.select({ servedAt: tipViews.servedAt })
|
||||
.from(tipViews)
|
||||
.where(and(eq(tipViews.userId, req.userId!), eq(tipViews.tipId, tipId)))
|
||||
.orderBy(desc(tipViews.servedAt))
|
||||
.limit(1);
|
||||
|
||||
if (lastView?.servedAt) {
|
||||
dwellMs = now.getTime() - new Date(lastView.servedAt).getTime();
|
||||
}
|
||||
|
||||
const reward = inferReward(action, dwellMs);
|
||||
|
||||
await db.insert(tipFeedback).values({
|
||||
id: nanoid(),
|
||||
userId: req.userId!,
|
||||
tipId,
|
||||
action,
|
||||
sourceId: tipId.startsWith('todoist:') ? tipId.slice(8) : null,
|
||||
createdAt: new Date().toISOString(),
|
||||
dwellMs: dwellMs !== null ? Math.round(dwellMs) : null,
|
||||
rewardMilli: Math.round(reward * 1000),
|
||||
createdAt: now.toISOString(),
|
||||
});
|
||||
|
||||
// Map action to reward (helpful/not_helpful supplement behavioural signals)
|
||||
const rewardMap: Record<string, number> = {
|
||||
done: 1.0,
|
||||
helpful: 0.5,
|
||||
snooze: 0.0,
|
||||
not_helpful: -0.5,
|
||||
dismiss: -1.0,
|
||||
};
|
||||
const reward = rewardMap[action] ?? 0.0;
|
||||
|
||||
const task = taskCache.get(req.userId!)?.tasks.find((t) => t.id === tipId);
|
||||
|
||||
// Clear cache on behavioural actions (not on explicit helpful/not_helpful)
|
||||
if (['done', 'dismiss', 'snooze'].includes(action)) {
|
||||
taskCache.delete(req.userId!);
|
||||
}
|
||||
taskCache.delete(req.userId!);
|
||||
|
||||
bus.publish('signals.tip.feedback', {
|
||||
userId: req.userId!,
|
||||
tipId,
|
||||
action: action as 'done' | 'dismiss' | 'snooze' | 'helpful' | 'not_helpful',
|
||||
action: action as 'done' | 'dismiss' | 'snooze',
|
||||
reward,
|
||||
createdAt: new Date().toISOString(),
|
||||
dwellMs,
|
||||
createdAt: now.toISOString(),
|
||||
});
|
||||
|
||||
if (task) {
|
||||
fetch(`${config.ML_SERVING_URL}/reward`, {
|
||||
// Send reward to egreedy-v1 (active policy — ADR-0007)
|
||||
fetch(`${config.ML_SERVING_URL}/reward/egreedy`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
@@ -282,6 +313,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
|
||||
tip_id: tipId,
|
||||
reward,
|
||||
features: task.features,
|
||||
day_of_week: new Date().getDay(),
|
||||
}),
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user