feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework
- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
replaces linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -99,6 +99,40 @@ export function runMigrations() {
|
||||
sql TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS sim_runs (
|
||||
id TEXT PRIMARY KEY,
|
||||
policy_a TEXT NOT NULL,
|
||||
policy_b TEXT NOT NULL,
|
||||
n_users INTEGER NOT NULL,
|
||||
n_rounds INTEGER NOT NULL,
|
||||
tasks_per_round INTEGER NOT NULL DEFAULT 8,
|
||||
use_llm INTEGER NOT NULL DEFAULT 0,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
summary_json TEXT,
|
||||
winner TEXT,
|
||||
persona_breakdown_json TEXT,
|
||||
created_at TEXT NOT NULL,
|
||||
finished_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS sim_events (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL REFERENCES sim_runs(id),
|
||||
round INTEGER NOT NULL,
|
||||
user_id TEXT NOT NULL,
|
||||
persona TEXT NOT NULL,
|
||||
policy TEXT NOT NULL,
|
||||
tip_content TEXT NOT NULL,
|
||||
priority INTEGER NOT NULL,
|
||||
is_overdue INTEGER NOT NULL,
|
||||
action TEXT NOT NULL,
|
||||
dwell_ms INTEGER,
|
||||
reward_milli INTEGER NOT NULL,
|
||||
hour INTEGER NOT NULL,
|
||||
day_of_week INTEGER NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
`);
|
||||
|
||||
// Additive column migrations — safe to run on existing DBs.
|
||||
@@ -106,6 +140,8 @@ export function runMigrations() {
|
||||
// Runs each ALTER TABLE on its own so that one column that is already present
// does not prevent the remaining statements from being attempted.
for (const stmt of [
|
||||
`ALTER TABLE users ADD COLUMN role TEXT NOT NULL DEFAULT 'user'`,
|
||||
`ALTER TABLE push_subscriptions ADD COLUMN created_at TEXT NOT NULL DEFAULT ''`,
|
||||
// dwell_ms / reward_milli are added without NOT NULL or DEFAULT, so rows that
// already exist in tip_feedback read back as NULL for both columns.
`ALTER TABLE tip_feedback ADD COLUMN dwell_ms INTEGER`,
|
||||
`ALTER TABLE tip_feedback ADD COLUMN reward_milli INTEGER`,
|
||||
]) {
|
||||
// NOTE(review): the bare catch discards every error thrown by exec(), not only
// the "column already exists" case named in the inline comment. A misspelled
// statement or a missing table would also pass silently. Consider inspecting
// the error before ignoring it.
try { sqlite.exec(stmt); } catch { /* column already exists */ }
|
||||
}
|
||||
|
||||
@@ -29,6 +29,8 @@ export const tipFeedback = sqliteTable('tip_feedback', {
|
||||
tipId: text('tip_id').notNull(),
|
||||
action: text('action').notNull(), // 'done' | 'dismiss' | 'snooze'
|
||||
sourceId: text('source_id'),
|
||||
dwellMs: integer('dwell_ms'), // ms between servedAt and feedback; null if unknown
|
||||
rewardMilli: integer('reward_milli'), // inferred reward × 1000 (e.g. 1000 = +1.0)
|
||||
createdAt: text('created_at').notNull(),
|
||||
});
|
||||
|
||||
@@ -81,6 +83,43 @@ export const tipScores = sqliteTable('tip_scores', {
|
||||
servedAt: text('served_at').notNull(),
|
||||
});
|
||||
|
||||
// ── Simulation runs ──────────────────────────────────────────────────────────
|
||||
// One row per offline simulation run (two-policy comparison).
// Column names, nullability and defaults correspond to the
// `CREATE TABLE IF NOT EXISTS sim_runs` statement in runMigrations(); the two
// definitions are maintained by hand, so change them together.
|
||||
export const simRuns = sqliteTable('sim_runs', {
|
||||
id: text('id').primaryKey(),
|
||||
// The two policies being compared in this run.
policyA: text('policy_a').notNull(),
|
||||
policyB: text('policy_b').notNull(),
|
||||
// Run size parameters; tasksPerRound falls back to 8 when not supplied.
nUsers: integer('n_users').notNull(),
|
||||
nRounds: integer('n_rounds').notNull(),
|
||||
tasksPerRound: integer('tasks_per_round').notNull().default(8),
|
||||
// Stored as an INTEGER column; boolean mode exposes it as true/false. Defaults to false.
useLlm: integer('use_llm', { mode: 'boolean' }).notNull().default(false),
|
||||
status: text('status').notNull().default('pending'), // 'pending'|'running'|'done'|'failed'
|
||||
// The result columns below are nullable: a row can exist before any result is written.
summaryJson: text('summary_json'), // JSON: { [policy]: PolicySummary }
|
||||
// presumably holds the name of the better-performing policy — confirm against the writer
winner: text('winner'),
|
||||
personaBreakdownJson: text('persona_breakdown_json'), // JSON: { [persona]: { [policy]: {reward,n} } }
|
||||
// Timestamps are stored as text; the exact format is set by the writer (not visible here).
createdAt: text('created_at').notNull(),
|
||||
// Nullable, unlike createdAt.
finishedAt: text('finished_at'),
|
||||
});
|
||||
|
||||
// One row per tip served in a simulation round.
// Column names and nullability correspond to the
// `CREATE TABLE IF NOT EXISTS sim_events` statement in runMigrations(); the two
// definitions are maintained by hand, so change them together.
|
||||
export const simEvents = sqliteTable('sim_events', {
|
||||
id: text('id').primaryKey(),
|
||||
// Foreign key to the owning row in sim_runs.
runId: text('run_id').notNull().references(() => simRuns.id),
|
||||
round: integer('round').notNull(),
|
||||
// Simulated user and the persona assigned to it.
userId: text('user_id').notNull(),
|
||||
persona: text('persona').notNull(),
|
||||
// presumably matches one of the run's policy_a / policy_b values — confirm against the simulator
policy: text('policy').notNull(),
|
||||
// Attributes of the tip/task that was served.
tipContent: text('tip_content').notNull(),
|
||||
priority: integer('priority').notNull(),
|
||||
// Stored as an INTEGER column; boolean mode exposes it as true/false.
isOverdue: integer('is_overdue', { mode: 'boolean' }).notNull(),
|
||||
action: text('action').notNull(), // 'done' | 'snooze' | 'dismiss'
|
||||
dwellMs: integer('dwell_ms'), // simulated ms between tip appear and user action
|
||||
// Required here, whereas tip_feedback.reward_milli is nullable.
rewardMilli: integer('reward_milli').notNull(), // inferred reward × 1000
|
||||
// Time context recorded with the event.
// presumably hour is 0–23 and day_of_week is 0–6 — confirm against the simulator
hour: integer('hour').notNull(),
|
||||
dayOfWeek: integer('day_of_week').notNull(),
|
||||
// Stored as text; the exact format is set by the writer (not visible here).
createdAt: text('created_at').notNull(),
|
||||
});
|
||||
|
||||
// Admin saved SQL queries.
|
||||
export const savedQueries = sqliteTable('saved_queries', {
|
||||
id: text('id').primaryKey(),
|
||||
|
||||
Reference in New Issue
Block a user