feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework

- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy replace linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward): dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id} with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges, two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 07:44:37 +00:00
parent c5ea18ec6e
commit faf44c18fc
48 changed files with 6151 additions and 40 deletions
--- a/services/api/src/db/schema.ts
+++ b/services/api/src/db/schema.ts
@@ -29,6 +29,8 @@ export const tipFeedback = sqliteTable('tip_feedback', {
  tipId: text('tip_id').notNull(),
  action: text('action').notNull(), // 'done' | 'dismiss' | 'snooze'
  sourceId: text('source_id'),
+  dwellMs: integer('dwell_ms'),     // ms between servedAt and feedback; null if unknown
+  rewardMilli: integer('reward_milli'), // inferred reward × 1000 (e.g. 1000 = +1.0)
  createdAt: text('created_at').notNull(),
 });

@@ -81,6 +83,43 @@ export const tipScores = sqliteTable('tip_scores', {
  servedAt: text('served_at').notNull(),
 });

+// ── Simulation runs ──────────────────────────────────────────────────────────
+// One row per offline simulation run (two-policy comparison).
+export const simRuns = sqliteTable('sim_runs', {
+  id: text('id').primaryKey(),
+  policyA: text('policy_a').notNull(), // first of the two policies being compared
+  policyB: text('policy_b').notNull(), // second of the two policies being compared
+  nUsers: integer('n_users').notNull(), // NOTE(review): presumably the number of synthetic users — confirm against the sim runner
+  nRounds: integer('n_rounds').notNull(),
+  tasksPerRound: integer('tasks_per_round').notNull().default(8),
+  useLlm: integer('use_llm', { mode: 'boolean' }).notNull().default(false), // boolean stored in an integer column
+  status: text('status').notNull().default('pending'),  // 'pending'|'running'|'done'|'failed'
+  summaryJson: text('summary_json'),           // JSON: { [policy]: PolicySummary }
+  winner: text('winner'), // nullable; NOTE(review): presumably the name of the winning policy — confirm
+  personaBreakdownJson: text('persona_breakdown_json'), // JSON: { [persona]: { [policy]: {reward,n} } }
+  createdAt: text('created_at').notNull(), // stored as text; NOTE(review): presumably an ISO-8601 timestamp — confirm
+  finishedAt: text('finished_at'), // nullable text column
+});
+
+// One row per tip served in a simulation round.
+export const simEvents = sqliteTable('sim_events', {
+  id: text('id').primaryKey(),
+  runId: text('run_id').notNull().references(() => simRuns.id), // foreign key to sim_runs.id
+  round: integer('round').notNull(),
+  userId: text('user_id').notNull(),
+  persona: text('persona').notNull(),
+  policy: text('policy').notNull(), // NOTE(review): presumably matches the run's policy_a or policy_b — confirm
+  tipContent: text('tip_content').notNull(),
+  priority: integer('priority').notNull(),
+  isOverdue: integer('is_overdue', { mode: 'boolean' }).notNull(), // boolean stored in an integer column
+  action: text('action').notNull(),   // 'done' | 'snooze' | 'dismiss'
+  dwellMs: integer('dwell_ms'),       // simulated ms between the tip appearing and the user's action; nullable
+  rewardMilli: integer('reward_milli').notNull(), // inferred reward × 1000 (e.g. 1000 = +1.0)
+  hour: integer('hour').notNull(), // NOTE(review): presumably hour of day (0–23) — confirm
+  dayOfWeek: integer('day_of_week').notNull(), // NOTE(review): day-of-week index; numbering convention not shown here — confirm
+  createdAt: text('created_at').notNull(),
+});
+
 // Admin saved SQL queries.
 export const savedQueries = sqliteTable('saved_queries', {
  id: text('id').primaryKey(),