feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework

- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy replace linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward): dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id} with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges, two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 07:44:37 +00:00
parent c5ea18ec6e
commit faf44c18fc
48 changed files with 6151 additions and 40 deletions
--- a/apps/admin/src/lib/api.ts
+++ b/apps/admin/src/lib/api.ts
@@ -220,3 +220,67 @@ export function saveQuery(name: string, querySql: string) {
 export function deleteSavedQuery(id: string) {
  return apiFetch<{ ok: boolean }>(`/admin/saved-queries/${id}`, { method: 'DELETE' });
 }
+
+// ── Simulation ─────────────────────────────────────────────────────────────
+
+export interface PolicySummary { // Reward statistics for one policy. Keys are snake_case, unlike the camelCase SimRun/SimEvent fields. NOTE(review): presumably the per-policy shape inside SimRun.summaryJson — confirm.
+  total_reward: number; // NOTE(review): presumably the sum of rewards over all pulls — confirm
+  mean_reward: number; // NOTE(review): presumably total_reward / n_pulls — confirm
+  n_pulls: number; // NOTE(review): presumably the number of scored tips (bandit pulls) — confirm
+  cumulative_rewards: number[]; // NOTE(review): presumably the running reward total per step (reward-curve data) — confirm
+  action_counts: Record<string, number>; // numeric count per string key; NOTE(review): keys are presumably action names — confirm
+}
+
+export interface SimRun { // One simulation run as returned by the /admin/simulate endpoints (see getSimRuns / getSimRun).
+  id: string;
+  policyA: string; // NOTE(review): startSimulation sends a `policies` array; presumably its entries map onto policyA/policyB — confirm
+  policyB: string;
+  nUsers: number;
+  nRounds: number;
+  tasksPerRound: number;
+  useLlm: boolean;
+  status: 'pending' | 'running' | 'done' | 'failed'; // closed set of lifecycle states
+  summaryJson: string | null; // JSON text, or null; NOTE(review): presumably serialized PolicySummary data, null until the run finishes — confirm
+  winner: string | null; // NOTE(review): presumably the winning policy name, null when undecided — confirm
+  personaBreakdownJson: string | null; // JSON text, or null; schema not visible in this file
+  createdAt: string; // NOTE(review): presumably an ISO-8601 timestamp — confirm
+  finishedAt: string | null; // null is allowed; NOTE(review): presumably null while the run is unfinished — confirm
+  isRunning?: boolean; // optional: may be absent from a response; NOTE(review): relationship to `status` is not visible here
+}
+
+export interface SimEvent { // One recorded event belonging to a simulation run (returned alongside the run by getSimRun).
+  id: string;
+  runId: string; // NOTE(review): presumably the SimRun.id this event belongs to — confirm
+  round: number;
+  userId: string;
+  persona: string;
+  policy: string;
+  tipContent: string;
+  priority: number;
+  isOverdue: boolean;
+  action: string; // NOTE(review): presumably the simulated user action (e.g. dismiss/snooze/done) — confirm
+  rewardMilli: number; // NOTE(review): presumably reward × 1000 stored as an integer — confirm
+  hour: number; // NOTE(review): presumably hour of day, 0–23 — confirm
+  dayOfWeek: number; // NOTE(review): presumably 0–6; which day is 0 is not visible here — confirm
+}
+
+export function startSimulation(params: { // Requests a new simulation run: POSTs `params` as JSON to /admin/simulate/start via apiFetch and returns its result.
+  nUsers: number;
+  nRounds: number;
+  tasksPerRound: number;
+  useLlm: boolean; // NOTE(review): presumably selects an LLM judge instead of the rule-based one — confirm
+  policies: string[]; // NOTE(review): SimRun exposes policyA/policyB, so presumably two entries are expected — confirm
+}) {
+  return apiFetch<{ id: string; status: string }>('/admin/simulate/start', { // NOTE(review): `status` is a plain string here, not the SimRun status union
+    method: 'POST',
+    body: JSON.stringify(params), // the whole params object is serialized as-is
+  });
+}
+
+export function getSimRuns() { // Fetches /admin/simulate/runs via apiFetch; takes no arguments.
+  return apiFetch<{ runs: SimRun[] }>('/admin/simulate/runs'); // no request options passed; response is typed as { runs: SimRun[] }
+}
+
+export function getSimRun(id: string) { // Fetches one run together with its events from /admin/simulate/{id} via apiFetch.
+  return apiFetch<{ run: SimRun; events: SimEvent[] }>(`/admin/simulate/${id}`); // NOTE(review): id is interpolated unescaped — confirm ids are always URL-safe, or wrap in encodeURIComponent
+}