feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework
- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
  replace the linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
  dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
  with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
  two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -220,3 +220,67 @@ export function saveQuery(name: string, querySql: string) {
|
||||
/**
 * Deletes a saved admin query by issuing `DELETE /admin/saved-queries/{id}`.
 *
 * The id is percent-encoded before it is placed in the path so that an id
 * containing reserved URL characters (`/`, `?`, `#`, …) cannot change which
 * route the request hits. Ids made only of unreserved characters are sent
 * unchanged.
 *
 * @param id - Identifier of the saved query to delete.
 * @returns The result of `apiFetch` typed with a `{ ok: boolean }` payload.
 */
export function deleteSavedQuery(id: string) {
  return apiFetch<{ ok: boolean }>(`/admin/saved-queries/${encodeURIComponent(id)}`, {
    method: 'DELETE',
  });
}
|
||||
|
||||
// ── Simulation ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Aggregate reward statistics for one policy.
 *
 * NOTE(review): presumably the parsed, per-policy form of a run's
 * `summaryJson`; field names are snake_case, which suggests they mirror the
 * backend payload verbatim — confirm against the producer.
 */
export interface PolicySummary {
  /** Sum of rewards collected by the policy. */
  total_reward: number;
  /** Average reward (presumably `total_reward / n_pulls` — confirm). */
  mean_reward: number;
  /** Number of pulls (decisions) recorded for the policy. */
  n_pulls: number;
  /** Running reward totals; presumably one entry per pull or round — confirm. */
  cumulative_rewards: number[];
  /** Count per action, keyed by action name. */
  action_counts: Record<string, number>;
}
|
||||
|
||||
/**
 * One offline simulation run as returned by the admin simulation endpoints.
 */
export interface SimRun {
  /** Unique identifier of the run. */
  id: string;
  /** Name of the first policy being compared. */
  policyA: string;
  /** Name of the second policy being compared. */
  policyB: string;
  /** Number of simulated users. */
  nUsers: number;
  /** Number of simulation rounds. */
  nRounds: number;
  /** Number of tasks generated per round. */
  tasksPerRound: number;
  /** Whether the run was configured to use an LLM. */
  useLlm: boolean;
  /** Lifecycle state of the run. */
  status: 'pending' | 'running' | 'done' | 'failed';
  /** Serialized summary, or `null` when absent. Presumably JSON text — confirm shape before parsing. */
  summaryJson: string | null;
  /** Winning policy, or `null` when none is recorded. */
  winner: string | null;
  /** Serialized per-persona breakdown, or `null` when absent. Presumably JSON text — confirm. */
  personaBreakdownJson: string | null;
  /** Creation timestamp as a string (format not visible here — confirm). */
  createdAt: string;
  /** Completion timestamp as a string, or `null` when not finished. */
  finishedAt: string | null;
  /** Optional liveness flag; NOTE(review): relationship to `status` is not visible here. */
  isRunning?: boolean;
}
|
||||
|
||||
/**
 * A single recorded event belonging to a simulation run.
 */
export interface SimEvent {
  /** Unique identifier of the event. */
  id: string;
  /** Identifier of the run this event belongs to. */
  runId: string;
  /** Round number in which the event occurred. */
  round: number;
  /** Identifier of the simulated user. */
  userId: string;
  /** Persona name assigned to the simulated user. */
  persona: string;
  /** Name of the policy that produced the event. */
  policy: string;
  /** Text of the tip involved in the event. */
  tipContent: string;
  /** Numeric priority (scale not visible here — confirm). */
  priority: number;
  /** Whether the item was overdue. */
  isOverdue: boolean;
  /** Action taken, as a free-form string. */
  action: string;
  /** Reward as a number; presumably reward × 1000 given the name — confirm. */
  rewardMilli: number;
  /** Hour value (presumably 0–23 — confirm). */
  hour: number;
  /** Day-of-week value (numbering convention not visible here — confirm). */
  dayOfWeek: number;
}
|
||||
|
||||
/**
 * Starts a simulation run by POSTing the given parameters, serialized as
 * JSON, to `/admin/simulate/start`.
 *
 * @param params - Run configuration; sent to the server as-is.
 * @returns The result of `apiFetch` typed with the new run's `id` and `status`.
 */
export function startSimulation(params: {
  nUsers: number;
  nRounds: number;
  tasksPerRound: number;
  useLlm: boolean;
  policies: string[];
}) {
  // Serialize up front so the request options below stay a flat literal.
  const payload = JSON.stringify(params);
  return apiFetch<{ id: string; status: string }>('/admin/simulate/start', {
    method: 'POST',
    body: payload,
  });
}
|
||||
|
||||
/**
 * Fetches the list of simulation runs from `/admin/simulate/runs`.
 *
 * @returns The result of `apiFetch` typed with a `{ runs: SimRun[] }` payload.
 */
export function getSimRuns() {
  const endpoint = '/admin/simulate/runs';
  return apiFetch<{ runs: SimRun[] }>(endpoint);
}
|
||||
|
||||
/**
 * Fetches a single simulation run together with its events from
 * `/admin/simulate/{id}`.
 *
 * The id is percent-encoded before it is placed in the path so that an id
 * containing reserved URL characters (`/`, `?`, `#`, …) cannot change which
 * route the request hits. Ids made only of unreserved characters are sent
 * unchanged.
 *
 * @param id - Identifier of the run to load.
 * @returns The result of `apiFetch` typed with `{ run, events }`.
 */
export function getSimRun(id: string) {
  return apiFetch<{ run: SimRun; events: SimEvent[] }>(
    `/admin/simulate/${encodeURIComponent(id)}`,
  );
}
|
||||
|
||||
Reference in New Issue
Block a user