- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
replace the linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
500 lines
19 KiB
TypeScript
500 lines
19 KiB
TypeScript
'use client';
|
||
|
||
import { useEffect, useRef, useState } from 'react';
|
||
import { AdminShell } from '@/components/AdminShell';
|
||
import {
|
||
type PolicySummary,
|
||
type SimEvent,
|
||
type SimRun,
|
||
getSimRun,
|
||
getSimRuns,
|
||
startSimulation,
|
||
} from '@/lib/api';
|
||
|
||
/** Policy identifiers offered in the "Policy A" / "Policy B" dropdowns. */
const KNOWN_POLICIES = ['linucb-v1', 'egreedy-v1'];

/** User actions rendered as rows of the action distribution table. */
const ACTIONS = ['done', 'snooze', 'dismiss'];

// Reference values only — the real reward for 'done' is inferred from dwell time.
const ACTION_REWARDS: Record<string, number> = {
  done: 1.0,
  snooze: 0.1,
  dismiss: -1.0,
};
|
||
|
||
// ── SVG reward curve ────────────────────────────────────────────────────────
|
||
|
||
/**
 * Inline SVG line chart of cumulative reward per round, one curve per policy.
 *
 * The y-range always contains 0 and 1, so the vertical scale cannot collapse to a
 * zero-height span, and the x-axis covers the longest `cumulative_rewards` series
 * found among the given policies.
 *
 * @param summary  Per-policy summary objects keyed by policy name.
 * @param policies Policy names to plot, in legend/colour order.
 */
function RewardCurve({ summary, policies }: { summary: Record<string, PolicySummary>; policies: string[] }) {
  const W = 520;
  const H = 160;
  const PAD = { t: 10, r: 10, b: 30, l: 40 };
  const COLORS = ['#818cf8', '#34d399', '#f87171', '#fbbf24'];

  // Size of the drawable area once padding is removed.
  const plotW = W - PAD.l - PAD.r;
  const plotH = H - PAD.t - PAD.b;

  const seriesFor = (policy: string) => summary[policy]?.cumulative_rewards ?? [];
  const colorFor = (index: number) => COLORS[index % COLORS.length];

  const everyValue = policies.flatMap((p) => seriesFor(p));
  const minY = Math.min(0, ...everyValue);
  const maxY = Math.max(1, ...everyValue);
  const n = Math.max(...policies.map((p) => seriesFor(p).length));

  // Map a round index / reward value to SVG coordinates (y grows downwards).
  const xScale = (i: number) => PAD.l + (i / Math.max(1, n - 1)) * plotW;
  const yScale = (v: number) => PAD.t + plotH - ((v - minY) / (maxY - minY)) * plotH;

  // Build an SVG path: "M" for the first point, "L" for every later one.
  const toPath = (vals: number[]) => {
    const segments: string[] = [];
    vals.forEach((v, i) => {
      const cmd = i === 0 ? 'M' : 'L';
      segments.push(`${cmd}${xScale(i).toFixed(1)},${yScale(v).toFixed(1)}`);
    });
    return segments.join(' ');
  };

  // Axis labels: bottom, middle and top of the y-range.
  const yLabels = [minY, (minY + maxY) / 2, maxY];

  return (
    <svg width={W} height={H} className="overflow-visible">
      {/* Grid */}
      {yLabels.map((tick, idx) => {
        const y = yScale(tick);
        return (
          <g key={idx}>
            <line
              x1={PAD.l}
              y1={y}
              x2={W - PAD.r}
              y2={y}
              stroke="#374151"
              strokeWidth={0.5}
              strokeDasharray="3,3"
            />
            <text x={PAD.l - 4} y={y + 4} textAnchor="end" fontSize={10} fill="#9ca3af">
              {tick.toFixed(1)}
            </text>
          </g>
        );
      })}
      {/* Zero line */}
      {minY < 0 ? (
        <line x1={PAD.l} y1={yScale(0)} x2={W - PAD.r} y2={yScale(0)} stroke="#6b7280" strokeWidth={1} />
      ) : null}
      {/* Curves */}
      {policies.map((p, pi) => {
        const vals = seriesFor(p);
        if (!vals.length) return null;
        const lastIdx = vals.length - 1;
        const stroke = colorFor(pi);
        return (
          <g key={p}>
            <path d={toPath(vals)} fill="none" stroke={stroke} strokeWidth={2} />
            {/* Dot marking the most recent point of the series */}
            <circle cx={xScale(lastIdx)} cy={yScale(vals[lastIdx])} r={3} fill={stroke} />
          </g>
        );
      })}
      {/* X axis */}
      <line x1={PAD.l} y1={H - PAD.b} x2={W - PAD.r} y2={H - PAD.b} stroke="#4b5563" />
      <text x={W / 2} y={H - 2} textAnchor="middle" fontSize={10} fill="#6b7280">Round</text>
      {/* Legend */}
      {policies.map((p, pi) => (
        <g key={p} transform={`translate(${PAD.l + pi * 130},${H - PAD.b + 14})`}>
          <rect width={12} height={3} y={3} fill={colorFor(pi)} />
          <text x={16} y={8} fontSize={10} fill="#d1d5db">{p}</text>
        </g>
      ))}
    </svg>
  );
}
|
||
|
||
// ── Action distribution table ───────────────────────────────────────────────
|
||
|
||
/**
 * Table of how often each action in `ACTIONS` occurred per policy, with the share of
 * that policy's total pulls and the reference reward from `ACTION_REWARDS`.
 *
 * @param summary  Per-policy summary objects keyed by policy name; `action_counts`
 *                 is read from each one (missing entries count as 0).
 * @param policies Policy names, one column each.
 */
function ActionTable({
  summary,
  policies,
}: {
  summary: Record<string, PolicySummary>;
  policies: string[];
}) {
  // Total pulls per policy, computed once here instead of once per table cell.
  const totals = new Map<string, number>(
    policies.map((p): [string, number] => [
      p,
      Object.values(summary[p]?.action_counts ?? {}).reduce((a, b) => a + b, 0),
    ]),
  );

  return (
    <table className="text-sm w-full">
      <thead>
        <tr className="text-left text-gray-500 border-b border-gray-800">
          <th className="py-1 pr-4 font-medium">Action</th>
          {policies.map((p) => (
            <th key={p} className="py-1 pr-4 font-medium">{p}</th>
          ))}
          <th className="py-1 font-medium text-gray-400">Reward</th>
        </tr>
      </thead>
      <tbody>
        {ACTIONS.map((action) => {
          const reward = ACTION_REWARDS[action] ?? 0;
          const rewardCls =
            reward > 0 ? 'text-green-400' : reward < 0 ? 'text-red-400' : 'text-gray-500';
          return (
            <tr key={action} className="border-b border-gray-900">
              <td className="py-1.5 pr-4 text-gray-300">{action}</td>
              {policies.map((p) => {
                const n = summary[p]?.action_counts?.[action] ?? 0;
                const total = totals.get(p) ?? 0;
                // With no pulls there is no meaningful share: show a bare dash
                // rather than "—%".
                const share = total > 0 ? `${((n / total) * 100).toFixed(1)}%` : '—';
                return (
                  <td key={p} className="py-1.5 pr-4 text-gray-200">
                    {n} <span className="text-gray-500 text-xs">({share})</span>
                  </td>
                );
              })}
              <td className={`py-1.5 text-xs font-mono ${rewardCls}`}>
                {reward >= 0 ? '+' : ''}{reward}
              </td>
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}
|
||
|
||
// ── Per-persona breakdown ───────────────────────────────────────────────────
|
||
|
||
/**
 * Per-persona comparison table: one row per persona, one column per policy showing
 * the mean reward (`reward / n`), plus the policy with the highest mean.
 *
 * The winner is chosen on the same mean value that is displayed, so the green
 * highlight always agrees with the numbers in the row. Policies with no entry or
 * with `n === 0` show "—" and cannot win; if no policy has data the winner cell
 * shows "—". Ties go to the policy listed first.
 *
 * @param breakdown persona → policy → `{ reward, n }`; `reward` is divided by `n`
 *                  to obtain the mean.
 * @param policies  Policy names, one column each.
 */
function PersonaTable({
  breakdown,
  policies,
}: {
  breakdown: Record<string, Record<string, { reward: number; n: number }>>;
  policies: string[];
}) {
  const personas = Object.keys(breakdown);
  return (
    <table className="text-sm w-full">
      <thead>
        <tr className="text-left text-gray-500 border-b border-gray-800">
          <th className="py-1 pr-6 font-medium">Persona</th>
          {policies.map((p) => (
            <th key={p} className="py-1 pr-6 font-medium">{p}<br /><span className="font-normal text-xs">mean reward</span></th>
          ))}
          <th className="py-1 font-medium">Winner</th>
        </tr>
      </thead>
      <tbody>
        {personas.map((persona) => {
          const pdata = breakdown[persona] ?? {};

          // Mean reward for one policy, or null when there is nothing to average.
          const meanOf = (p: string): number | null => {
            const d = pdata[p];
            return d && d.n > 0 ? d.reward / d.n : null;
          };

          // Explicit loop instead of a seedless reduce: safe for an empty policy
          // list, and the strict ">" keeps the earlier policy on ties.
          let best: string | null = null;
          let bestMean = -Infinity;
          for (const p of policies) {
            const m = meanOf(p);
            if (m !== null && m > bestMean) {
              best = p;
              bestMean = m;
            }
          }

          return (
            <tr key={persona} className="border-b border-gray-900">
              <td className="py-1.5 pr-6 text-gray-300">{persona}</td>
              {policies.map((p) => {
                const m = meanOf(p);
                return (
                  <td key={p} className={`py-1.5 pr-6 font-mono text-xs ${p === best ? 'text-green-400' : 'text-gray-400'}`}>
                    {m !== null ? m.toFixed(3) : '—'}
                  </td>
                );
              })}
              <td className="py-1.5 text-xs text-indigo-400">{best ?? '—'}</td>
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}
|
||
|
||
// ── Run detail panel ────────────────────────────────────────────────────────
|
||
|
||
/**
 * Parse a JSON string stored on a run record.
 *
 * Returns null for an empty/missing value and also for malformed JSON (which is
 * logged), so a bad payload hides the affected section instead of throwing during
 * render. The result is cast to `T` without validation.
 */
function parseRunJson<T>(raw: string | null | undefined, label: string): T | null {
  if (!raw) return null;
  try {
    return JSON.parse(raw) as T;
  } catch (e: unknown) {
    console.error(`RunDetail: could not parse ${label}`, e);
    return null;
  }
}

/**
 * Modal panel showing one simulation run: status, winner, per-policy metrics, the
 * cumulative reward chart, action distribution and per-persona breakdown.
 *
 * The run is fetched with `getSimRun` on mount and then every 3 seconds; polling
 * stops once the status is neither 'running' nor 'pending', or after a failed fetch.
 *
 * @param runId   Id of the run to display.
 * @param onClose Called when the user clicks the close button.
 */
function RunDetail({ runId, onClose }: { runId: string; onClose: () => void }) {
  const [data, setData] = useState<{ run: SimRun; events: SimEvent[] } | null>(null);
  const [error, setError] = useState('');
  const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);

  useEffect(() => {
    // Set by the cleanup so a response that arrives after unmount, or after runId
    // changed, is discarded instead of overwriting state for the wrong run.
    let cancelled = false;

    const stopPolling = () => {
      if (pollRef.current) {
        clearInterval(pollRef.current);
        pollRef.current = null;
      }
    };

    const load = async () => {
      try {
        const d = await getSimRun(runId);
        if (cancelled) return;
        setData(d);
        if (d.run.status !== 'running' && d.run.status !== 'pending') {
          stopPolling();
        }
      } catch (e: unknown) {
        if (cancelled) return;
        setError(e instanceof Error ? e.message : 'Failed to load');
        stopPolling();
      }
    };

    // Drop anything left over from a previously displayed run.
    setData(null);
    setError('');

    void load();
    pollRef.current = setInterval(() => {
      void load();
    }, 3000);

    return () => {
      cancelled = true;
      stopPolling();
    };
  }, [runId]);

  const run = data?.run;
  const summary = parseRunJson<Record<string, PolicySummary>>(run?.summaryJson, 'summaryJson');
  const breakdown = parseRunJson<Record<string, Record<string, { reward: number; n: number }>>>(
    run?.personaBreakdownJson,
    'personaBreakdownJson',
  );
  const policies = run ? [run.policyA, run.policyB] : [];

  return (
    <div className="fixed inset-0 bg-black/70 z-50 flex items-start justify-center pt-16 px-4 overflow-auto">
      <div className="bg-gray-950 border border-gray-800 rounded-lg w-full max-w-3xl p-6 space-y-6">
        <div className="flex items-center justify-between">
          <div>
            <h2 className="text-lg font-semibold">Simulation {runId}</h2>
            {run && (
              <p className="text-xs text-gray-500 mt-0.5">
                {run.nUsers} users × {run.nRounds} rounds × {run.tasksPerRound} tasks
                {' · '}{run.useLlm ? 'LLM judge' : 'Rule judge'}
              </p>
            )}
          </div>
          <button onClick={onClose} className="text-gray-500 hover:text-white text-sm">✕ Close</button>
        </div>

        {error && <p className="text-red-400 text-sm">{error}</p>}

        {run && (
          <div className="flex items-center gap-3">
            <StatusBadge status={run.status} />
            {run.winner && run.status === 'done' && (
              <span className="px-2 py-0.5 bg-indigo-900/60 border border-indigo-700 rounded text-indigo-300 text-xs font-medium">
                Winner: {run.winner}
              </span>
            )}
          </div>
        )}

        {summary && (
          <>
            {/* Metric cards */}
            <div className="grid grid-cols-2 gap-3">
              {policies.map((p) => {
                const s = summary[p];
                const pulls = s?.n_pulls;
                return (
                  <div key={p} className="bg-gray-900 border border-gray-800 rounded p-4 space-y-2">
                    <div className="text-xs font-medium text-gray-400 truncate">{p}</div>
                    <div className="flex gap-4">
                      {/* Missing values are passed as undefined so Metric shows its "—" fallback. */}
                      <Metric label="Total reward" value={s?.total_reward?.toFixed(2)} />
                      <Metric label="Mean/pull" value={s?.mean_reward?.toFixed(3)} />
                      <Metric label="Pulls" value={pulls == null ? undefined : String(pulls)} />
                    </div>
                  </div>
                );
              })}
            </div>

            {/* Cumulative reward chart */}
            <div className="space-y-2">
              <h3 className="text-sm font-medium text-gray-400">Cumulative reward over rounds</h3>
              <div className="bg-gray-900 border border-gray-800 rounded p-3">
                <RewardCurve summary={summary} policies={policies} />
              </div>
            </div>

            {/* Action distribution */}
            <div className="space-y-2">
              <h3 className="text-sm font-medium text-gray-400">Action distribution</h3>
              <ActionTable summary={summary} policies={policies} />
            </div>
          </>
        )}

        {breakdown && (
          <div className="space-y-2">
            <h3 className="text-sm font-medium text-gray-400">Per-persona mean reward</h3>
            <PersonaTable breakdown={breakdown} policies={policies} />
          </div>
        )}

        {run?.status === 'running' && (
          <p className="text-yellow-400 text-xs animate-pulse">Simulation running — auto-refreshing every 3s…</p>
        )}
      </div>
    </div>
  );
}
|
||
|
||
// ── Status badge ────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Small coloured pill showing a run status.
 *
 * Statuses without an entry in the style table fall back to the 'pending' styling.
 */
function StatusBadge({ status }: { status: string }) {
  const styles: Record<string, string> = {
    pending: 'bg-gray-800 text-gray-400',
    running: 'bg-yellow-900/60 text-yellow-300 border border-yellow-700',
    done: 'bg-green-900/60 text-green-300 border border-green-700',
    failed: 'bg-red-900/60 text-red-300 border border-red-700',
  };
  const variantCls = styles[status] ?? styles.pending;

  return <span className={`px-2 py-0.5 rounded text-xs font-medium ${variantCls}`}>{status}</span>;
}
|
||
|
||
/**
 * Labelled numeric readout: a small caption above a monospace value.
 * An undefined `value` is shown as an em dash.
 */
function Metric({ label, value }: { label: string; value: string | undefined }) {
  const shown = value ?? '—';
  return (
    <div>
      <div className="text-[10px] text-gray-500 mb-0.5">{label}</div>
      <div className="text-sm font-mono text-white">{shown}</div>
    </div>
  );
}
|
||
|
||
// ── Main page ───────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Allowed range for each numeric launch field. Used both for the inputs' min/max
 * attributes and for validation before launching, so the two cannot drift apart.
 */
const SIM_LIMITS = {
  nUsers: { label: 'Users', min: 1, max: 20 },
  nRounds: { label: 'Rounds', min: 5, max: 100 },
  tasksPerRound: { label: 'Tasks/round', min: 3, max: 20 },
} as const;

/**
 * Admin page for offline policy simulations.
 *
 * Shows a launch form (two policies, users, rounds, tasks per round, judge type),
 * the list of runs returned by `getSimRuns`, and — when a run is selected or has
 * just been started — the `RunDetail` modal for it.
 */
export default function SimulationsPage() {
  const [runs, setRuns] = useState<SimRun[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');
  const [selectedId, setSelectedId] = useState<string | null>(null);

  // Form state
  const [nUsers, setNUsers] = useState(5);
  const [nRounds, setNRounds] = useState(20);
  const [tasksPerRound, setTasksPerRound] = useState(8);
  const [useLlm, setUseLlm] = useState(false);
  const [policyA, setPolicyA] = useState('linucb-v1');
  const [policyB, setPolicyB] = useState('egreedy-v1');
  const [launching, setLaunching] = useState(false);
  const [launchError, setLaunchError] = useState('');

  /** Fetch the run list; a failure is shown above the list. */
  const loadRuns = async () => {
    setLoading(true);
    // Clear any previous failure so a successful refresh removes the stale banner.
    setError('');
    try {
      const { runs: r } = await getSimRuns();
      setRuns(r);
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Failed to load');
    } finally {
      setLoading(false);
    }
  };

  useEffect(() => {
    void loadRuns();
  }, []);

  /** Validate the form, start a simulation, refresh the list and open the new run. */
  const handleStart = async () => {
    if (policyA === policyB) {
      setLaunchError('Policies must be different');
      return;
    }

    // The min/max attributes are not enforced for typed values, and an emptied
    // field becomes 0 via Number(''), so check the values explicitly.
    const numericFields = [
      { ...SIM_LIMITS.nUsers, value: nUsers },
      { ...SIM_LIMITS.nRounds, value: nRounds },
      { ...SIM_LIMITS.tasksPerRound, value: tasksPerRound },
    ];
    const invalid = numericFields.find(
      (f) => !Number.isInteger(f.value) || f.value < f.min || f.value > f.max,
    );
    if (invalid) {
      setLaunchError(`${invalid.label} must be a whole number between ${invalid.min} and ${invalid.max}`);
      return;
    }

    setLaunching(true);
    setLaunchError('');
    try {
      const { id } = await startSimulation({
        nUsers,
        nRounds,
        tasksPerRound,
        useLlm,
        policies: [policyA, policyB],
      });
      await loadRuns();
      setSelectedId(id);
    } catch (e: unknown) {
      setLaunchError(e instanceof Error ? e.message : 'Failed to start');
    } finally {
      setLaunching(false);
    }
  };

  return (
    <AdminShell>
      <div className="space-y-6 max-w-4xl">
        <div>
          <h1 className="text-xl font-semibold">Simulations</h1>
          <p className="text-sm text-gray-500 mt-1">
            Compare recommendation policies offline using synthetic users and LLM-judged reactions.
            ml/serving must be running.
          </p>
        </div>

        {/* Launch form */}
        <div className="bg-gray-900 border border-gray-800 rounded-lg p-5 space-y-4">
          <h2 className="text-sm font-semibold text-gray-300">New simulation</h2>
          <div className="grid grid-cols-2 gap-4 sm:grid-cols-3">
            <Field label="Policy A">
              <select value={policyA} onChange={(e) => setPolicyA(e.target.value)} className={selectCls}>
                {KNOWN_POLICIES.map((p) => <option key={p}>{p}</option>)}
              </select>
            </Field>
            <Field label="Policy B">
              <select value={policyB} onChange={(e) => setPolicyB(e.target.value)} className={selectCls}>
                {KNOWN_POLICIES.map((p) => <option key={p}>{p}</option>)}
              </select>
            </Field>
            <Field label="Users">
              <input type="number" min={SIM_LIMITS.nUsers.min} max={SIM_LIMITS.nUsers.max} value={nUsers}
                onChange={(e) => setNUsers(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Rounds">
              <input type="number" min={SIM_LIMITS.nRounds.min} max={SIM_LIMITS.nRounds.max} value={nRounds}
                onChange={(e) => setNRounds(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Tasks/round">
              <input type="number" min={SIM_LIMITS.tasksPerRound.min} max={SIM_LIMITS.tasksPerRound.max} value={tasksPerRound}
                onChange={(e) => setTasksPerRound(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Judge">
              <label className="flex items-center gap-2 cursor-pointer mt-1">
                <input type="checkbox" checked={useLlm} onChange={(e) => setUseLlm(e.target.checked)}
                  className="accent-indigo-500" />
                <span className="text-sm text-gray-300">Claude Haiku</span>
              </label>
              {!useLlm && <p className="text-[10px] text-gray-500 mt-0.5">Deterministic rule judge</p>}
              {useLlm && <p className="text-[10px] text-yellow-500 mt-0.5">Requires ANTHROPIC_API_KEY</p>}
            </Field>
          </div>
          {launchError && <p className="text-red-400 text-xs">{launchError}</p>}
          <button
            onClick={handleStart}
            disabled={launching}
            className="bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 text-white rounded px-4 py-1.5 text-sm"
          >
            {launching ? 'Starting…' : 'Run simulation'}
          </button>
        </div>

        {/* Runs list */}
        <div className="space-y-2">
          <div className="flex items-center justify-between">
            <h2 className="text-sm font-semibold text-gray-300">Past runs</h2>
            <button onClick={loadRuns} className="text-xs text-gray-500 hover:text-white">Refresh</button>
          </div>

          {loading && <p className="text-gray-500 text-sm">Loading…</p>}
          {error && <p className="text-red-400 text-sm">{error}</p>}

          {runs.length === 0 && !loading && (
            <p className="text-gray-600 text-sm">No simulation runs yet.</p>
          )}

          <div className="space-y-1">
            {runs.map((run) => (
              <button
                key={run.id}
                onClick={() => setSelectedId(run.id)}
                className="w-full text-left bg-gray-900 hover:bg-gray-800 border border-gray-800 rounded px-4 py-3 flex items-center justify-between gap-4"
              >
                <div className="flex items-center gap-3 min-w-0">
                  <StatusBadge status={run.status} />
                  <span className="text-sm text-gray-300 font-mono truncate">{run.id}</span>
                  <span className="text-xs text-gray-500 hidden sm:inline">
                    {run.policyA} vs {run.policyB}
                  </span>
                </div>
                <div className="flex items-center gap-4 flex-shrink-0">
                  {run.winner && (
                    <span className="text-xs text-indigo-400">→ {run.winner}</span>
                  )}
                  <span className="text-xs text-gray-600">{run.nUsers}u × {run.nRounds}r</span>
                  <span className="text-xs text-gray-600">
                    {new Date(run.createdAt).toLocaleString()}
                  </span>
                </div>
              </button>
            ))}
          </div>
        </div>
      </div>

      {selectedId && (
        <RunDetail runId={selectedId} onClose={() => setSelectedId(null)} />
      )}
    </AdminShell>
  );
}
|
||
|
||
// ── Small helpers ───────────────────────────────────────────────────────────
|
||
|
||
// Text inputs and selects share one look, so both class names come from a single literal.
const CONTROL_CLS =
  'w-full bg-gray-800 border border-gray-700 rounded px-2.5 py-1.5 text-sm text-gray-200 focus:outline-none focus:border-indigo-500';

/** Tailwind classes for the numeric `<input>` fields of the launch form. */
const inputCls = CONTROL_CLS;

/** Tailwind classes for the policy `<select>` fields of the launch form. */
const selectCls = CONTROL_CLS;
|
||
|
||
/**
 * Form field wrapper: renders a small caption followed by the control(s) passed
 * as children.
 */
function Field(props: { label: string; children: React.ReactNode }) {
  const { label, children } = props;
  return (
    <div>
      <label className="block text-xs text-gray-500 mb-1">{label}</label>
      {children}
    </div>
  );
}
|