Files
oO/apps/admin/src/app/simulations/page.tsx
alvis faf44c18fc feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework
- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
  replaces linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
  dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
  with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
  two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 07:44:37 +00:00

500 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use client';
import { useEffect, useRef, useState } from 'react';
import { AdminShell } from '@/components/AdminShell';
import {
type PolicySummary,
type SimEvent,
type SimRun,
getSimRun,
getSimRuns,
startSimulation,
} from '@/lib/api';
/** Policy identifiers offered in the "Policy A" / "Policy B" selectors of the launch form. */
const KNOWN_POLICIES = [
  'linucb-v1',
  'egreedy-v1',
];

/** Action names rendered as rows of the action distribution table, in display order. */
const ACTIONS = [
  'done',
  'snooze',
  'dismiss',
];

// Shown as reference only — actual reward is dwell-time inferred for 'done'
const ACTION_REWARDS: Record<string, number> = {
  done: 1.0,
  snooze: 0.1,
  dismiss: -1.0,
};
// ── SVG reward curve ────────────────────────────────────────────────────────
/**
 * Inline SVG line chart with one curve per policy, plotting each policy's
 * `cumulative_rewards` series against its index.
 *
 * The y-range always contains [0, 1] and grows to fit the data; the x-range is
 * sized to the longest series. Policies without data get a legend entry but no curve.
 */
function RewardCurve({ summary, policies }: { summary: Record<string, PolicySummary>; policies: string[] }) {
  const width = 520;
  const height = 160;
  const margin = { t: 10, r: 10, b: 30, l: 40 };
  const plotW = width - margin.l - margin.r;
  const plotH = height - margin.t - margin.b;
  const rightEdge = width - margin.r;
  const baseline = height - margin.b;

  const palette = ['#818cf8', '#34d399', '#f87171', '#fbbf24'];
  // Colors repeat once there are more policies than palette entries.
  const colorAt = (index: number) => palette[index % palette.length];

  const seriesOf = (policy: string) => summary[policy]?.cumulative_rewards ?? [];

  const everyValue = policies.flatMap((policy) => seriesOf(policy));
  const lo = Math.min(0, ...everyValue);
  const hi = Math.max(1, ...everyValue);
  const span = hi - lo;

  const longest = Math.max(...policies.map((policy) => seriesOf(policy).length));
  // Clamped to 1 so a single-point (or empty) series does not divide by zero.
  const steps = Math.max(1, longest - 1);

  const toX = (index: number) => margin.l + (index / steps) * plotW;
  const toY = (value: number) => margin.t + plotH - ((value - lo) / span) * plotH;

  // Builds the SVG path data: move to the first point, then line to each subsequent one.
  const tracePath = (values: number[]) => {
    const segments: string[] = [];
    values.forEach((value, index) => {
      const command = index === 0 ? 'M' : 'L';
      segments.push(`${command}${toX(index).toFixed(1)},${toY(value).toFixed(1)}`);
    });
    return segments.join(' ');
  };

  // Horizontal guides at the bottom, middle and top of the y-range
  const gridValues = [lo, (lo + hi) / 2, hi];

  return (
    <svg width={width} height={height} className="overflow-visible">
      {/* Grid */}
      {gridValues.map((value, index) => {
        const y = toY(value);
        return (
          <g key={index}>
            <line
              x1={margin.l} y1={y} x2={rightEdge} y2={y}
              stroke="#374151" strokeWidth={0.5} strokeDasharray="3,3"
            />
            <text x={margin.l - 4} y={y + 4} textAnchor="end" fontSize={10} fill="#9ca3af">
              {value.toFixed(1)}
            </text>
          </g>
        );
      })}
      {/* Zero line */}
      {lo < 0 ? (
        <line x1={margin.l} y1={toY(0)} x2={rightEdge} y2={toY(0)}
          stroke="#6b7280" strokeWidth={1} />
      ) : null}
      {/* Curves */}
      {policies.map((policy, index) => {
        const values = seriesOf(policy);
        if (values.length === 0) return null;
        const last = values.length - 1;
        const color = colorAt(index);
        return (
          <g key={policy}>
            <path d={tracePath(values)} fill="none" stroke={color} strokeWidth={2} />
            <circle cx={toX(last)} cy={toY(values[last])} r={3} fill={color} />
          </g>
        );
      })}
      {/* X axis */}
      <line x1={margin.l} y1={baseline} x2={rightEdge} y2={baseline} stroke="#4b5563" />
      <text x={width / 2} y={height - 2} textAnchor="middle" fontSize={10} fill="#6b7280">Round</text>
      {/* Legend */}
      {policies.map((policy, index) => (
        <g key={policy} transform={`translate(${margin.l + index * 130},${baseline + 14})`}>
          <rect width={12} height={3} y={3} fill={colorAt(index)} />
          <text x={16} y={8} fontSize={10} fill="#d1d5db">{policy}</text>
        </g>
      ))}
    </svg>
  );
}
// ── Action distribution table ───────────────────────────────────────────────
/**
 * Table of how often each action in `ACTIONS` was taken, per policy.
 *
 * Each policy cell shows the raw count plus its share of all counts recorded
 * for that policy; the last column shows the reference reward from
 * `ACTION_REWARDS`.
 *
 * @param summary  Per-policy summary keyed by policy name; `action_counts` is read from it.
 * @param policies Policy names, in column order.
 */
function ActionTable({
  summary,
  policies,
}: {
  summary: Record<string, PolicySummary>;
  policies: string[];
}) {
  // Per-policy totals do not depend on the row, so compute them once up front.
  const totals = new Map<string, number>();
  for (const p of policies) {
    totals.set(
      p,
      Object.values(summary[p]?.action_counts ?? {}).reduce((a, b) => a + b, 0),
    );
  }

  return (
    <table className="text-sm w-full">
      <thead>
        <tr className="text-left text-gray-500 border-b border-gray-800">
          <th className="py-1 pr-4 font-medium">Action</th>
          {policies.map((p) => (
            <th key={p} className="py-1 pr-4 font-medium">{p}</th>
          ))}
          <th className="py-1 font-medium text-gray-400">Reward</th>
        </tr>
      </thead>
      <tbody>
        {ACTIONS.map((action) => {
          const reward = ACTION_REWARDS[action];
          // An action without a reference reward is shown as a neutral dash.
          const rewardTone =
            reward === undefined || reward === 0
              ? 'text-gray-500'
              : reward > 0
                ? 'text-green-400'
                : 'text-red-400';
          const rewardText = reward === undefined ? '—' : `${reward >= 0 ? '+' : ''}${reward}`;
          return (
            <tr key={action} className="border-b border-gray-900">
              <td className="py-1.5 pr-4 text-gray-300">{action}</td>
              {policies.map((p) => {
                const n = summary[p]?.action_counts?.[action] ?? 0;
                const total = totals.get(p) ?? 0;
                // The percent sign belongs to the number: with no data show "(—)", not "(—%)".
                const share = total > 0 ? `${((n / total) * 100).toFixed(1)}%` : '—';
                return (
                  <td key={p} className="py-1.5 pr-4 text-gray-200">
                    {n} <span className="text-gray-500 text-xs">({share})</span>
                  </td>
                );
              })}
              <td className={`py-1.5 text-xs font-mono ${rewardTone}`}>{rewardText}</td>
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}
// ── Per-persona breakdown ───────────────────────────────────────────────────
/**
 * Per-persona comparison of the policies' mean reward.
 *
 * `breakdown[persona][policy]` holds a reward sum and a pull count `n`; the
 * table shows `reward / n` and names the policy with the highest mean as the
 * winner. The winner is decided on the same mean that is displayed, so a
 * policy with fewer (or zero) pulls cannot win merely because its summed
 * reward is closer to zero. Ties go to the policy listed first; when no policy
 * has data for a persona the winner cell shows a dash and nothing is highlighted.
 *
 * @param breakdown Reward sum and pull count per persona and policy.
 * @param policies  Policy names, in column order.
 */
function PersonaTable({
  breakdown,
  policies,
}: {
  breakdown: Record<string, Record<string, { reward: number; n: number }>>;
  policies: string[];
}) {
  const personas = Object.keys(breakdown);
  return (
    <table className="text-sm w-full">
      <thead>
        <tr className="text-left text-gray-500 border-b border-gray-800">
          <th className="py-1 pr-6 font-medium">Persona</th>
          {policies.map((p) => (
            <th key={p} className="py-1 pr-6 font-medium">{p}<br /><span className="font-normal text-xs">mean reward</span></th>
          ))}
          <th className="py-1 font-medium">Winner</th>
        </tr>
      </thead>
      <tbody>
        {personas.map((persona) => {
          const pdata = breakdown[persona];
          // Mean reward for one policy, or null when it has no pulls for this persona.
          const meanOf = (p: string): number | null => {
            const d = pdata?.[p];
            return d && d.n > 0 ? d.reward / d.n : null;
          };

          // Strict '>' keeps the earlier policy on ties; a plain loop also copes
          // with an empty policy list, where reduce() without a seed would throw.
          let best: string | null = null;
          let bestMean = -Infinity;
          for (const p of policies) {
            const m = meanOf(p);
            if (m !== null && m > bestMean) {
              best = p;
              bestMean = m;
            }
          }

          return (
            <tr key={persona} className="border-b border-gray-900">
              <td className="py-1.5 pr-6 text-gray-300">{persona}</td>
              {policies.map((p) => {
                const m = meanOf(p);
                return (
                  <td key={p} className={`py-1.5 pr-6 font-mono text-xs ${p === best ? 'text-green-400' : 'text-gray-400'}`}>
                    {m !== null ? m.toFixed(3) : '—'}
                  </td>
                );
              })}
              <td className="py-1.5 text-xs text-indigo-400">{best ?? '—'}</td>
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}
// ── Run detail panel ────────────────────────────────────────────────────────
/** Outcome of decoding one of a run's JSON text columns. */
type ParsedColumn<T> = { value: T | null; malformed: boolean };

/**
 * Decodes a JSON text column without throwing.
 * Missing or empty input yields `value: null`; invalid JSON yields `value: null`
 * with `malformed` set so the caller can tell the two cases apart.
 */
function parseJsonColumn<T>(raw: string | null | undefined): ParsedColumn<T> {
  if (!raw) return { value: null, malformed: false };
  try {
    return { value: JSON.parse(raw) as T, malformed: false };
  } catch {
    return { value: null, malformed: true };
  }
}

/**
 * Modal showing one simulation run: status, per-policy metrics, reward curve,
 * action distribution and per-persona breakdown.
 *
 * The run is fetched immediately and then re-fetched every 3 seconds until its
 * status is neither `running` nor `pending`, or until a request fails.
 *
 * @param runId   Identifier of the run to display.
 * @param onClose Called when the user clicks the close button.
 */
function RunDetail({ runId, onClose }: { runId: string; onClose: () => void }) {
  const [data, setData] = useState<{ run: SimRun; events: SimEvent[] } | null>(null);
  const [error, setError] = useState('');
  const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);

  useEffect(() => {
    // Flipped by the cleanup so a response that arrives after unmount, or after
    // runId changed, can neither overwrite state nor cancel the newer poll timer.
    let active = true;

    const stopPolling = () => {
      if (pollRef.current) {
        clearInterval(pollRef.current);
        pollRef.current = null;
      }
    };

    const load = async () => {
      try {
        const d = await getSimRun(runId);
        if (!active) return;
        setData(d);
        if (d.run.status !== 'running' && d.run.status !== 'pending') {
          stopPolling();
        }
      } catch (e: unknown) {
        if (!active) return;
        setError(e instanceof Error ? e.message : 'Failed to load');
        stopPolling();
      }
    };

    // Drop whatever a previous runId left behind before showing the new run.
    setData(null);
    setError('');
    void load();
    pollRef.current = setInterval(() => { void load(); }, 3000);

    return () => {
      active = false;
      stopPolling();
    };
  }, [runId]);

  const run = data?.run;
  // A malformed payload must not throw during render and take the whole modal down.
  const { value: summary, malformed: summaryMalformed } =
    parseJsonColumn<Record<string, PolicySummary>>(run?.summaryJson);
  const { value: breakdown, malformed: breakdownMalformed } =
    parseJsonColumn<Record<string, Record<string, { reward: number; n: number }>>>(
      run?.personaBreakdownJson,
    );
  const policies = run ? [run.policyA, run.policyB] : [];

  return (
    <div className="fixed inset-0 bg-black/70 z-50 flex items-start justify-center pt-16 px-4 overflow-auto">
      <div className="bg-gray-950 border border-gray-800 rounded-lg w-full max-w-3xl p-6 space-y-6">
        <div className="flex items-center justify-between">
          <div>
            <h2 className="text-lg font-semibold">Simulation {runId}</h2>
            {run && (
              <p className="text-xs text-gray-500 mt-0.5">
                {run.nUsers} users × {run.nRounds} rounds × {run.tasksPerRound} tasks
                {' · '}{run.useLlm ? 'LLM judge' : 'Rule judge'}
              </p>
            )}
          </div>
          <button onClick={onClose} className="text-gray-500 hover:text-white text-sm"> Close</button>
        </div>
        {error && <p className="text-red-400 text-sm">{error}</p>}
        {(summaryMalformed || breakdownMalformed) && (
          <p className="text-red-400 text-sm">Stored results could not be parsed.</p>
        )}
        {run && (
          <div className="flex items-center gap-3">
            <StatusBadge status={run.status} />
            {run.winner && run.status === 'done' && (
              <span className="px-2 py-0.5 bg-indigo-900/60 border border-indigo-700 rounded text-indigo-300 text-xs font-medium">
                Winner: {run.winner}
              </span>
            )}
          </div>
        )}
        {summary && (
          <>
            {/* Metric cards */}
            <div className="grid grid-cols-2 gap-3">
              {policies.map((p) => {
                const s = summary[p];
                return (
                  <div key={p} className="bg-gray-900 border border-gray-800 rounded p-4 space-y-2">
                    <div className="text-xs font-medium text-gray-400 truncate">{p}</div>
                    <div className="flex gap-4">
                      <Metric label="Total reward" value={s?.total_reward.toFixed(2)} />
                      <Metric label="Mean/pull" value={s?.mean_reward.toFixed(3)} />
                      {/* String(undefined) would print "undefined"; pass undefined so Metric shows its dash. */}
                      <Metric label="Pulls" value={s ? String(s.n_pulls) : undefined} />
                    </div>
                  </div>
                );
              })}
            </div>
            {/* Cumulative reward chart */}
            <div className="space-y-2">
              <h3 className="text-sm font-medium text-gray-400">Cumulative reward over rounds</h3>
              <div className="bg-gray-900 border border-gray-800 rounded p-3">
                <RewardCurve summary={summary} policies={policies} />
              </div>
            </div>
            {/* Action distribution */}
            <div className="space-y-2">
              <h3 className="text-sm font-medium text-gray-400">Action distribution</h3>
              <ActionTable summary={summary} policies={policies} />
            </div>
          </>
        )}
        {breakdown && (
          <div className="space-y-2">
            <h3 className="text-sm font-medium text-gray-400">Per-persona mean reward</h3>
            <PersonaTable breakdown={breakdown} policies={policies} />
          </div>
        )}
        {run?.status === 'running' && (
          <p className="text-yellow-400 text-xs animate-pulse">Simulation running auto-refreshing every 3s</p>
        )}
      </div>
    </div>
  );
}
// ── Status badge ────────────────────────────────────────────────────────────
/**
 * Small colored pill displaying a run status string.
 * Statuses without a dedicated style use the `pending` look.
 */
function StatusBadge({ status }: { status: string }) {
  const toneByStatus: Record<string, string> = {
    pending: 'bg-gray-800 text-gray-400',
    running: 'bg-yellow-900/60 text-yellow-300 border border-yellow-700',
    done: 'bg-green-900/60 text-green-300 border border-green-700',
    failed: 'bg-red-900/60 text-red-300 border border-red-700',
  };
  const tone = toneByStatus[status] ?? toneByStatus.pending;
  const badgeClass = `px-2 py-0.5 rounded text-xs font-medium ${tone}`;

  return <span className={badgeClass}>{status}</span>;
}
/**
 * Labelled metric: a small caption above a monospace value.
 * A null or undefined `value` is rendered as an em dash.
 */
function Metric({ label, value }: { label: string; value: string | undefined }) {
  const shown = value ?? '—';
  const caption = <div className="text-[10px] text-gray-500 mb-0.5">{label}</div>;
  const figure = <div className="text-sm font-mono text-white">{shown}</div>;

  return (
    <div>
      {caption}
      {figure}
    </div>
  );
}
// ── Main page ───────────────────────────────────────────────────────────────
/** Accepted ranges for the numeric launch-form fields (also used as the inputs' min/max). */
const SIM_LIMITS = {
  nUsers: { min: 1, max: 20 },
  nRounds: { min: 5, max: 100 },
  tasksPerRound: { min: 3, max: 20 },
} as const;

/**
 * Returns a user-facing message when `value` is not a whole number inside
 * `limits`, or null when it is acceptable.
 */
function rangeError(
  label: string,
  value: number,
  limits: { min: number; max: number },
): string | null {
  const ok = Number.isInteger(value) && value >= limits.min && value <= limits.max;
  return ok ? null : `${label} must be a whole number between ${limits.min} and ${limits.max}`;
}

/**
 * Admin page for offline policy simulations: a launch form, the list of past
 * runs, and a detail modal for the selected run.
 */
export default function SimulationsPage() {
  const [runs, setRuns] = useState<SimRun[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');
  const [selectedId, setSelectedId] = useState<string | null>(null);
  // Form state
  const [nUsers, setNUsers] = useState(5);
  const [nRounds, setNRounds] = useState(20);
  const [tasksPerRound, setTasksPerRound] = useState(8);
  const [useLlm, setUseLlm] = useState(false);
  const [policyA, setPolicyA] = useState('linucb-v1');
  const [policyB, setPolicyB] = useState('egreedy-v1');
  const [launching, setLaunching] = useState(false);
  const [launchError, setLaunchError] = useState('');

  /** Fetches the run list; failures are shown above the list instead of thrown. */
  const loadRuns = async () => {
    setLoading(true);
    // Clear the previous failure so a successful refresh does not keep showing it.
    setError('');
    try {
      const { runs: r } = await getSimRuns();
      setRuns(r);
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Failed to load');
    } finally {
      setLoading(false);
    }
  };

  useEffect(() => { void loadRuns(); }, []);

  /** Validates the form, starts a run, refreshes the list and opens the new run. */
  const handleStart = async () => {
    if (policyA === policyB) {
      setLaunchError('Policies must be different');
      return;
    }
    // The inputs' min/max attributes are only hints: a cleared field yields 0 and
    // typed values can be fractional or out of range, so check before calling the API.
    const invalid =
      rangeError('Users', nUsers, SIM_LIMITS.nUsers) ??
      rangeError('Rounds', nRounds, SIM_LIMITS.nRounds) ??
      rangeError('Tasks/round', tasksPerRound, SIM_LIMITS.tasksPerRound);
    if (invalid) {
      setLaunchError(invalid);
      return;
    }
    setLaunching(true);
    setLaunchError('');
    try {
      const { id } = await startSimulation({
        nUsers,
        nRounds,
        tasksPerRound,
        useLlm,
        policies: [policyA, policyB],
      });
      await loadRuns();
      setSelectedId(id);
    } catch (e: unknown) {
      setLaunchError(e instanceof Error ? e.message : 'Failed to start');
    } finally {
      setLaunching(false);
    }
  };

  /** Closes the detail modal and refreshes the list, whose status/winner may have changed meanwhile. */
  const handleCloseDetail = () => {
    setSelectedId(null);
    void loadRuns();
  };

  return (
    <AdminShell>
      <div className="space-y-6 max-w-4xl">
        <div>
          <h1 className="text-xl font-semibold">Simulations</h1>
          <p className="text-sm text-gray-500 mt-1">
            Compare recommendation policies offline using synthetic users and LLM-judged reactions.
            ml/serving must be running.
          </p>
        </div>
        {/* Launch form */}
        <div className="bg-gray-900 border border-gray-800 rounded-lg p-5 space-y-4">
          <h2 className="text-sm font-semibold text-gray-300">New simulation</h2>
          <div className="grid grid-cols-2 gap-4 sm:grid-cols-3">
            <Field label="Policy A">
              <select value={policyA} onChange={(e) => setPolicyA(e.target.value)} className={selectCls}>
                {KNOWN_POLICIES.map((p) => <option key={p}>{p}</option>)}
              </select>
            </Field>
            <Field label="Policy B">
              <select value={policyB} onChange={(e) => setPolicyB(e.target.value)} className={selectCls}>
                {KNOWN_POLICIES.map((p) => <option key={p}>{p}</option>)}
              </select>
            </Field>
            <Field label="Users">
              <input type="number" min={SIM_LIMITS.nUsers.min} max={SIM_LIMITS.nUsers.max} value={nUsers}
                onChange={(e) => setNUsers(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Rounds">
              <input type="number" min={SIM_LIMITS.nRounds.min} max={SIM_LIMITS.nRounds.max} value={nRounds}
                onChange={(e) => setNRounds(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Tasks/round">
              <input type="number" min={SIM_LIMITS.tasksPerRound.min} max={SIM_LIMITS.tasksPerRound.max} value={tasksPerRound}
                onChange={(e) => setTasksPerRound(Number(e.target.value))} className={inputCls} />
            </Field>
            <Field label="Judge">
              <label className="flex items-center gap-2 cursor-pointer mt-1">
                <input type="checkbox" checked={useLlm} onChange={(e) => setUseLlm(e.target.checked)}
                  className="accent-indigo-500" />
                <span className="text-sm text-gray-300">Claude Haiku</span>
              </label>
              {!useLlm && <p className="text-[10px] text-gray-500 mt-0.5">Deterministic rule judge</p>}
              {useLlm && <p className="text-[10px] text-yellow-500 mt-0.5">Requires ANTHROPIC_API_KEY</p>}
            </Field>
          </div>
          {launchError && <p className="text-red-400 text-xs">{launchError}</p>}
          <button
            onClick={handleStart}
            disabled={launching}
            className="bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 text-white rounded px-4 py-1.5 text-sm"
          >
            {launching ? 'Starting…' : 'Run simulation'}
          </button>
        </div>
        {/* Runs list */}
        <div className="space-y-2">
          <div className="flex items-center justify-between">
            <h2 className="text-sm font-semibold text-gray-300">Past runs</h2>
            <button onClick={loadRuns} className="text-xs text-gray-500 hover:text-white">Refresh</button>
          </div>
          {loading && <p className="text-gray-500 text-sm">Loading</p>}
          {error && <p className="text-red-400 text-sm">{error}</p>}
          {runs.length === 0 && !loading && (
            <p className="text-gray-600 text-sm">No simulation runs yet.</p>
          )}
          <div className="space-y-1">
            {runs.map((run) => (
              <button
                key={run.id}
                onClick={() => setSelectedId(run.id)}
                className="w-full text-left bg-gray-900 hover:bg-gray-800 border border-gray-800 rounded px-4 py-3 flex items-center justify-between gap-4"
              >
                <div className="flex items-center gap-3 min-w-0">
                  <StatusBadge status={run.status} />
                  <span className="text-sm text-gray-300 font-mono truncate">{run.id}</span>
                  <span className="text-xs text-gray-500 hidden sm:inline">
                    {run.policyA} vs {run.policyB}
                  </span>
                </div>
                <div className="flex items-center gap-4 flex-shrink-0">
                  {run.winner && (
                    <span className="text-xs text-indigo-400"> {run.winner}</span>
                  )}
                  <span className="text-xs text-gray-600">{run.nUsers}u × {run.nRounds}r</span>
                  <span className="text-xs text-gray-600">
                    {new Date(run.createdAt).toLocaleString()}
                  </span>
                </div>
              </button>
            ))}
          </div>
        </div>
      </div>
      {selectedId && (
        <RunDetail runId={selectedId} onClose={handleCloseDetail} />
      )}
    </AdminShell>
  );
}
// ── Small helpers ───────────────────────────────────────────────────────────
/** Tailwind classes shared by the launch form's text-like controls. */
const inputCls = [
  'w-full',
  'bg-gray-800',
  'border',
  'border-gray-700',
  'rounded',
  'px-2.5',
  'py-1.5',
  'text-sm',
  'text-gray-200',
  'focus:outline-none',
  'focus:border-indigo-500',
].join(' ');

/** Selects are styled exactly like inputs. */
const selectCls = inputCls;
/** Form field wrapper: a small caption rendered above whatever control is passed as children. */
function Field(props: { label: string; children: React.ReactNode }) {
  const { label, children } = props;
  const caption = <label className="block text-xs text-gray-500 mb-1">{label}</label>;

  return (
    <div>
      {caption}
      {children}
    </div>
  );
}