feat(simulate): MLflow tracking, Airflow DAG integration, health checks for mlflow/airflow
- sim_runs schema: add judge_mode, n_policies, airflow_dag_run_id, mlflow_run_id columns
- admin health endpoint: add mlflow + airflow checks (Basic auth for Airflow API)
- admin nav: add Simulations page link; rename section label
- runner.py: optional MLflow experiment tracking; multi-policy support
- sim_dag.py: Airflow DAG for offline sim pipeline
- admin simulate page + API client methods for sim runs
- shared-types tsconfig: exclude test files from build

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
220
apps/admin/src/app/simulate/page.tsx
Normal file
220
apps/admin/src/app/simulate/page.tsx
Normal file
@@ -0,0 +1,220 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect, useState } from 'react';
|
||||
import { AdminShell } from '@/components/AdminShell';
|
||||
import {
|
||||
startSimulation,
|
||||
getSimulationRuns,
|
||||
getSimulationRun,
|
||||
SimRun,
|
||||
} from '@/lib/api';
|
||||
|
||||
// Policy identifiers offered as toggle buttons in the launch form.
// NOTE(review): presumably these must match policy names the simulation backend knows — confirm.
const POLICIES = ['linucb-v1', 'egreedy-v1', 'egreedy-v2'];
|
||||
// Base URL for MLflow links: NEXT_PUBLIC_MLFLOW_URL when set, otherwise the '/mlflow' path.
// NOTE(review): keep the literal `process.env.NEXT_PUBLIC_…` access — assuming a Next.js build,
// only literal accesses are inlined into client bundles; confirm before refactoring.
const mlflowBase = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
|
||||
// Base URL for Airflow links: NEXT_PUBLIC_AIRFLOW_URL when set, otherwise the '/airflow' path.
// NOTE(review): keep the literal `process.env.NEXT_PUBLIC_…` access — assuming a Next.js build,
// only literal accesses are inlined into client bundles; confirm before refactoring.
const airflowBase = process.env.NEXT_PUBLIC_AIRFLOW_URL ?? '/airflow';
|
||||
|
||||
/**
 * Builds the link to a single run in the MLflow UI.
 *
 * The run id is percent-encoded (as `airflowRunUrl` does for its id) so an id
 * containing URL-significant characters cannot change the link target.
 * The experiment id in the path is fixed to `1`.
 *
 * @param runId MLflow run identifier.
 * @returns URL under `mlflowBase` pointing at the run.
 */
function mlflowRunUrl(runId: string) {
  return `${mlflowBase}/#/experiments/1/runs/${encodeURIComponent(runId)}`;
}
|
||||
|
||||
/**
 * Builds the link to a DAG run in the Airflow grid view of the `bandit_sim` DAG.
 *
 * @param dagRunId Airflow DAG run identifier; percent-encoded before use as a query value.
 * @returns URL under `airflowBase` with `dag_run_id` set to the encoded id.
 */
function airflowRunUrl(dagRunId: string) {
  const encodedRunId = encodeURIComponent(dagRunId);
  return `${airflowBase}/dags/bandit_sim/grid?dag_run_id=${encodedRunId}`;
}
|
||||
|
||||
/**
 * Small bordered pill that shows a run status as text.
 * Statuses without an entry in the colour table are drawn with the `pending` colours.
 */
function StatusBadge({ status }: { status: string }) {
  const palette: Record<string, string> = {
    running: 'bg-blue-900 text-blue-300 border-blue-800',
    done: 'bg-green-900 text-green-300 border-green-800',
    failed: 'bg-red-900 text-red-300 border-red-800',
    pending: 'bg-gray-800 text-gray-400 border-gray-700',
  };
  const colours = palette[status] ?? palette.pending;
  const className = `text-xs px-2 py-0.5 rounded border ${colours}`;

  return <span className={className}>{status}</span>;
}
|
||||
|
||||
/** Per-policy aggregate as read from a run's summary JSON. */
type PolicySummary = { total_reward: number; mean_reward: number; n_pulls: number };

/**
 * Parses a run's `summaryJson` into a policy → stats map.
 *
 * Returns null when there is no summary, when the text is not valid JSON, or
 * when the parsed value is not a plain object. Entries whose value is not an
 * object are dropped. Individual stat fields are NOT validated here, which is
 * why the result is typed as `Partial`.
 */
function parseRunSummary(summaryJson: string | null): Record<string, Partial<PolicySummary>> | null {
  if (!summaryJson) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(summaryJson);
  } catch {
    // Malformed summary: render the row without stats instead of crashing the page.
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null;

  const usable = Object.entries(parsed).filter(
    ([, stats]) => typeof stats === 'object' && stats !== null,
  );
  return Object.fromEntries(usable) as Record<string, Partial<PolicySummary>>;
}

/** Formats a stat with a fixed number of decimals, or an em dash when it is not a finite number. */
function formatStat(value: unknown, digits: number): string {
  return typeof value === 'number' && Number.isFinite(value) ? value.toFixed(digits) : '—';
}

/**
 * One card in the run history: id, status, winner, run parameters, links to
 * MLflow / Airflow when the run has the corresponding ids, and a per-policy
 * stats grid when a summary is available.
 *
 * The summary is parsed defensively: a malformed or unexpectedly shaped
 * `summaryJson` hides or blanks the stats rather than throwing during render.
 */
function SummaryRow({ run }: { run: SimRun }) {
  const summary = parseRunSummary(run.summaryJson);

  return (
    <div className="bg-gray-900 border border-gray-800 rounded p-4 space-y-2">
      <div className="flex items-center justify-between">
        <div className="space-y-0.5">
          <div className="flex items-center gap-2">
            <span className="font-mono text-xs text-gray-500">{run.id}</span>
            <StatusBadge status={run.status} />
            {run.winner && <span className="text-xs text-indigo-400">winner: {run.winner}</span>}
          </div>
          <div className="text-xs text-gray-600">
            {run.nUsers}u × {run.nRounds}r × {run.tasksPerRound}t/r — {run.judgeMode} judge
            {' · '}{new Date(run.createdAt).toLocaleString()}
          </div>
        </div>
        <div className="flex items-center gap-2 flex-shrink-0">
          {run.mlflowRunId && (
            <a href={mlflowRunUrl(run.mlflowRunId)} target="_blank" rel="noreferrer"
              className="text-xs text-indigo-400 hover:underline">MLflow ↗</a>
          )}
          {run.airflowDagRunId && (
            <a href={airflowRunUrl(run.airflowDagRunId)} target="_blank" rel="noreferrer"
              className="text-xs text-indigo-400 hover:underline">Airflow ↗</a>
          )}
        </div>
      </div>
      {summary && (
        <div className="grid grid-cols-2 gap-2 pt-1 lg:grid-cols-3">
          {Object.entries(summary).map(([policy, s]) => (
            <div key={policy} className={`rounded border p-2 text-xs ${policy === run.winner ? 'border-indigo-700 bg-indigo-950' : 'border-gray-800'}`}>
              <div className="font-mono font-medium text-gray-300 mb-1">{policy}</div>
              <div className="text-gray-500 space-y-0.5">
                <div>total <span className="text-gray-300">{formatStat(s.total_reward, 2)}</span></div>
                <div>mean <span className="text-gray-300">{formatStat(s.mean_reward, 4)}</span></div>
                <div>pulls <span className="text-gray-300">{typeof s.n_pulls === 'number' ? s.n_pulls : '—'}</span></div>
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Allowed ranges for the numeric launch parameters. The same values are used
 * for the inputs' `min`/`max` attributes and for validation at launch time.
 * NOTE(review): these ranges come from the form; confirm they match what the API accepts.
 */
const SIM_LIMITS = {
  nUsers: { label: 'Users', min: 1, max: 50 },
  nRounds: { label: 'Rounds', min: 1, max: 200 },
  tasksPerRound: { label: 'Tasks/round', min: 1, max: 20 },
} as const;

/** Turns an arbitrary thrown value into a displayable message. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Returns a validation message when `value` is not a whole number inside
 * `limit`, otherwise null.
 */
function rangeError(value: number, limit: { label: string; min: number; max: number }): string | null {
  const ok = Number.isInteger(value) && value >= limit.min && value <= limit.max;
  return ok ? null : `${limit.label} must be a whole number between ${limit.min} and ${limit.max}.`;
}

/**
 * Admin page for launching simulations and browsing past runs.
 *
 * The run list is loaded on mount and re-fetched every 8 seconds. Launching
 * requires at least two selected policies and numeric parameters within
 * `SIM_LIMITS`; the HTML `min`/`max` attributes alone do not stop a user from
 * typing an out-of-range value, and an emptied field becomes 0, so the values
 * are checked again before the request is sent.
 */
export default function SimulatePage() {
  const [runs, setRuns] = useState<SimRun[]>([]);
  const [loading, setLoading] = useState(true);
  const [launching, setLaunching] = useState(false);
  const [error, setError] = useState('');
  const [msg, setMsg] = useState('');

  const [nUsers, setNUsers] = useState(5);
  const [nRounds, setNRounds] = useState(20);
  const [tasksPerRound, setTasksPerRound] = useState(8);
  const [judgeMode, setJudgeMode] = useState<'rule' | 'llm'>('rule');
  const [selectedPolicies, setSelectedPolicies] = useState<string[]>(['linucb-v1', 'egreedy-v1']);

  // Fetches the run list; always clears the loading flag, even on failure.
  const refresh = () =>
    getSimulationRuns()
      .then((r) => setRuns(r.runs))
      .catch((e: unknown) => setError(errorMessage(e)))
      .finally(() => setLoading(false));

  useEffect(() => {
    void refresh();
    // Poll so that status changes of listed runs show up without a manual reload.
    const t = setInterval(refresh, 8_000);
    return () => clearInterval(t);
  }, []);

  // Adds the policy to the selection if absent, removes it if present.
  const togglePolicy = (p: string) =>
    setSelectedPolicies((prev) =>
      prev.includes(p) ? prev.filter((x) => x !== p) : [...prev, p],
    );

  const handleLaunch = async () => {
    if (selectedPolicies.length < 2) { setError('Select at least 2 policies.'); return; }

    const invalid =
      rangeError(nUsers, SIM_LIMITS.nUsers) ??
      rangeError(nRounds, SIM_LIMITS.nRounds) ??
      rangeError(tasksPerRound, SIM_LIMITS.tasksPerRound);
    if (invalid) { setError(invalid); return; }

    setLaunching(true); setError(''); setMsg('');
    try {
      const r = await startSimulation({ nUsers, nRounds, tasksPerRound, judgeMode, policies: selectedPolicies });
      setMsg(r.airflow_dag_run_id
        ? `Launched via Airflow — dag_run_id: ${r.airflow_dag_run_id}`
        : `Launched locally — run id: ${r.id}`);
      await refresh();
    } catch (e: unknown) {
      setError(errorMessage(e));
    } finally {
      setLaunching(false);
    }
  };

  return (
    <AdminShell>
      <div className="space-y-8 max-w-4xl">
        <h1 className="text-xl font-semibold">Simulations</h1>
        {error && <p className="text-red-400 text-sm">{error}</p>}
        {msg && <p className="text-green-400 text-sm">{msg}</p>}

        {/* Launch form */}
        <section className="bg-gray-900 border border-gray-800 rounded p-5 space-y-4">
          <h2 className="text-base font-medium text-gray-300">New simulation</h2>

          <div className="grid grid-cols-3 gap-4 text-sm">
            <label className="space-y-1">
              <span className="text-gray-500">Users</span>
              <input type="number" min={SIM_LIMITS.nUsers.min} max={SIM_LIMITS.nUsers.max} value={nUsers}
                onChange={(e) => setNUsers(Number(e.target.value))}
                className="w-full bg-gray-950 border border-gray-700 rounded px-2 py-1 text-gray-300" />
            </label>
            <label className="space-y-1">
              <span className="text-gray-500">Rounds</span>
              <input type="number" min={SIM_LIMITS.nRounds.min} max={SIM_LIMITS.nRounds.max} value={nRounds}
                onChange={(e) => setNRounds(Number(e.target.value))}
                className="w-full bg-gray-950 border border-gray-700 rounded px-2 py-1 text-gray-300" />
            </label>
            <label className="space-y-1">
              <span className="text-gray-500">Tasks/round</span>
              <input type="number" min={SIM_LIMITS.tasksPerRound.min} max={SIM_LIMITS.tasksPerRound.max} value={tasksPerRound}
                onChange={(e) => setTasksPerRound(Number(e.target.value))}
                className="w-full bg-gray-950 border border-gray-700 rounded px-2 py-1 text-gray-300" />
            </label>
          </div>

          <div className="space-y-1 text-sm">
            <span className="text-gray-500">Policies (select ≥ 2)</span>
            <div className="flex gap-2 flex-wrap pt-1">
              {POLICIES.map((p) => (
                <button key={p} onClick={() => togglePolicy(p)}
                  className={`px-3 py-1 rounded border text-xs font-mono ${
                    selectedPolicies.includes(p)
                      ? 'bg-indigo-900 border-indigo-700 text-indigo-200'
                      : 'border-gray-700 text-gray-500 hover:border-gray-500'
                  }`}>
                  {p}
                </button>
              ))}
            </div>
          </div>

          <div className="space-y-1 text-sm">
            <span className="text-gray-500">Judge</span>
            <div className="flex gap-2 pt-1">
              {(['rule', 'llm'] as const).map((m) => (
                <button key={m} onClick={() => setJudgeMode(m)}
                  className={`px-3 py-1 rounded border text-xs ${
                    judgeMode === m
                      ? 'bg-gray-700 border-gray-500 text-white'
                      : 'border-gray-700 text-gray-500 hover:border-gray-500'
                  }`}>
                  {m}
                </button>
              ))}
            </div>
            {judgeMode === 'llm' && (
              <p className="text-xs text-yellow-600 mt-1">LLM judge requires ANTHROPIC_API_KEY in ml/serving env.</p>
            )}
          </div>

          <button onClick={handleLaunch} disabled={launching}
            className="bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 text-white rounded px-4 py-2 text-sm">
            {launching ? 'Launching…' : 'Launch simulation'}
          </button>
          <p className="text-xs text-gray-600">
            Runs via <a href={airflowBase} target="_blank" rel="noreferrer" className="text-indigo-500 hover:underline">Airflow</a> (mlops profile) when available; falls back to local subprocess.
            Results logged to <a href={mlflowBase} target="_blank" rel="noreferrer" className="text-indigo-500 hover:underline">MLflow</a>.
          </p>
        </section>

        {/* Run history */}
        <section className="space-y-3">
          <h2 className="text-base font-medium text-gray-300">
            Run history
            {loading && <span className="text-xs text-gray-600 ml-2">loading…</span>}
          </h2>
          {runs.length === 0 && !loading && (
            <p className="text-gray-600 text-sm">No simulations yet.</p>
          )}
          {runs.map((r) => <SummaryRow key={r.id} run={r} />)}
        </section>
      </div>
    </AdminShell>
  );
}
|
||||
@@ -2,14 +2,16 @@
|
||||
|
||||
import Link from 'next/link';
|
||||
import { usePathname } from 'next/navigation';
|
||||
import { useEffect, useState } from 'react';
|
||||
|
||||
const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
|
||||
const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
|
||||
const airflowUrl = process.env.NEXT_PUBLIC_AIRFLOW_URL ?? '/airflow';
|
||||
|
||||
/** One entry in the admin sidebar. */
type NavItem = {
  /** Link target: an in-app path, or a full/base URL for external entries. */
  href: string;
  /** Text shown for the entry. */
  label: string;
  /** When true the entry is rendered as a plain anchor and is never marked active. */
  external?: boolean;
  /** Key in the health services map; when set, a status dot is shown next to the label. */
  svcName?: string;
};
|
||||
|
||||
type NavSection = {
|
||||
@@ -24,40 +26,60 @@ const NAV: NavSection[] = [
|
||||
{
|
||||
label: 'Signals',
|
||||
items: [
|
||||
{ href: '/users', label: 'Users' },
|
||||
{ href: '/events', label: 'Events' },
|
||||
{ href: '/features', label: 'Features' },
|
||||
{ href: '/users', label: 'Users' },
|
||||
{ href: '/events', label: 'Events' },
|
||||
{ href: '/features', label: 'Features' },
|
||||
{ href: '/data-quality', label: 'Data quality' },
|
||||
],
|
||||
},
|
||||
{
|
||||
label: 'Recommender status',
|
||||
label: 'Recommender',
|
||||
items: [
|
||||
{ href: '/tips', label: 'Tips' },
|
||||
{ href: '/tips', label: 'Tips' },
|
||||
{ href: '/reward-analytics', label: 'Rewards' },
|
||||
{ href: '/simulate', label: 'Simulations' },
|
||||
],
|
||||
},
|
||||
{
|
||||
label: 'Operations',
|
||||
items: [
|
||||
{ href: '/health', label: 'Health' },
|
||||
{ href: '/ops', label: 'Ops' },
|
||||
{ href: '/sql', label: 'SQL runner' },
|
||||
{ href: '/audit', label: 'Audit log' },
|
||||
{ href: '/ops', label: 'Ops' },
|
||||
{ href: '/sql', label: 'SQL runner' },
|
||||
{ href: '/audit', label: 'Audit log' },
|
||||
],
|
||||
},
|
||||
{
|
||||
label: 'Resources',
|
||||
items: [
|
||||
{ href: '/docs', label: 'Docs' },
|
||||
{ href: mlflowUrl, label: 'MLflow ↗', external: true },
|
||||
{ href: airflowUrl, label: 'Airflow ↗', external: true },
|
||||
{ href: '/docs', label: 'Docs' },
|
||||
{ href: mlflowUrl, label: 'MLflow ↗', external: true, svcName: 'mlflow' },
|
||||
{ href: airflowUrl, label: 'Airflow ↗', external: true, svcName: 'airflow' },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Background colour class for the sidebar status dot, keyed by service status.
 * Statuses not listed here have no entry; callers choose their own fallback.
 */
const STATUS_DOT: Record<string, string> = {
  ok: 'bg-green-500',
  degraded: 'bg-yellow-400',
  down: 'bg-red-500',
};
|
||||
|
||||
export function AdminShell({ children }: { children: React.ReactNode }) {
|
||||
const pathname = usePathname();
|
||||
const [svcStatus, setSvcStatus] = useState<Record<string, string>>({});
|
||||
|
||||
useEffect(() => {
|
||||
fetch('/api/admin/health', { credentials: 'include' })
|
||||
.then((r) => r.json())
|
||||
.then((data: { services?: { name: string; status: string }[] }) => {
|
||||
const map: Record<string, string> = {};
|
||||
for (const s of data.services ?? []) map[s.name] = s.status;
|
||||
setSvcStatus(map);
|
||||
})
|
||||
.catch(() => {});
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<div className="flex min-h-screen">
|
||||
{/* Sidebar */}
|
||||
@@ -83,13 +105,19 @@ export function AdminShell({ children }: { children: React.ReactNode }) {
|
||||
const active =
|
||||
!item.external &&
|
||||
(item.href === '/' ? pathname === '/' : pathname.startsWith(item.href));
|
||||
const className = `flex items-center px-3 py-2 rounded text-sm transition-colors ${
|
||||
const className = `flex items-center gap-2 px-3 py-2 rounded text-sm transition-colors ${
|
||||
active
|
||||
? 'bg-gray-800 text-white font-medium'
|
||||
: item.external
|
||||
? 'text-gray-500 hover:text-white hover:bg-gray-900'
|
||||
: 'text-gray-400 hover:text-white hover:bg-gray-900'
|
||||
}`;
|
||||
const dot = item.svcName
|
||||
? svcStatus[item.svcName]
|
||||
? <span className={`inline-block w-1.5 h-1.5 rounded-full flex-shrink-0 ${STATUS_DOT[svcStatus[item.svcName]] ?? STATUS_DOT.down}`} />
|
||||
: <span className="inline-block w-1.5 h-1.5 rounded-full flex-shrink-0 bg-gray-700" />
|
||||
: null;
|
||||
|
||||
return item.external ? (
|
||||
<a
|
||||
key={item.href}
|
||||
@@ -98,6 +126,7 @@ export function AdminShell({ children }: { children: React.ReactNode }) {
|
||||
rel="noreferrer"
|
||||
className={className}
|
||||
>
|
||||
{dot}
|
||||
{item.label}
|
||||
</a>
|
||||
) : (
|
||||
|
||||
@@ -262,3 +262,49 @@ export function saveQuery(name: string, querySql: string) {
|
||||
/**
 * Deletes a saved query by id via `DELETE /admin/saved-queries/:id`.
 *
 * The id is percent-encoded so that an id containing `/`, `?` or `#` cannot
 * change which path is requested.
 *
 * @param id Identifier of the saved query to delete.
 * @returns Whatever `apiFetch` returns for the response, typed as `{ ok: boolean }`.
 */
export function deleteSavedQuery(id: string) {
  return apiFetch<{ ok: boolean }>(`/admin/saved-queries/${encodeURIComponent(id)}`, { method: 'DELETE' });
}
|
||||
|
||||
// ── Simulations ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * A simulation run as returned by the admin simulate endpoints.
 * Nullable fields are `null` when the value is not available for the run.
 */
export interface SimRun {
  /** Run identifier. */
  id: string;
  // NOTE(review): policyA/policyB presumably name the first two compared policies,
  // kept alongside nPolicies for multi-policy runs — confirm against the API.
  policyA: string;
  policyB: string;

  /** Simulation size parameters. */
  nUsers: number;
  nRounds: number;
  tasksPerRound: number;

  /** Judge used for the run (the launch form offers 'rule' and 'llm'). */
  judgeMode: string;
  /** Number of policies in the run. */
  nPolicies: number;

  /** Lifecycle state of the run. */
  status: 'pending' | 'running' | 'done' | 'failed';

  /** JSON text keyed by policy name with per-policy stats; parsed by the simulate page. */
  summaryJson: string | null;
  /** Name of the winning policy, if one has been recorded. */
  winner: string | null;
  /** JSON text; NOTE(review): presumably a per-persona breakdown — schema not visible here. */
  personaBreakdownJson: string | null;

  /** Airflow DAG run id, used to link to the Airflow UI. */
  airflowDagRunId: string | null;
  /** MLflow run id, used to link to the MLflow UI. */
  mlflowRunId: string | null;

  /** Creation timestamp as a string accepted by `new Date(...)`. */
  createdAt: string;
  /** Completion timestamp, or null while unfinished. NOTE(review): format assumed same as createdAt. */
  finishedAt: string | null;
}
|
||||
|
||||
/**
 * Body of the "start simulation" request.
 * Every field is optional; how the server treats an omitted field is not visible here.
 */
export interface SimStartRequest {
  /** Number of simulated users. */
  nUsers?: number;
  /** Number of rounds. */
  nRounds?: number;
  /** Tasks per round. */
  tasksPerRound?: number;
  /** Which judge to use. */
  judgeMode?: 'rule' | 'llm';
  /** Names of the policies to compare. */
  policies?: string[];
}
|
||||
|
||||
/**
 * Starts a simulation by POSTing the request as JSON to `/admin/simulate/start`.
 *
 * @param req Simulation parameters.
 * @returns The `apiFetch` result, typed with the new run's `id` and `status`,
 *          plus `airflow_dag_run_id` when the response includes one.
 */
export function startSimulation(req: SimStartRequest) {
  const body = JSON.stringify(req);
  return apiFetch<{ id: string; status: string; airflow_dag_run_id?: string }>(
    '/admin/simulate/start',
    { method: 'POST', body },
  );
}
|
||||
|
||||
/**
 * Fetches the list of simulation runs from `/admin/simulate/runs`.
 *
 * @returns The `apiFetch` result, typed as an object with a `runs` array.
 */
export function getSimulationRuns() {
  const path = '/admin/simulate/runs';
  return apiFetch<{ runs: SimRun[] }>(path);
}
|
||||
|
||||
/**
 * Fetches a single simulation run and its events from `/admin/simulate/:id`.
 *
 * The id is percent-encoded so that an id containing `/`, `?` or `#` cannot
 * change which path is requested.
 *
 * @param id Identifier of the run to fetch.
 * @returns The `apiFetch` result, typed as the run (with an `isRunning` flag)
 *          and an untyped `events` array.
 */
export function getSimulationRun(id: string) {
  return apiFetch<{ run: SimRun & { isRunning: boolean }; events: unknown[] }>(
    `/admin/simulate/${encodeURIComponent(id)}`,
  );
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user