chore: remove Airflow completely from the stack

Drop all four Airflow containers (db, init, webserver, scheduler) from the mlops compose profile, leaving MLflow as the sole mlops service. Remove AIRFLOW_* env vars, config fields, health-check entries, DAG trigger code in admin/bench routes, the airflow_dag_run_id schema column, Airflow nav links and DAG-run links in the admin UI, the two Airflow DAG files (bench_dag.py, sim_dag.py), and all related docs/ADR references. Simulations now run exclusively via the subprocess path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-03 16:38:46 +00:00
parent ce1c8bde57
commit f8d66aa01f
27 changed files with 663 additions and 719 deletions
--- a/apps/admin/README.md
+++ b/apps/admin/README.md
@@ -22,11 +22,19 @@ Two ways to sign in:
 | Route | Description |
 |-------|-------------|
 | `/` | Overview: DAU/WAU KPI cards, tips served, reaction breakdown, activation funnel |
-| `/users` | User list (paginated) |
-| `/users/:id` | User detail: identity, consents, integrations, profile features (#81 phase B), tip stats, reward history; revoke-integration + reset-bandit + rebuild-profile actions |
-| `/audit` | Admin action audit log |
-| `/events` | Event stream viewer (stub — pending API history endpoint) |
-| `/reward-analytics` | Reaction distribution + per-policy / per-model / per-prompt-version / per-tip-kind breakdowns with avg reward |
+| `/users` | User list (paginated, searchable) |
+| `/users/:id` | User detail: identity, consents, integrations, profile features (completion rate, dismiss rate, dwell, preferred hour, tip volume), tip stats, reward history; revoke-integration + reset-bandit + rebuild-profile actions |
+| `/audit` | Admin action audit log with timestamps and descriptions |
+| `/events` | Live event stream viewer with filters by subject/user/time; tail of `signals.*` from ring buffer or NATS JetStream |
+| `/features` | Feature store browser: features sent to `ml/serving` per scoring call; freshness status; per-feature SLA tracking |
+| `/tips` | Served tips explorer: tip content, score, policy, model, feedback reactions; per-user timeline |
+| `/reward-analytics` | Reaction distribution + per-policy / per-model / per-prompt-version breakdowns with avg reward; time-series and cohort slicing |
+| `/data-quality` | Missing-feature rate heatmap, stale-token rate, daily completeness, per-feature freshness SLA status |
+| `/health` | System health rollup for api, ml/serving, SQLite, event-bus, and MLflow; auto-refreshes every 15s |
+| `/sql` | Read-only SQL runner against SQLite; supports saved queries; to be replaced by Superset in M4 |
+| `/simulate` | Offline simulation runner: launch `ml/experiments/sim`, track runs, judge selection, policy comparison |
+| `/docs` | Admin documentation and ops runbooks inline |
+| `/ops` | Operational dashboard (deprecation candidate; pending UX refinement #107) |

 ## Dev

@@ -40,8 +48,9 @@ pnpm --filter @oo/admin dev   # starts on :3080
 Stays as a Next.js app in the monorepo permanently — it's not a candidate for extraction.
 It gets richer (more pages, embedded MLflow/Grafana) but not split.

-## Known issues
+## Known issues & pending improvements

 - `@tremor/react 3.x` declares a peer dep on React 18; the workspace uses React 19.
  Works in practice. Will resolve naturally when Tremor ships React 19 support or when
  we switch to Tremor v4 (which targets React 18+).
+- UX refinements pending (#100–102): feedback options consolidation, config page UI migration, settings UI placement.
--- a/apps/admin/src/app/simulate/page.tsx
+++ b/apps/admin/src/app/simulate/page.tsx
@@ -5,16 +5,11 @@ import { AdminShell } from '@/components/AdminShell';
 import { getSimulationRuns, SimRun } from '@/lib/api';

 const mlflowBase = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
-const airflowBase = process.env.NEXT_PUBLIC_AIRFLOW_URL ?? '/airflow';

 function mlflowRunUrl(runId: string) {
  return `${mlflowBase}/#/experiments/1/runs/${runId}`;
 }

-function airflowRunUrl(dagRunId: string) {
-  return `${airflowBase}/dags/bandit_sim/grid?dag_run_id=${encodeURIComponent(dagRunId)}`;
-}
-
 function StatusBadge({ status }: { status: string }) {
  const cls: Record<string, string> = {
    running: 'bg-blue-900 text-blue-300 border-blue-800',
@@ -50,10 +45,6 @@ function SummaryRow({ run }: { run: SimRun }) {
            <a href={mlflowRunUrl(run.mlflowRunId)} target="_blank" rel="noreferrer"
               className="text-xs text-indigo-400 hover:underline">MLflow ↗</a>
          )}
-          {run.airflowDagRunId && (
-            <a href={airflowRunUrl(run.airflowDagRunId)} target="_blank" rel="noreferrer"
-               className="text-xs text-indigo-400 hover:underline">Airflow ↗</a>
-          )}
        </div>
      </div>
      {summary && (
@@ -97,11 +88,7 @@ export default function SimulatePage() {
        <div>
          <h1 className="text-xl font-semibold">Simulations</h1>
          <p className="text-sm text-gray-500 mt-1">
-            Offline policy comparisons — run via the{' '}
-            <a href={airflowBase} target="_blank" rel="noreferrer" className="text-indigo-400 hover:underline">
-              Airflow <code className="text-xs">bench_collect</code> DAG
-            </a>
-            {' '}(mlops profile). Results are logged to{' '}
+            Offline policy comparisons — trigger via the admin API or CLI. Results are logged to{' '}
            <a href={mlflowBase} target="_blank" rel="noreferrer" className="text-indigo-400 hover:underline">MLflow ↗</a>.
          </p>
        </div>
@@ -114,7 +101,7 @@ export default function SimulatePage() {
            {loading && <span className="text-gray-600 ml-2 normal-case">loading…</span>}
          </h2>
          {runs.length === 0 && !loading && (
-            <p className="text-gray-600 text-sm">No simulation runs yet. Trigger a run from Airflow.</p>
+            <p className="text-gray-600 text-sm">No simulation runs yet.</p>
          )}
          {runs.map((r) => <SummaryRow key={r.id} run={r} />)}
        </section>
--- a/apps/admin/src/components/AdminShell.tsx
+++ b/apps/admin/src/components/AdminShell.tsx
@@ -4,8 +4,7 @@ import Link from 'next/link';
 import { usePathname } from 'next/navigation';
 import { useEffect, useState } from 'react';

-const mlflowUrl  = process.env.NEXT_PUBLIC_MLFLOW_URL  ?? '/mlflow';
-const airflowUrl = process.env.NEXT_PUBLIC_AIRFLOW_URL ?? '/airflow';
+const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';

 type NavItem = {
  href: string;
@@ -53,8 +52,7 @@ const NAV: NavSection[] = [
    label: 'Resources',
    items: [
      { href: '/docs',     label: 'Docs' },
-      { href: mlflowUrl,  label: 'MLflow ↗',  external: true, svcName: 'mlflow' },
-      { href: airflowUrl, label: 'Airflow ↗', external: true, svcName: 'airflow' },
+      { href: mlflowUrl, label: 'MLflow ↗', external: true, svcName: 'mlflow' },
    ],
  },
 ];
--- a/apps/admin/src/lib/api.ts
+++ b/apps/admin/src/lib/api.ts
@@ -278,7 +278,6 @@ export interface SimRun {
  summaryJson: string | null;
  winner: string | null;
  personaBreakdownJson: string | null;
-  airflowDagRunId: string | null;
  mlflowRunId: string | null;
  createdAt: string;
  finishedAt: string | null;
@@ -293,7 +292,7 @@ export interface SimStartRequest {
 }

 export function startSimulation(req: SimStartRequest) {
-  return apiFetch<{ id: string; status: string; airflow_dag_run_id?: string }>(
+  return apiFetch<{ id: string; status: string }>(
    '/admin/simulate/start',
    { method: 'POST', body: JSON.stringify(req) },
  );