feat: MLOps external services, AI stack planning, admin MLOps hub

Infrastructure:
- Add `mlops` compose profile: MLflow (basic-auth, /mlflow path) + Airflow (LocalExecutor, /airflow path) + airflow-db
- infra/mlflow/basic_auth.ini for MLflow auth config
- Caddy routes /mlflow* and /airflow* inside existing o.alogins.net block (see agap_git)
- Dockerfile.admin: NEXT_PUBLIC_MLFLOW_URL / NEXT_PUBLIC_AIRFLOW_URL build args (default /mlflow, /airflow)

Admin panel:
- /admin/models: replace MLflow iframe with external link cards
- /admin/experiments: replace LinUCB stats with MLOps hub (links to MLflow experiments/models + Airflow DAGs/datasets)
- AdminShell: external nav links for MLflow ↗ and Airflow ↗ under MLOps section

Docs & planning:
- README: new AI stack section (Ollama/LiteLLM/OpenWebUI three-tier, tip generation pipeline, model aliases)
- README: Phase 2 expanded with AI infra issues (#86-#93) and granular pipeline breakdown
- README: Phase 4 expanded with LLM MLOps items (#94-#97)
- CLAUDE.md: AI stack section, updated current phase (M1 shipped / M2 in progress), compose profiles, updated What NOT to do
- docs/architecture/overview.md: AI stack section, updated decision flow diagram for Phase 2 LLM pipeline
- ADR-0006: updated to reflect external services (path-based, not embedded)
- Gitea issues #86-#97 created (M2: AI infra + pipeline; M4: LLM MLOps)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 08:20:44 +00:00
parent faf44c18fc
commit 85367aeaa0
25 changed files with 695 additions and 222 deletions

View File

@@ -17,14 +17,15 @@ function isDocCategory(value: string): value is DocCategory {
export default async function DocDetailPage({
params,
}: {
params: { category: string; slug: string };
params: Promise<{ category: string; slug: string }>;
}) {
if (!isDocCategory(params.category)) notFound();
const { category, slug } = await params;
if (!isDocCategory(category)) notFound();
const doc = await getDoc(params.category, params.slug);
const doc = await getDoc(category, slug);
if (!doc) notFound();
const categoryLabel = CATEGORY_LABELS[params.category];
const categoryLabel = CATEGORY_LABELS[category];
return (
<AdminShell>

View File

@@ -1,124 +1,89 @@
'use client';
import { useEffect, useState } from 'react';
import { AdminShell } from '@/components/AdminShell';
import { resetBandit } from '@/lib/api';
interface BanditStats {
user_id: string;
pulls: number;
reward_count: number;
cumulative_reward: number;
estimated_mean_reward: number;
theta: number[];
last_updated: string | null;
}
const FEATURE_LABELS = ['hour_sin', 'hour_cos', 'is_overdue', 'task_age', 'priority'];
const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
const airflowUrl = process.env.NEXT_PUBLIC_AIRFLOW_URL ?? '/airflow';
export default function ExperimentsPage() {
const [userId, setUserId] = useState('');
const [stats, setStats] = useState<BanditStats | null>(null);
const [loading, setLoading] = useState(false);
const [resetting, setResetting] = useState(false);
const [error, setError] = useState('');
const [resetMsg, setResetMsg] = useState('');
const fetchStats = async () => {
if (!userId.trim()) return;
setLoading(true);
setError('');
try {
const res = await fetch(`/api/ml/stats/${encodeURIComponent(userId.trim())}`, { credentials: 'include' });
if (!res.ok) throw new Error(res.statusText);
setStats(await res.json());
} catch (e: any) {
setError(e.message);
} finally {
setLoading(false);
}
};
const handleReset = async () => {
if (!userId.trim()) return;
if (!confirm(`Reset LinUCB state for user ${userId}?`)) return;
setResetting(true);
try {
await resetBandit(userId.trim());
setResetMsg('Bandit state reset.');
setStats(null);
} catch (e: any) {
setError(e.message);
} finally {
setResetting(false);
}
};
return (
<AdminShell>
<div className="space-y-6">
<h1 className="text-xl font-semibold">Experiment dashboard</h1>
<p className="text-sm text-gray-500">LinUCB per-user bandit stats pulled from ml/serving.</p>
<h1 className="text-xl font-semibold">MLOps</h1>
<p className="text-sm text-gray-500">
Experiment tracking, dataset management, and pipeline orchestration live in dedicated external services.
Each has its own auth — see{' '}
<a href="/admin/docs/ops/mlops" className="text-indigo-400 hover:underline">MLOps runbook</a>
{' '}for credentials and first-time setup.
</p>
<div className="flex gap-2">
<input
value={userId}
onChange={(e) => setUserId(e.target.value)}
onKeyDown={(e) => e.key === 'Enter' && fetchStats()}
placeholder="User ID"
className="bg-gray-900 border border-gray-700 rounded px-3 py-1.5 text-sm text-gray-300 w-80"
/>
<button onClick={fetchStats} className="bg-indigo-600 hover:bg-indigo-500 text-white rounded px-4 py-1.5 text-sm">
Load
</button>
{stats && (
<button onClick={handleReset} disabled={resetting} className="bg-red-800 hover:bg-red-700 text-white rounded px-4 py-1.5 text-sm disabled:opacity-50">
Reset bandit
</button>
)}
</div>
{error && <p className="text-red-400 text-sm">{error}</p>}
{resetMsg && <p className="text-green-400 text-sm">{resetMsg}</p>}
{loading && <p className="text-gray-500 text-sm">Loading</p>}
{stats && (
<div className="grid grid-cols-2 gap-4 md:grid-cols-4">
<StatCard label="Pulls" value={stats.pulls} />
<StatCard label="Reward samples" value={stats.reward_count} />
<StatCard label="Cumulative reward" value={stats.cumulative_reward.toFixed(2)} />
<StatCard label="Mean reward" value={stats.estimated_mean_reward.toFixed(3)} />
<section className="space-y-3">
<h2 className="text-sm font-semibold text-gray-400 uppercase tracking-wider">Experiment tracking</h2>
<div className="grid gap-3 md:grid-cols-2">
<ExternalCard
title="Experiments"
description="Training runs · metrics · parameter sweeps · run comparison"
href={`${mlflowUrl}/#/experiments`}
label="Open in MLflow ↗"
/>
<ExternalCard
title="Registered models"
description="Model versions · stage promotion (Staging → Production) · artifact browser"
href={`${mlflowUrl}/#/models`}
label="Open in MLflow ↗"
/>
</div>
)}
</section>
{stats?.theta && (
<div className="space-y-2">
<h2 className="text-sm font-medium text-gray-400">θ (learned weight vector)</h2>
<div className="flex gap-3 flex-wrap">
{stats.theta.map((v, i) => (
<div key={i} className="bg-gray-900 border border-gray-800 rounded p-3 text-center min-w-[100px]">
<div className="text-xs text-gray-500 mb-1">{FEATURE_LABELS[i] ?? `feat_${i}`}</div>
<div className={`text-sm font-mono ${v > 0 ? 'text-green-400' : v < 0 ? 'text-red-400' : 'text-gray-400'}`}>
{v.toFixed(4)}
</div>
</div>
))}
</div>
{stats.last_updated && (
<p className="text-xs text-gray-600">Last updated: {stats.last_updated}</p>
)}
<section className="space-y-3">
<h2 className="text-sm font-semibold text-gray-400 uppercase tracking-wider">Pipeline orchestration</h2>
<div className="grid gap-3 md:grid-cols-2">
<ExternalCard
title="DAGs"
description="Batch feature materialization · retraining pipelines · data quality jobs"
href={`${airflowUrl}/dags`}
label="Open in Airflow ↗"
/>
<ExternalCard
title="Dataset lineage"
description="Pipeline runs · dataset inputs/outputs · data versioning"
href={`${airflowUrl}/datasets`}
label="Open in Airflow ↗"
/>
</div>
)}
</section>
<section className="space-y-2 pt-2 border-t border-gray-800">
<h2 className="text-sm font-semibold text-gray-400 uppercase tracking-wider">Bandit state ops</h2>
<p className="text-xs text-gray-500">
Per-user LinUCB reset is available on the{' '}
<a href="/admin/users" className="text-indigo-400 hover:underline">Users page</a>
{' '}→ user detail view.
</p>
</section>
</div>
</AdminShell>
);
}
function StatCard({ label, value }: { label: string; value: string | number }) {
function ExternalCard({ title, description, href, label }: {
title: string;
description: string;
href: string;
label: string;
}) {
return (
<div className="bg-gray-900 border border-gray-800 rounded p-4">
<div className="text-xs text-gray-500 mb-1">{label}</div>
<div className="text-2xl font-semibold text-white">{value}</div>
<div className="bg-gray-900 border border-gray-800 rounded-lg p-5 flex items-start justify-between gap-4">
<div className="space-y-1">
<h2 className="text-sm font-medium text-gray-200">{title}</h2>
<p className="text-xs text-gray-500">{description}</p>
</div>
<a
href={href}
target="_blank"
rel="noreferrer"
className="flex-shrink-0 text-indigo-400 hover:text-indigo-300 text-xs whitespace-nowrap"
>
{label}
</a>
</div>
);
}

View File

@@ -5,7 +5,7 @@ export default function LoginPage() {
<h1 className="text-2xl font-semibold">oO Admin</h1>
<p className="text-gray-400 text-sm">Sign in via the main app first, then return here.</p>
<a
href={`${process.env.NEXT_PUBLIC_WEB_URL ?? 'http://localhost:3079'}/sign-in`}
href="/sign-in"
className="inline-block px-4 py-2 bg-white text-black rounded text-sm font-medium hover:bg-gray-200 transition-colors"
>
Sign in with Google

View File

@@ -1,30 +1,53 @@
import { AdminShell } from '@/components/AdminShell';
export default function ModelsPage() {
const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? 'http://localhost:5000';
const mlflowUrl = process.env.NEXT_PUBLIC_MLFLOW_URL ?? '/mlflow';
export default function ModelsPage() {
return (
<AdminShell>
<div className="space-y-4 h-[calc(100vh-4rem)]">
<div className="flex items-center justify-between flex-shrink-0">
<h1 className="text-xl font-semibold">Model registry</h1>
<a href={mlflowUrl} target="_blank" rel="noreferrer" className="text-xs text-gray-400 hover:text-white border border-gray-700 rounded px-2 py-1">
Open MLflow
</a>
</div>
<p className="text-sm text-gray-500 flex-shrink-0">
MLflow is embedded below when running under the <code className="text-xs bg-gray-800 px-1 rounded">full</code> compose profile.
Promote or archive model versions via the MLflow UI; each action writes to the audit log automatically.
<div className="space-y-6">
<h1 className="text-xl font-semibold">Model registry</h1>
<p className="text-sm text-gray-500">
Model lifecycle (runs, versions, promotions, artifacts) is managed in MLflow.
Auth is separate — log in with your MLflow credentials.
</p>
<div className="flex-1 rounded border border-gray-800 overflow-hidden" style={{ height: 'calc(100vh - 12rem)' }}>
<iframe
src={`${mlflowUrl}/#/models`}
className="w-full h-full bg-white"
title="MLflow Model Registry"
sandbox="allow-scripts allow-same-origin allow-forms allow-popups"
/>
</div>
<ExternalCard
title="MLflow Model Registry"
description="Experiment runs · registered models · version promotion · artifact browser"
href={mlflowUrl}
label="Open MLflow"
/>
<ExternalCard
title="MLflow Experiments"
description="Training runs, metrics, parameters, and comparison across runs"
href={`${mlflowUrl}/#/experiments`}
label="Browse experiments"
/>
</div>
</AdminShell>
);
}
/**
 * Presentational card that points the user at an external tool.
 *
 * Renders a title and a short description on the left and a button-styled
 * link on the right. The link opens in a new tab (`target="_blank"`) and is
 * marked `rel="noreferrer"`.
 *
 * @param props.title - Heading text shown at the top of the card.
 * @param props.description - Secondary text shown beneath the heading.
 * @param props.href - Destination URL of the link.
 * @param props.label - Visible text of the link.
 */
function ExternalCard(props: {
  title: string;
  description: string;
  href: string;
  label: string;
}) {
  const { title, description, href, label } = props;

  // Built separately so the card layout below stays easy to scan.
  const actionLink = (
    <a
      className="flex-shrink-0 bg-indigo-600 hover:bg-indigo-500 text-white text-xs rounded px-3 py-1.5 whitespace-nowrap"
      href={href}
      rel="noreferrer"
      target="_blank"
    >
      {label}
    </a>
  );

  return (
    <div className="bg-gray-900 border border-gray-800 rounded-lg p-5 flex items-start justify-between gap-4">
      <div className="space-y-1">
        <h2 className="text-sm font-medium text-gray-200">{title}</h2>
        <p className="text-xs text-gray-500">{description}</p>
      </div>
      {actionLink}
    </div>
  );
}

View File

@@ -3,10 +3,15 @@ import { UserDetail } from '@/components/UserDetail';
export const dynamic = 'force-dynamic';
export default function UserDetailPage({ params }: { params: { id: string } }) {
export default async function UserDetailPage({
params,
}: {
params: Promise<{ id: string }>;
}) {
const { id } = await params;
return (
<AdminShell>
<UserDetail userId={params.id} />
<UserDetail userId={id} />
</AdminShell>
);
}