chore: remove Airflow completely from the stack

Drop all four Airflow containers (db, init, webserver, scheduler) from the
mlops compose profile, leaving MLflow as the sole mlops service. Remove
AIRFLOW_* env vars, config fields, health-check entries, DAG trigger code
in admin/bench routes, the airflow_dag_run_id schema column, Airflow nav
links and DAG-run links in the admin UI, the two Airflow DAG files
(bench_dag.py, sim_dag.py), and all related docs/ADR references.
Simulations now run exclusively via the subprocess path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 16:38:46 +00:00
parent ce1c8bde57
commit f8d66aa01f
27 changed files with 663 additions and 719 deletions

View File

@@ -0,0 +1,220 @@
import { timingSafeEqual } from 'node:crypto';
import { Router } from 'express';
import { nanoid } from 'nanoid';
import { db } from '../db/index.js';
import { agentOutputs, tipFeedback, tipViews } from '../db/schema.js';
import { eq, and, gt, lt } from 'drizzle-orm';
import { config } from '../config.js';
import { getProfile } from '../profile/builder.js';
import { todoistSource } from '../signals/todoist.js';
import { SignalAggregator } from '../signals/aggregator.js';
import type { Request, Response } from 'express';
// Express router that carries every agent route registered in this module.
const router = Router();
// Separate aggregator instance — avoids circular dep with recommender.ts.
// Only the Todoist signal source is registered on it.
const _agentAggregator = new SignalAggregator().register(todoistSource);
// ── Internal auth helper ──────────────────────────────────────────────────────
/**
 * Guards internal-only routes with the shared `x-internal-token` header.
 *
 * Sends a 401 JSON response and returns `false` when the configured token is
 * empty/unset, when the header is missing or not a single string value, or
 * when the two values differ. Callers must stop handling the request as soon
 * as this returns `false`, because the response has already been written.
 *
 * @param req - Incoming request whose `x-internal-token` header is inspected.
 * @param res - Response used to send the 401 on failure.
 * @returns `true` only when the caller presented the configured token.
 */
function checkInternalToken(req: Request, res: Response): boolean {
  const expected = config.INTERNAL_API_TOKEN;
  const provided = req.headers['x-internal-token'];
  let authorized = false;
  if (expected && typeof provided === 'string') {
    const expectedBytes = Buffer.from(expected);
    const providedBytes = Buffer.from(provided);
    // timingSafeEqual throws on a length mismatch, so lengths are compared
    // first; the byte comparison itself does not short-circuit on the first
    // differing byte the way `!==` on strings may.
    authorized =
      expectedBytes.length === providedBytes.length &&
      timingSafeEqual(expectedBytes, providedBytes);
  }
  if (!authorized) {
    res.status(401).json({ error: 'Unauthorized' });
    return false;
  }
  return true;
}
// ── DB helpers ────────────────────────────────────────────────────────────────
/**
 * Loads the agent outputs of one user that have not expired yet.
 *
 * A row qualifies when its `expiresAt` value is greater than the current time
 * rendered as an ISO-8601 string.
 * NOTE(review): this presumes `expiresAt` is stored as an ISO-8601 UTC string
 * so the comparison orders chronologically — confirm against the schema.
 *
 * @param userId - Owner of the outputs to load.
 * @returns The matching `agentOutputs` rows.
 */
export async function getActiveAgentOutputs(userId: string) {
  const nowIso = new Date().toISOString();
  const ownedByUser = eq(agentOutputs.userId, userId);
  const notYetExpired = gt(agentOutputs.expiresAt, nowIso);
  const activeRows = await db.select().from(agentOutputs).where(and(ownedByUser, notYetExpired));
  return activeRows;
}
/**
 * Replaces the stored output of one (user, agent) pair.
 *
 * Every existing row for the pair is deleted first, then a single new row with
 * a freshly generated id is inserted. A falsy `signals_snapshot` is stored as
 * `null`; any other value is stored as its JSON serialization.
 * NOTE(review): the delete and the insert are two separate statements, so a
 * failed insert leaves the pair without any stored output.
 *
 * @param output - Agent result using the snake_case field names of the wire format.
 */
async function storeAgentOutput(output: {
  user_id: string;
  agent_id: string;
  prompt_text: string;
  signals_snapshot?: unknown;
  computed_at: string;
  expires_at: string;
  agent_version: string;
}) {
  const {
    user_id: userId,
    agent_id: agentId,
    prompt_text: promptText,
    signals_snapshot: snapshot,
    computed_at: computedAt,
    expires_at: expiresAt,
    agent_version: agentVersion,
  } = output;

  // Drop whatever was stored before for this (user, agent) pair.
  const sameUserAndAgent = and(eq(agentOutputs.userId, userId), eq(agentOutputs.agentId, agentId));
  await db.delete(agentOutputs).where(sameUserAndAgent);

  let signalsSnapshot: string | null = null;
  if (snapshot) {
    signalsSnapshot = JSON.stringify(snapshot);
  }

  await db.insert(agentOutputs).values({
    id: nanoid(),
    userId,
    agentId,
    promptText,
    signalsSnapshot,
    computedAt,
    expiresAt,
    agentVersion,
  });
}
// ── GET /api/agents/active-users ──────────────────────────────────────────────
// Internal-only (x-internal-token). Responds with { user_ids } holding the
// distinct userId values of tipViews rows whose servedAt falls within the last
// 48 hours. Intended as the user list for fan-out precompute tasks.
// Responds 500 with { error } when the query fails.
router.get('/active-users', async (req: Request, res: Response) => {
  if (!checkInternalToken(req, res)) return;
  const cutoff = new Date(Date.now() - 48 * 60 * 60 * 1000).toISOString();
  try {
    const rows = await db
      .selectDistinct({ userId: tipViews.userId })
      .from(tipViews)
      .where(gt(tipViews.servedAt, cutoff));
    res.json({ user_ids: rows.map((row) => row.userId) });
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    res.status(500).json({ error: err instanceof Error ? err.message : String(err) });
  }
});
// ── POST /api/agents/:agentId/compute ─────────────────────────────────────────
// Orchestrating endpoint for per-(user, agent) compute tasks. Internal-only
// (x-internal-token).
// Body: { user_id: string }
// Steps: collect the user's task signals, profile features and last 7 days of
// tip feedback, POST them to ml/serving /agents/{agentId}/compute, then store
// the returned output (replacing the previous one for that user and agent).
// Responses: 200 { ok, agent_id, user_id, expires_at } · 422 when user_id is
// missing or not a string · 502 when ml/serving answers with a non-2xx status ·
// 500 for any other failure (including the 15 s ml/serving timeout).
router.post('/:agentId/compute', async (req: Request, res: Response) => {
  if (!checkInternalToken(req, res)) return;
  const { agentId } = req.params as { agentId: string };
  // req.body is undefined when no body parser handled the request; fall back to
  // an empty object so the validation below answers 422 instead of throwing
  // outside the try block.
  const { user_id } = (req.body ?? {}) as { user_id?: unknown };
  if (typeof user_id !== 'string' || !user_id) {
    res.status(422).json({ error: 'Missing user_id' });
    return;
  }
  try {
    // Fetch tasks via Todoist integration (gracefully empty if not connected).
    let tasks: object[] = [];
    try {
      const signals = await _agentAggregator.fetchAll(user_id);
      tasks = signals.map((s) => ({
        id: s.id,
        content: s.content,
        priority: (s.features.priority as number) ?? 1,
        is_overdue: Boolean(s.features.is_overdue),
        task_age_days: (s.features.task_age_days as number) ?? 0,
        project_id: (s.metadata as Record<string, unknown>).project_id ?? null,
      }));
    } catch {
      // Deliberate best-effort: no integration or fetch error — agents that
      // need tasks will report "no tasks".
    }

    // Fetch profile features (lazy-refreshed from DB).
    let profile: Record<string, number | null> = {};
    try {
      profile = await getProfile(user_id);
    } catch {
      // Deliberate best-effort: the agent runs with an empty profile.
    }

    // Fetch last 7 days of feedback for RecentPatternsAgent.
    const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
    const feedbackRows = await db
      .select({ action: tipFeedback.action, dwellMs: tipFeedback.dwellMs, createdAt: tipFeedback.createdAt })
      .from(tipFeedback)
      .where(and(eq(tipFeedback.userId, user_id), gt(tipFeedback.createdAt, sevenDaysAgo)));
    const feedbackHistory = feedbackRows.map((f) => ({
      action: f.action,
      dwell_ms: f.dwellMs,
      created_at: f.createdAt,
    }));

    // Call ml/serving to run the agent. The route parameter arrives already
    // URL-decoded, so it is re-encoded to stay a single path segment (a decoded
    // "/" or ".." must not be able to redirect the request to another path).
    const computeUrl = `${config.ML_SERVING_URL}/agents/${encodeURIComponent(agentId)}/compute`;
    const mlResp = await fetch(computeUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ user_id, tasks, profile, feedback_history: feedbackHistory }),
      signal: AbortSignal.timeout(15_000),
    });
    if (!mlResp.ok) {
      const detail = await mlResp.text().catch(() => '');
      res.status(502).json({ error: `ml/serving returned ${mlResp.status}`, detail });
      return;
    }

    // NOTE(review): the payload shape is asserted, not validated at runtime.
    const output = await mlResp.json() as {
      user_id: string; agent_id: string; prompt_text: string;
      signals_snapshot: unknown; computed_at: string; expires_at: string; agent_version: string;
    };
    await storeAgentOutput(output);
    res.json({ ok: true, agent_id: output.agent_id, user_id: output.user_id, expires_at: output.expires_at });
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    res.status(500).json({ error: err instanceof Error ? err.message : String(err) });
  }
});
// ── POST /api/agents/outputs ──────────────────────────────────────────────────
// Stores a pre-computed agent output directly, for callers that ran the agent
// themselves and push the result separately. Internal-only (x-internal-token).
// Body: { user_id, agent_id, prompt_text, computed_at, expires_at,
//         agent_version, signals_snapshot? } — all required fields must be
// non-empty strings; signals_snapshot may be any JSON value.
// Responses: 200 { ok: true } · 422 when a required field is missing ·
// 500 { error } when storing fails.
router.post('/outputs', async (req: Request, res: Response) => {
  if (!checkInternalToken(req, res)) return;
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === 'string' && value.length > 0;
  // req.body is undefined when no body parser handled the request; fall back to
  // an empty object so the validation below answers 422 instead of throwing
  // outside the try block.
  const { user_id, agent_id, prompt_text, signals_snapshot, computed_at, expires_at, agent_version } =
    (req.body ?? {}) as Record<string, unknown>;
  if (
    !isNonEmptyString(user_id) ||
    !isNonEmptyString(agent_id) ||
    !isNonEmptyString(prompt_text) ||
    !isNonEmptyString(computed_at) ||
    !isNonEmptyString(expires_at) ||
    !isNonEmptyString(agent_version)
  ) {
    res.status(422).json({
      error: 'Missing required fields: user_id, agent_id, prompt_text, computed_at, expires_at, agent_version',
    });
    return;
  }
  try {
    await storeAgentOutput({ user_id, agent_id, prompt_text, signals_snapshot, computed_at, expires_at, agent_version });
    res.json({ ok: true });
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    res.status(500).json({ error: err instanceof Error ? err.message : String(err) });
  }
});
// ── DELETE /api/agents/outputs/expired ───────────────────────────────────────
// Purges rows expired more than 24 hours ago, i.e. rows whose expiresAt is
// below (now − 24 h) rendered as an ISO-8601 string. Internal-only
// (x-internal-token). Responds 200 { ok: true }, or 500 { error } on failure.
router.delete('/outputs/expired', async (req: Request, res: Response) => {
  if (!checkInternalToken(req, res)) return;
  const cutoff = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
  try {
    await db.delete(agentOutputs).where(lt(agentOutputs.expiresAt, cutoff));
    res.json({ ok: true });
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    res.status(500).json({ error: err instanceof Error ? err.message : String(err) });
  }
});
// ── GET /api/agents/:userId/outputs ──────────────────────────────────────────
// Returns non-expired agent outputs. Admin observability; recommender calls
// getActiveAgentOutputs() directly (no HTTP hop).
// Response: { user_id, outputs: [{ agent_id, prompt_text, computed_at,
// expires_at, agent_version }] } — the stored signals snapshot is not included.
// NOTE(review): unlike the other routes in this file, this handler does not
// call checkInternalToken — confirm access is restricted where the router is
// mounted, since prompt_text is returned for any userId.
router.get('/:userId/outputs', async (req: Request, res: Response) => {
  const { userId } = req.params as { userId: string };
  try {
    const rows = await getActiveAgentOutputs(userId);
    res.json({
      user_id: userId,
      outputs: rows.map((row) => ({
        agent_id: row.agentId,
        prompt_text: row.promptText,
        computed_at: row.computedAt,
        expires_at: row.expiresAt,
        agent_version: row.agentVersion,
      })),
    });
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    res.status(500).json({ error: err instanceof Error ? err.message : String(err) });
  }
});
// Default export: the router carrying every route registered above.
export default router;