Files
oO/services/api/src/index.ts
alvis 0474ad4deb feat(airflow): integrate bench harness into bench_collect DAG
New DAG (`ml/pipelines/bench_dag.py`) with three linked tasks:
1. collect.py — generates candidates, logs to MLflow
2. export_for_judge — exports pending runs for Claude Code scoring
3. compare — generates leaderboard by (model, prompt) cell

Config via dag_run.conf supports all collect.py options (models, prompts,
n_tips, n_scenarios, temperature, experiment name, max_model_b).

New admin API endpoints (`services/api/src/routes/bench.ts`):
- GET /api/bench/experiments — list tip-bench-* experiments
- POST /api/bench/run — trigger DAG with custom config
- GET /api/bench/runs/:experiment — list runs in experiment
- GET /api/bench/leaderboard/:experiment — leaderboard by (model, prompt)

All endpoints require admin auth. Human judge (Claude Code) scores are
applied manually post-export; future enhancement: add webhook to DAG.

Admin UI can now trigger and monitor benchmarks from a dashboard panel.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-27 11:54:30 +00:00

115 lines
3.8 KiB
TypeScript

import 'dotenv/config';
import { logger } from './logger.js';
import express from 'express';
import { pinoHttp } from 'pino-http';
import cookieParser from 'cookie-parser';
import cors from 'cors';
import { tracingMiddleware } from './middleware/tracing.js';
import { config } from './config.js';
import { db, runMigrations } from './db/index.js';
import { tipScores, tipFeedback } from './db/schema.js';
import { lt } from 'drizzle-orm';
import { sessionMiddleware } from './middleware/session.js';
import { authRouter } from './routes/auth.js';
import { integrationsRouter } from './routes/integrations.js';
import { recommenderRouter } from './routes/recommender.js';
import { userRouter } from './routes/user.js';
import { pushRouter } from './routes/push.js';
import { adminRouter, adminInternalRouter } from './routes/admin.js';
import benchRouter from './routes/bench.js';
import { mkdir } from 'fs/promises';
import { dirname } from 'path';
import { requireAuth } from './middleware/session.js';
import { requireAdmin } from './middleware/admin.js';
import type { Request, Response } from 'express';
import { connectNats } from './events/nats.js';
import { startTodoistSyncScheduler } from './signals/scheduler.js';
import { bus } from './events/bus.js';
import { registerProfileSubscriptions } from './profile/subscriber.js';
// Create the directory that will contain config.DATABASE_PATH before the
// migrations run; with `recursive: true` an already-existing directory is fine.
await mkdir(dirname(config.DATABASE_PATH), { recursive: true });
runMigrations();

// A promise rejected with nobody handling it: record it and keep running.
process.on('unhandledRejection', (reason) => {
  logger.error({ err: reason }, 'unhandledRejection');
});

// Node documents that it is not safe to resume normal operation after
// 'uncaughtException'. Errors that land here include 'error' events emitted
// with no listener attached, so merely logging could leave a process alive
// that no longer does useful work. Log at fatal level, then exit non-zero.
process.on('uncaughtException', (err) => {
  logger.fatal({ err }, 'uncaughtException');
  process.exit(1);
});
const app = express();

// Cross-origin requests are accepted from the configured web origin only,
// with credentials allowed.
const corsOptions = {
  origin: config.WEB_BASE_URL,
  credentials: true,
};
app.use(cors(corsOptions));

// Tracing is registered ahead of the HTTP logger, which reads `traceId`
// off the request for both the request id and an extra log property.
app.use(tracingMiddleware);
app.use(
  pinoHttp({
    logger,
    genReqId: (request) => request.traceId,
    customProps: (request) => ({ traceId: request.traceId }),
    // Requests to /health are excluded from automatic request logging.
    autoLogging: {
      ignore: (request) => request.url === '/health',
    },
  }),
);

// Body, cookie and session handling, in that order.
app.use(express.json());
app.use(cookieParser());
app.use(sessionMiddleware);

// Health probe.
app.get('/health', (_req, res) => {
  res.json({ ok: true });
});

// Routers are mounted in exactly the order listed here.
const routerMounts = [
  ['/api/auth', authRouter],
  ['/api/integrations', integrationsRouter],
  ['/api', recommenderRouter],
  ['/api/user', userRouter],
  ['/api/push', pushRouter],
  ['/api/admin', adminRouter],
  ['/api/admin', adminInternalRouter],
] as const;
for (const [prefix, router] of routerMounts) {
  app.use(prefix, router);
}

// Bench routes sit behind both the auth and the admin guard.
app.use('/api/bench', requireAuth as any, requireAdmin as any, benchRouter);
// Pass-through to the ML serving backend, behind the auth and admin guards.
// The request is re-issued against config.ML_SERVING_URL with a JSON body and
// the incoming traceparent header; the upstream status and JSON payload are
// relayed back. Any failure (network error, 5 s timeout, unparsable body) is
// reported to the caller as a 502.
app.use('/api/ml', requireAuth as any, requireAdmin as any, async (req: Request, res: Response) => {
  // `req.url` is relative to the mount point and, unlike `req.path`, still
  // carries the query string, so upstream sees the same parameters.
  const target = `${config.ML_SERVING_URL}${req.url}`;
  // fetch() rejects GET and HEAD requests that carry a body.
  const sendsBody = req.method !== 'GET' && req.method !== 'HEAD';
  try {
    const upstream = await fetch(target, {
      method: req.method,
      headers: { 'Content-Type': 'application/json', traceparent: req.traceparent },
      body: sendsBody ? JSON.stringify(req.body) : undefined,
      signal: AbortSignal.timeout(5000),
    });
    const raw = await upstream.text();
    if (raw === '') {
      // Empty upstream body (e.g. 204 or a HEAD response): relay the status
      // as-is instead of failing on JSON parsing.
      res.status(upstream.status).end();
      return;
    }
    res.status(upstream.status).json(JSON.parse(raw));
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e);
    res.status(502).json({ error: 'ml/serving unavailable', detail });
  }
});
/**
 * Deletes `tipScores` rows whose `servedAt` and `tipFeedback` rows whose
 * `createdAt` are older than the 30-day retention window.
 *
 * Each table is purged independently, so a failure on one does not skip the
 * other. Failures are logged and never thrown, which makes the function safe
 * to call without awaiting it and to hand to a timer.
 */
async function purgeExpiredData(): Promise<void> {
  const retentionMs = 30 * 24 * 60 * 60 * 1000;
  // The cutoff is compared as an ISO-8601 string, matching the original query.
  const cutoff = new Date(Date.now() - retentionMs).toISOString();

  const purges: Array<[table: string, run: () => unknown]> = [
    ['tipScores', () => db.delete(tipScores).where(lt(tipScores.servedAt, cutoff))],
    ['tipFeedback', () => db.delete(tipFeedback).where(lt(tipFeedback.createdAt, cutoff))],
  ];

  for (const [table, run] of purges) {
    try {
      await run();
    } catch (err: unknown) {
      logger.error({ err, table }, 'retention cleanup failed');
    }
  }
}
// Run one retention pass right away, then repeat every 24 hours.
const purgeIntervalMs = 24 * 60 * 60 * 1000;
void purgeExpiredData();
setInterval(purgeExpiredData, purgeIntervalMs);

// Start accepting HTTP traffic on the configured port.
app.listen(config.PORT, () => {
  logger.info({ port: config.PORT }, 'oO API listening');
});

// NATS is only connected when a URL has been configured.
if (config.NATS_URL) {
  connectNats(config.NATS_URL);
}

startTodoistSyncScheduler(config.TODOIST_SYNC_INTERVAL_MS);

// Profile features are invalidated on relevant signals (#81 phase B.2);
// TTL stays as a safety net for clock drift / dropped events.
registerProfileSubscriptions(bus);