feat(admin): LLM tip quality dashboard — per-model/prompt/kind breakdowns
/admin/reward-analytics now surfaces served count, reaction rate, and avg
reward grouped by llm_model, prompt_version, and tip_kind — closing the
loop so model/prompt iterations in M2 are legible next to the bandit
policy view. Data comes from the tip_scores columns added in ffdf707 and
tip_feedback.reward_milli; bandit-only tips show as "(bandit-only)".
Closes #92.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ Next.js 15 app. Deployed at `admin.o.alogins.net` (dev: `http://localhost:3080`)
|
||||
| `/users/:id` | User detail: identity, consents, integrations, tip stats, reward history; revoke-integration + reset-bandit actions |
|
||||
| `/audit` | Admin action audit log |
|
||||
| `/events` | Event stream viewer (stub — pending API history endpoint) |
|
||||
| `/reward-analytics` | Reaction distribution + per-policy / per-model / per-prompt-version / per-tip-kind breakdowns with avg reward |
|
||||
|
||||
## Dev
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import { useEffect, useState } from 'react';
|
||||
import { AdminShell } from '@/components/AdminShell';
|
||||
import { getRewardAnalytics } from '@/lib/api';
|
||||
import { getRewardAnalytics, type QualityBreakdownRow } from '@/lib/api';
|
||||
|
||||
const ACTION_COLORS: Record<string, string> = {
|
||||
done: 'bg-green-500',
|
||||
@@ -12,6 +12,53 @@ const ACTION_COLORS: Record<string, string> = {
|
||||
dismiss: 'bg-red-500',
|
||||
};
|
||||
|
||||
/**
 * Reaction counters rendered as table columns, in display order.
 * Shared by the header row and the body rows so the two cannot drift apart.
 */
const REACTION_COLUMNS = ['done', 'helpful', 'snooze', 'not_helpful', 'dismiss'] as const;

/**
 * Renders one quality-breakdown table: a row per group with served count,
 * reaction rate, average reward and the individual reaction counters,
 * followed by the total served count across all rows.
 *
 * Returns `null` (renders nothing) when `rows` is empty.
 *
 * @param title      Heading shown above the table.
 * @param dimension  Label for the first column (the grouping dimension).
 * @param rows       Aggregated rows to display.
 * @param emptyLabel Placeholder shown in place of a `null` row key.
 */
function QualityBreakdown({ title, dimension, rows, emptyLabel }: {
  title: string;
  dimension: string;
  rows: QualityBreakdownRow[];
  emptyLabel: string; // shown when a row's key is null (e.g. bandit-only tips have no llm_model)
}) {
  if (rows.length === 0) return null;

  const totalServed = rows.reduce((sum, r) => sum + r.served, 0);

  return (
    <div className="space-y-2">
      <h2 className="text-sm font-medium text-gray-400">{title}</h2>
      <table className="w-full text-xs">
        <thead>
          <tr className="border-b border-gray-800 text-gray-500 text-left">
            <th className="py-2 pr-4">{dimension}</th>
            <th className="py-2 pr-4">served</th>
            <th className="py-2 pr-4">reaction rate</th>
            <th className="py-2 pr-4">avg reward</th>
            {REACTION_COLUMNS.map((a) => (
              <th key={a} className="py-2 pr-4">{a}</th>
            ))}
          </tr>
        </thead>
        <tbody>
          {rows.map((r) => {
            // Every recorded reaction, regardless of type, counts towards the rate.
            const reacted = r.done + r.snooze + r.dismiss + r.helpful + r.not_helpful;
            // Guard against division by zero for groups with nothing served.
            const reactionRate = r.served > 0 ? (reacted / r.served) * 100 : 0;
            // avgRewardMilli is scaled down by 1000 for display; null stays null.
            const avgReward = r.avgRewardMilli == null ? null : r.avgRewardMilli / 1000;
            return (
              // A null key needs a stable, non-null React key.
              <tr key={r.key ?? '__null__'} className="border-b border-gray-800/50">
                <td className="py-2 pr-4 font-medium text-indigo-300">{r.key ?? <span className="text-gray-500 italic">{emptyLabel}</span>}</td>
                <td className="py-2 pr-4 text-gray-300">{r.served}</td>
                <td className="py-2 pr-4 text-gray-300">{reactionRate.toFixed(1)}%</td>
                <td className="py-2 pr-4 text-gray-300">{avgReward == null ? '—' : avgReward.toFixed(2)}</td>
                {REACTION_COLUMNS.map((a) => (
                  <td key={a} className="py-2 pr-4 text-gray-300">{r[a]}</td>
                ))}
              </tr>
            );
          })}
        </tbody>
      </table>
      <p className="text-xs text-gray-600">{totalServed} tips served total.</p>
    </div>
  );
}
|
||||
|
||||
export default function RewardAnalyticsPage() {
|
||||
const [days, setDays] = useState(30);
|
||||
const [data, setData] = useState<Awaited<ReturnType<typeof getRewardAnalytics>> | null>(null);
|
||||
@@ -108,6 +155,30 @@ export default function RewardAnalyticsPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* LLM quality breakdowns (#92) */}
|
||||
{data && (
|
||||
<>
|
||||
<QualityBreakdown
|
||||
title="Per LLM model"
|
||||
dimension="llm_model"
|
||||
rows={data.byModel ?? []}
|
||||
emptyLabel="(bandit-only)"
|
||||
/>
|
||||
<QualityBreakdown
|
||||
title="Per prompt version"
|
||||
dimension="prompt_version"
|
||||
rows={data.byPromptVersion ?? []}
|
||||
emptyLabel="(unset)"
|
||||
/>
|
||||
<QualityBreakdown
|
||||
title="Per tip kind"
|
||||
dimension="tip_kind"
|
||||
rows={data.byKind ?? []}
|
||||
emptyLabel="(unset)"
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Daily table */}
|
||||
{(data?.daily?.length ?? 0) > 0 && (
|
||||
<div className="space-y-2">
|
||||
|
||||
@@ -158,11 +158,25 @@ export function getTips(params: { limit?: number; offset?: number; userId?: stri
|
||||
return apiFetch<{ tips: TipScore[]; total: number }>(`/admin/tips?${q}`);
|
||||
}
|
||||
|
||||
/**
 * One aggregated row of a reward-analytics quality breakdown
 * (used for the per-model, per-prompt-version and per-tip-kind lists).
 */
export type QualityBreakdownRow = {
  /** Value of the grouping dimension; `null` when the group has no value. */
  key: string | null;
  /** Number of tips served in this group. */
  served: number;
  /** Count of `done` reactions. */
  done: number;
  /** Count of `snooze` reactions. */
  snooze: number;
  /** Count of `dismiss` reactions. */
  dismiss: number;
  /** Count of `helpful` reactions. */
  helpful: number;
  /** Count of `not_helpful` reactions. */
  not_helpful: number;
  /**
   * Average reward for the group, or `null` when unavailable.
   * NOTE(review): presumably in thousandths (consumers divide by 1000) — confirm against the API.
   */
  avgRewardMilli: number | null;
};
|
||||
|
||||
/**
 * Requests `/admin/reward-analytics` through `apiFetch`.
 *
 * @param days Value sent as the `days` query parameter; defaults to 30.
 *             NOTE(review): presumably the look-back window in days — confirm against the API.
 * @returns Whatever `apiFetch` returns for the typed response below: reaction
 *          counts grouped by date, policy and hour, plus quality-breakdown rows
 *          grouped by model, prompt version and tip kind.
 */
export function getRewardAnalytics(days = 30) {
  return apiFetch<{
    daily: { date: string; action: string; count: number }[];
    byPolicy: { policy: string; action: string; count: number }[];
    byHour: { action: string; count: number; avgHour: number }[];
    byModel: QualityBreakdownRow[];
    byPromptVersion: QualityBreakdownRow[];
    byKind: QualityBreakdownRow[];
  }>(`/admin/reward-analytics?days=${days}`);
}
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user