feat(admin): LLM tip quality dashboard — per-model/prompt/kind breakdowns

/admin/reward-analytics now surfaces served count, reaction rate, and average reward grouped by llm_model, prompt_version, and tip_kind, closing the loop so that model/prompt iterations in M2 are legible next to the bandit policy view. Data comes from the tip_scores columns added in ffdf707 and from tip_feedback.reward_milli; bandit-only tips show as "(bandit-only)".

Closes #92.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-24 15:24:52 +00:00
parent 75d0e89906
commit aa4bdd8f09
7 changed files with 227 additions and 9 deletions
--- a/apps/admin/src/lib/api.ts
+++ b/apps/admin/src/lib/api.ts
@@ -158,11 +158,25 @@ export function getTips(params: { limit?: number; offset?: number; userId?: stri
  return apiFetch<{ tips: TipScore[]; total: number }>(`/admin/tips?${q}`);
 }

+export type QualityBreakdownRow = {
+  key: string | null;
+  served: number;
+  done: number;
+  snooze: number;
+  dismiss: number;
+  helpful: number;
+  not_helpful: number;
+  avgRewardMilli: number | null;
+};
+
 export function getRewardAnalytics(days = 30) {
  return apiFetch<{
    daily: { date: string; action: string; count: number }[];
    byPolicy: { policy: string; action: string; count: number }[];
    byHour: { action: string; count: number; avgHour: number }[];
+    byModel: QualityBreakdownRow[];
+    byPromptVersion: QualityBreakdownRow[];
+    byKind: QualityBreakdownRow[];
  }>(`/admin/reward-analytics?days=${days}`);
 }