feat: M1 admin console — all 10 remaining pages + signal/quality/ops infrastructure

Admin console (issues #63–72): - Event stream viewer: live-tail ring buffer (500 events) with subject/user filters - Feature store browser: per-user feature vector history from ml/serving - Model registry panel: MLflow embed at /admin/models - Experiment dashboard: LinUCB per-user stats (pulls, reward, θ) + bandit reset - Recommendation log: per-tip explainability (policy, score, features, latency) - Reward analytics: daily reaction breakdown + per-policy compare - Data quality widget: missing-feature rate, stale-token rate, daily completeness - Ops actions: replay-signal, policy enable/disable; user actions link to Users page - SQL runner: read-only SELECT runner with saved queries - Health rollup: fan-out to api/ml/sqlite/event-bus with auto-refresh Backend: - tip_scores table: logs features+policy+score+latency at every scoring call (#67) - saved_queries table: per-admin saved SQL (#71) - Event bus: 500-event ring buffer + tail() API (#63) - Admin routes: /events, /tips, /reward-analytics, /data-quality, /health, /policies, /replay-signal, /sql, /saved-queries endpoints - /api/ml/* admin-gated proxy to ml/serving (#64, #66) - Shadow-policy registry in recommender (#56) ML serving: - /reset/{user_id}: clear bandit state + feature history (#66) - /stats/{user_id}: pulls, cumulative reward, estimated mean, θ (#66) - /features/{user_id}: last 100 feature vectors logged at scoring time (#64) - Meta (pulls, rewards) persisted alongside A/b matrices Web: - Tip action sheet adds Helpful / Not helpful buttons (#62) - TipFeedback type extended with helpful/not_helpful actions - Rewards mapped: helpful=+0.5, not_helpful=−0.5 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 03:56:48 +00:00
parent 2402a140e9
commit e62c726ea4
37 changed files with 3386 additions and 38 deletions
--- a/ml/serving/main.py
+++ b/ml/serving/main.py
@@ -2,9 +2,12 @@
 oO ML Serving — Phase 1: LinUCB contextual bandit.

 Contract:
-  POST /score   { user_id, candidates, context } → { tip_id, score, policy }
-  POST /reward  { user_id, tip_id, reward, features } → { ok }
-  GET  /health  → { ok }
+  POST /score              { user_id, candidates, context } → { tip_id, score, policy }
+  POST /reward             { user_id, tip_id, reward, features } → { ok }
+  POST /reset/{user_id}    → { ok }
+  GET  /stats/{user_id}    → { user_id, pulls, reward_count, cumulative_reward, estimated_mean_reward, theta, last_updated }
+  GET  /features/{user_id} → { user_id, history: [{ ts, features, score, tip_id }] }
+  GET  /health             → { ok }

 Features (d=5):
  hour_sin, hour_cos  — cyclical time-of-day encoding
@@ -18,9 +21,10 @@ from __future__ import annotations
 import json
 import math
 import os
-import random
+import time
+from collections import deque
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Deque

 import numpy as np
 from fastapi import FastAPI, HTTPException
@@ -33,6 +37,16 @@ STATE_DIR.mkdir(parents=True, exist_ok=True)

 ALPHA = 1.0  # exploration coefficient
 D = 5        # feature dimension
+FEATURE_HISTORY_SIZE = 100  # per-user ring buffer
+
+
+# ── Per-user in-memory feature history ────────────────────────────────────
+_feature_history: dict[str, deque] = {}
+
+def get_feature_history(user_id: str) -> deque:
+    if user_id not in _feature_history:
+        _feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE)
+    return _feature_history[user_id]


 # ── Feature helpers ────────────────────────────────────────────────────────
@@ -54,20 +68,21 @@ def state_path(user_id: str) -> Path:
    return STATE_DIR / f"{safe}.json"


-def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
-    """Returns (A, b). A is DxD, b is D-vector."""
+def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
+    """Returns (A, b, meta). A is DxD, b is D-vector, meta is the persisted stats dict ({} if none saved)."""
    p = state_path(user_id)
    if p.exists():
        raw = json.loads(p.read_text())
        A = np.array(raw["A"], dtype=np.float64)
        b = np.array(raw["b"], dtype=np.float64)
-        return A, b
-    return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)
+        meta = raw.get("meta", {})
+        return A, b, meta
+    return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}


-def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
+def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
    p = state_path(user_id)
-    p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist()}))
+    p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))


 # ── API models ─────────────────────────────────────────────────────────────
@@ -107,7 +122,7 @@ class ScoreResponse(BaseModel):
 class RewardRequest(BaseModel):
    user_id: str
    tip_id: str
-    reward: float          # +1 done, 0 snooze, -1 dismiss
+    reward: float          # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
    features: CandidateFeatures


@@ -127,7 +142,7 @@ def score(req: ScoreRequest) -> ScoreResponse:
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

-    A, b = load_state(req.user_id)
+    A, b, meta = load_state(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
@@ -137,6 +152,7 @@ def score(req: ScoreRequest) -> ScoreResponse:

    best_id = None
    best_score = -float("inf")
+    best_features: dict = {}

    for candidate in req.candidates:
        feat_dict = {
@@ -152,13 +168,28 @@ def score(req: ScoreRequest) -> ScoreResponse:
        if ucb > best_score:
            best_score = ucb
            best_id = candidate.id
+            best_features = feat_dict
+
+    # Log to feature history ring buffer
+    history = get_feature_history(req.user_id)
+    history.append({
+        "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+        "features": best_features,
+        "score": best_score,
+        "tip_id": best_id,
+    })
+
+    # Update meta stats
+    meta["pulls"] = meta.get("pulls", 0) + 1
+    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+    save_state(req.user_id, A, b, meta)

    return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")


@app.post("/reward", response_model=RewardResponse)
 def reward(req: RewardRequest) -> RewardResponse:
-    A, b = load_state(req.user_id)
+    A, b, meta = load_state(req.user_id)
    feat_dict = {
        "hour_of_day": req.features.hour_of_day,
        "is_overdue": req.features.is_overdue,
@@ -168,5 +199,58 @@ def reward(req: RewardRequest) -> RewardResponse:
    x = build_feature_vector(feat_dict)
    A += np.outer(x, x)
    b += req.reward * x
-    save_state(req.user_id, A, b)
+
+    # Track cumulative reward in meta
+    meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
+    meta["reward_count"] = meta.get("reward_count", 0) + 1
+    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+
+    save_state(req.user_id, A, b, meta)
    return RewardResponse(ok=True)
+
+
+@app.post("/reset/{user_id}", response_model=RewardResponse)
+def reset(user_id: str) -> RewardResponse:
+    """Reset per-user bandit state and clear the in-memory feature history (admin action)."""
+    p = state_path(user_id)
+    if p.exists():
+        p.unlink()
+    if user_id in _feature_history:
+        _feature_history[user_id].clear()
+    return RewardResponse(ok=True)
+
+
+@app.get("/stats/{user_id}")
+def stats(user_id: str):
+    """Return current LinUCB state summary for a user."""
+    A, b, meta = load_state(user_id)
+    try:
+        A_inv = np.linalg.inv(A)
+        theta = (A_inv @ b).tolist()
+    except np.linalg.LinAlgError:
+        theta = [0.0] * D
+
+    pulls = meta.get("pulls", 0)
+    cumulative_reward = meta.get("cumulative_reward", 0.0)
+    reward_count = meta.get("reward_count", 0)
+    estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0
+
+    return {
+        "user_id": user_id,
+        "pulls": pulls,
+        "reward_count": reward_count,
+        "cumulative_reward": cumulative_reward,
+        "estimated_mean_reward": estimated_mean,
+        "theta": theta,
+        "last_updated": meta.get("last_updated"),
+    }
+
+
+@app.get("/features/{user_id}")
+def features(user_id: str):
+    """Return recent feature vectors logged at scoring time."""
+    history = get_feature_history(user_id)
+    return {
+        "user_id": user_id,
+        "history": list(history),
+    }