feat: M1 admin console — all 10 remaining pages + signal/quality/ops infrastructure

Admin console (issues #63–72):
- Event stream viewer: live-tail ring buffer (500 events) with subject/user filters
- Feature store browser: per-user feature vector history from ml/serving
- Model registry panel: MLflow embed at /admin/models
- Experiment dashboard: LinUCB per-user stats (pulls, reward, θ) + bandit reset
- Recommendation log: per-tip explainability (policy, score, features, latency)
- Reward analytics: daily reaction breakdown + per-policy compare
- Data quality widget: missing-feature rate, stale-token rate, daily completeness
- Ops actions: replay-signal, policy enable/disable; user actions link to Users page
- SQL runner: read-only SELECT runner with saved queries
- Health rollup: fan-out to api/ml/sqlite/event-bus with auto-refresh

Backend:
- tip_scores table: logs features+policy+score+latency at every scoring call (#67)
- saved_queries table: per-admin saved SQL (#71)
- Event bus: 500-event ring buffer + tail() API (#63)
- Admin routes: /events, /tips, /reward-analytics, /data-quality, /health,
  /policies, /replay-signal, /sql, /saved-queries endpoints
- /api/ml/* admin-gated proxy to ml/serving (#64, #66)
- Shadow-policy registry in recommender (#56)

ML serving:
- /reset/{user_id}: clear bandit state + feature history (#66)
- /stats/{user_id}: pulls, cumulative reward, estimated mean, θ (#66)
- /features/{user_id}: last 100 feature vectors logged at scoring time (#64)
- Meta (pulls, rewards) persisted alongside A/b matrices

Web:
- Tip action sheet adds Helpful / Not helpful buttons (#62)
- TipFeedback type extended with helpful/not_helpful actions
- Rewards mapped: helpful=+0.5, not_helpful=−0.5

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 03:56:48 +00:00
parent 2402a140e9
commit e62c726ea4
37 changed files with 3386 additions and 38 deletions

View File

@@ -2,9 +2,12 @@
oO ML Serving — Phase 1: LinUCB contextual bandit.
Contract:
POST /score { user_id, candidates, context } → { tip_id, score, policy }
POST /reward { user_id, tip_id, reward, features } → { ok }
GET /health{ ok }
POST /score { user_id, candidates, context } → { tip_id, score, policy }
POST /reward { user_id, tip_id, reward, features } → { ok }
POST /reset/{user_id} { ok }
GET /stats/{user_id}{ pulls, cumulative_reward, estimated_mean, last_updated }
GET /features/{user_id}{ history: [{ ts, features, score }] }
GET /health → { ok }
Features (d=5):
hour_sin, hour_cos — cyclical time-of-day encoding
@@ -18,9 +21,10 @@ from __future__ import annotations
import json
import math
import os
import random
import time
from collections import deque
from pathlib import Path
from typing import Optional
from typing import Optional, Deque
import numpy as np
from fastapi import FastAPI, HTTPException
@@ -33,6 +37,16 @@ STATE_DIR.mkdir(parents=True, exist_ok=True)
ALPHA = 1.0 # exploration coefficient
D = 5 # feature dimension
FEATURE_HISTORY_SIZE = 100 # per-user ring buffer
# ── Per-user in-memory feature history ────────────────────────────────────
_feature_history: dict[str, deque] = {}
def get_feature_history(user_id: str) -> deque:
if user_id not in _feature_history:
_feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE)
return _feature_history[user_id]
# ── Feature helpers ────────────────────────────────────────────────────────
@@ -54,20 +68,21 @@ def state_path(user_id: str) -> Path:
return STATE_DIR / f"{safe}.json"
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
"""Returns (A, b). A is DxD, b is D-vector."""
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
"""Returns (A, b, meta). A is DxD, b is D-vector."""
p = state_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
return A, b
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)
meta = raw.get("meta", {})
return A, b, meta
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}
def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
p = state_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist()}))
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
# ── API models ─────────────────────────────────────────────────────────────
@@ -107,7 +122,7 @@ class ScoreResponse(BaseModel):
class RewardRequest(BaseModel):
user_id: str
tip_id: str
reward: float # +1 done, 0 snooze, -1 dismiss
reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
features: CandidateFeatures
@@ -127,7 +142,7 @@ def score(req: ScoreRequest) -> ScoreResponse:
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
A, b = load_state(req.user_id)
A, b, meta = load_state(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
@@ -137,6 +152,7 @@ def score(req: ScoreRequest) -> ScoreResponse:
best_id = None
best_score = -float("inf")
best_features: dict = {}
for candidate in req.candidates:
feat_dict = {
@@ -152,13 +168,28 @@ def score(req: ScoreRequest) -> ScoreResponse:
if ucb > best_score:
best_score = ucb
best_id = candidate.id
best_features = feat_dict
# Log to feature history ring buffer
history = get_feature_history(req.user_id)
history.append({
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"features": best_features,
"score": best_score,
"tip_id": best_id,
})
# Update meta stats
meta["pulls"] = meta.get("pulls", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
A, b = load_state(req.user_id)
A, b, meta = load_state(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
@@ -168,5 +199,58 @@ def reward(req: RewardRequest) -> RewardResponse:
x = build_feature_vector(feat_dict)
A += np.outer(x, x)
b += req.reward * x
save_state(req.user_id, A, b)
# Track cumulative reward in meta
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
meta["reward_count"] = meta.get("reward_count", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return RewardResponse(ok=True)
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
"""Reset per-user bandit state (admin action)."""
p = state_path(user_id)
if p.exists():
p.unlink()
if user_id in _feature_history:
_feature_history[user_id].clear()
return RewardResponse(ok=True)
@app.get("/stats/{user_id}")
def stats(user_id: str):
"""Return current LinUCB state summary for a user."""
A, b, meta = load_state(user_id)
try:
A_inv = np.linalg.inv(A)
theta = (A_inv @ b).tolist()
except np.linalg.LinAlgError:
theta = [0.0] * D
pulls = meta.get("pulls", 0)
cumulative_reward = meta.get("cumulative_reward", 0.0)
reward_count = meta.get("reward_count", 0)
estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0
return {
"user_id": user_id,
"pulls": pulls,
"reward_count": reward_count,
"cumulative_reward": cumulative_reward,
"estimated_mean_reward": estimated_mean,
"theta": theta,
"last_updated": meta.get("last_updated"),
}
@app.get("/features/{user_id}")
def features(user_id: str):
"""Return recent feature vectors logged at scoring time."""
history = get_feature_history(user_id)
return {
"user_id": user_id,
"history": list(history),
}