feat: M1 admin console — all 10 remaining pages + signal/quality/ops infrastructure
Admin console (issues #63–72): - Event stream viewer: live-tail ring buffer (500 events) with subject/user filters - Feature store browser: per-user feature vector history from ml/serving - Model registry panel: MLflow embed at /admin/models - Experiment dashboard: LinUCB per-user stats (pulls, reward, θ) + bandit reset - Recommendation log: per-tip explainability (policy, score, features, latency) - Reward analytics: daily reaction breakdown + per-policy compare - Data quality widget: missing-feature rate, stale-token rate, daily completeness - Ops actions: replay-signal, policy enable/disable; user actions link to Users page - SQL runner: read-only SELECT runner with saved queries - Health rollup: fan-out to api/ml/sqlite/event-bus with auto-refresh Backend: - tip_scores table: logs features+policy+score+latency at every scoring call (#67) - saved_queries table: per-admin saved SQL (#71) - Event bus: 500-event ring buffer + tail() API (#63) - Admin routes: /events, /tips, /reward-analytics, /data-quality, /health, /policies, /replay-signal, /sql, /saved-queries endpoints - /api/ml/* admin-gated proxy to ml/serving (#64, #66) - Shadow-policy registry in recommender (#56) ML serving: - /reset/{user_id}: clear bandit state + feature history (#66) - /stats/{user_id}: pulls, cumulative reward, estimated mean, θ (#66) - /features/{user_id}: last 100 feature vectors logged at scoring time (#64) - Meta (pulls, rewards) persisted alongside A/b matrices Web: - Tip action sheet adds Helpful / Not helpful buttons (#62) - TipFeedback type extended with helpful/not_helpful actions - Rewards mapped: helpful=+0.5, not_helpful=−0.5 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,9 +2,12 @@
|
||||
ML Serving — Phase 1: LinUCB contextual bandit.
|
||||
|
||||
Contract:
|
||||
POST /score { user_id, candidates, context } → { tip_id, score, policy }
|
||||
POST /reward { user_id, tip_id, reward, features } → { ok }
|
||||
GET /health → { ok }
|
||||
POST /score { user_id, candidates, context } → { tip_id, score, policy }
|
||||
POST /reward { user_id, tip_id, reward, features } → { ok }
|
||||
POST /reset/{user_id} → { ok }
|
||||
GET /stats/{user_id} → { user_id, pulls, reward_count, cumulative_reward, estimated_mean_reward, theta, last_updated }
|
||||
GET /features/{user_id} → { user_id, history: [{ ts, features, score, tip_id }] }
|
||||
GET /health → { ok }
|
||||
|
||||
Features (d=5):
|
||||
hour_sin, hour_cos — cyclical time-of-day encoding
|
||||
@@ -18,9 +21,10 @@ from __future__ import annotations
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from collections import deque
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Optional, Deque
|
||||
|
||||
import numpy as np
|
||||
from fastapi import FastAPI, HTTPException
|
||||
@@ -33,6 +37,16 @@ STATE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
ALPHA = 1.0 # exploration coefficient
|
||||
D = 5 # feature dimension
|
||||
FEATURE_HISTORY_SIZE = 100 # per-user ring buffer
|
||||
|
||||
|
||||
# ── Per-user in-memory feature history ────────────────────────────────────
|
||||
_feature_history: dict[str, deque] = {}
|
||||
|
||||
def get_feature_history(user_id: str) -> deque:
    """Return the feature-history ring buffer for ``user_id``.

    The buffer is created on first access, capped at
    ``FEATURE_HISTORY_SIZE`` entries, and stored in the module-level
    ``_feature_history`` mapping so later calls return the same deque.
    """
    try:
        return _feature_history[user_id]
    except KeyError:
        # First request for this user: register a fresh bounded buffer.
        buffer = deque(maxlen=FEATURE_HISTORY_SIZE)
        _feature_history[user_id] = buffer
        return buffer
|
||||
|
||||
|
||||
# ── Feature helpers ────────────────────────────────────────────────────────
|
||||
@@ -54,20 +68,21 @@ def state_path(user_id: str) -> Path:
|
||||
return STATE_DIR / f"{safe}.json"
|
||||
|
||||
|
||||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Returns (A, b). A is DxD, b is D-vector."""
|
||||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
|
||||
"""Returns (A, b, meta). A is DxD, b is D-vector."""
|
||||
p = state_path(user_id)
|
||||
if p.exists():
|
||||
raw = json.loads(p.read_text())
|
||||
A = np.array(raw["A"], dtype=np.float64)
|
||||
b = np.array(raw["b"], dtype=np.float64)
|
||||
return A, b
|
||||
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)
|
||||
meta = raw.get("meta", {})
|
||||
return A, b, meta
|
||||
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}
|
||||
|
||||
|
||||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
|
||||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
|
||||
p = state_path(user_id)
|
||||
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist()}))
|
||||
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
|
||||
|
||||
|
||||
# ── API models ─────────────────────────────────────────────────────────────
|
||||
@@ -107,7 +122,7 @@ class ScoreResponse(BaseModel):
|
||||
class RewardRequest(BaseModel):
|
||||
user_id: str
|
||||
tip_id: str
|
||||
reward: float # +1 done, 0 snooze, -1 dismiss
|
||||
reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
|
||||
features: CandidateFeatures
|
||||
|
||||
|
||||
@@ -127,7 +142,7 @@ def score(req: ScoreRequest) -> ScoreResponse:
|
||||
if not req.candidates:
|
||||
raise HTTPException(status_code=422, detail="No candidates")
|
||||
|
||||
A, b = load_state(req.user_id)
|
||||
A, b, meta = load_state(req.user_id)
|
||||
try:
|
||||
A_inv = np.linalg.inv(A)
|
||||
except np.linalg.LinAlgError:
|
||||
@@ -137,6 +152,7 @@ def score(req: ScoreRequest) -> ScoreResponse:
|
||||
|
||||
best_id = None
|
||||
best_score = -float("inf")
|
||||
best_features: dict = {}
|
||||
|
||||
for candidate in req.candidates:
|
||||
feat_dict = {
|
||||
@@ -152,13 +168,28 @@ def score(req: ScoreRequest) -> ScoreResponse:
|
||||
if ucb > best_score:
|
||||
best_score = ucb
|
||||
best_id = candidate.id
|
||||
best_features = feat_dict
|
||||
|
||||
# Log to feature history ring buffer
|
||||
history = get_feature_history(req.user_id)
|
||||
history.append({
|
||||
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
"features": best_features,
|
||||
"score": best_score,
|
||||
"tip_id": best_id,
|
||||
})
|
||||
|
||||
# Update meta stats
|
||||
meta["pulls"] = meta.get("pulls", 0) + 1
|
||||
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
||||
save_state(req.user_id, A, b, meta)
|
||||
|
||||
return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
|
||||
|
||||
|
||||
@app.post("/reward", response_model=RewardResponse)
|
||||
def reward(req: RewardRequest) -> RewardResponse:
|
||||
A, b = load_state(req.user_id)
|
||||
A, b, meta = load_state(req.user_id)
|
||||
feat_dict = {
|
||||
"hour_of_day": req.features.hour_of_day,
|
||||
"is_overdue": req.features.is_overdue,
|
||||
@@ -168,5 +199,58 @@ def reward(req: RewardRequest) -> RewardResponse:
|
||||
x = build_feature_vector(feat_dict)
|
||||
A += np.outer(x, x)
|
||||
b += req.reward * x
|
||||
save_state(req.user_id, A, b)
|
||||
|
||||
# Track cumulative reward in meta
|
||||
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
|
||||
meta["reward_count"] = meta.get("reward_count", 0) + 1
|
||||
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
||||
|
||||
save_state(req.user_id, A, b, meta)
|
||||
return RewardResponse(ok=True)
|
||||
|
||||
|
||||
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
    """Reset per-user bandit state (admin action).

    Deletes the user's persisted state file, if one exists, and empties
    the user's in-memory feature history. Resetting a user that has no
    stored state is a no-op, so the call is safe to repeat.

    Returns:
        ``RewardResponse(ok=True)`` once the state has been cleared.
    """
    # missing_ok=True replaces the exists()/unlink() pair: a concurrent
    # reset that removes the file first no longer raises FileNotFoundError.
    state_path(user_id).unlink(missing_ok=True)

    history = _feature_history.get(user_id)
    if history is not None:
        history.clear()
    return RewardResponse(ok=True)
|
||||
|
||||
|
||||
@app.get("/stats/{user_id}")
def stats(user_id: str):
    """Return current LinUCB state summary for a user.

    Loads the user's ``(A, b, meta)`` state and reports the counters kept
    in ``meta`` together with the current coefficient estimate ``theta``,
    i.e. the solution of ``A @ theta = b``.

    Returns:
        A dict with keys ``user_id``, ``pulls``, ``reward_count``,
        ``cumulative_reward``, ``estimated_mean_reward``, ``theta`` and
        ``last_updated``. Counters missing from ``meta`` default to zero;
        ``last_updated`` is ``None`` when it has never been recorded.
    """
    A, b, meta = load_state(user_id)
    try:
        # Solve the linear system directly instead of forming A^-1 first.
        theta = np.linalg.solve(A, b).tolist()
    except np.linalg.LinAlgError:
        # Singular (or malformed) A: fall back to an all-zero estimate.
        theta = [0.0] * D

    pulls = meta.get("pulls", 0)
    cumulative_reward = meta.get("cumulative_reward", 0.0)
    reward_count = meta.get("reward_count", 0)
    # Guard the division: no rewards recorded yet means a mean of 0.0.
    estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0

    return {
        "user_id": user_id,
        "pulls": pulls,
        "reward_count": reward_count,
        "cumulative_reward": cumulative_reward,
        "estimated_mean_reward": estimated_mean,
        "theta": theta,
        "last_updated": meta.get("last_updated"),
    }
|
||||
|
||||
|
||||
@app.get("/features/{user_id}")
def features(user_id: str):
    """Return recent feature vectors logged at scoring time.

    Reads the user's entry in the in-memory ``_feature_history`` mapping
    without creating one, so querying an unknown user id does not
    allocate a buffer for it.

    Returns:
        A dict with ``user_id`` and ``history``, a list of the logged
        entries (empty when nothing has been recorded for the user).
    """
    # Plain .get() keeps this GET read-only; get_feature_history() would
    # register a new empty deque for every user id that is looked up.
    history = _feature_history.get(user_id, ())
    return {
        "user_id": user_id,
        "history": list(history),
    }
|
||||
|
||||
Reference in New Issue
Block a user