Admin console (issues #63–72): - Event stream viewer: live-tail ring buffer (500 events) with subject/user filters - Feature store browser: per-user feature vector history from ml/serving - Model registry panel: MLflow embed at /admin/models - Experiment dashboard: LinUCB per-user stats (pulls, reward, θ) + bandit reset - Recommendation log: per-tip explainability (policy, score, features, latency) - Reward analytics: daily reaction breakdown + per-policy compare - Data quality widget: missing-feature rate, stale-token rate, daily completeness - Ops actions: replay-signal, policy enable/disable; user actions link to Users page - SQL runner: read-only SELECT runner with saved queries - Health rollup: fan-out to api/ml/sqlite/event-bus with auto-refresh Backend: - tip_scores table: logs features+policy+score+latency at every scoring call (#67) - saved_queries table: per-admin saved SQL (#71) - Event bus: 500-event ring buffer + tail() API (#63) - Admin routes: /events, /tips, /reward-analytics, /data-quality, /health, /policies, /replay-signal, /sql, /saved-queries endpoints - /api/ml/* admin-gated proxy to ml/serving (#64, #66) - Shadow-policy registry in recommender (#56) ML serving: - /reset/{user_id}: clear bandit state + feature history (#66) - /stats/{user_id}: pulls, cumulative reward, estimated mean, θ (#66) - /features/{user_id}: last 100 feature vectors logged at scoring time (#64) - Meta (pulls, rewards) persisted alongside A/b matrices Web: - Tip action sheet adds Helpful / Not helpful buttons (#62) - TipFeedback type extended with helpful/not_helpful actions - Rewards mapped: helpful=+0.5, not_helpful=−0.5 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
257 lines
8.1 KiB
Python
257 lines
8.1 KiB
Python
"""
|
||
oO ML Serving — Phase 1: LinUCB contextual bandit.
|
||
|
||
Contract:
|
||
POST /score { user_id, candidates, context } → { tip_id, score, policy }
|
||
POST /reward { user_id, tip_id, reward, features } → { ok }
|
||
POST /reset/{user_id} → { ok }
|
||
GET /stats/{user_id} → { user_id, pulls, reward_count, cumulative_reward, estimated_mean_reward, theta, last_updated }
|
||
GET /features/{user_id} → { user_id, history: [{ ts, features, score, tip_id }] }
|
||
GET /health → { ok }
|
||
|
||
Features (d=5):
|
||
hour_sin, hour_cos — cyclical time-of-day encoding
|
||
is_overdue — 0 or 1
|
||
task_age_days — days since due date (clipped 0–30, normalised 0–1)
|
||
priority_norm — Todoist priority 1–4, normalised to 0–1
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import math
|
||
import os
|
||
import time
|
||
from collections import deque
|
||
from pathlib import Path
|
||
from typing import Optional, Deque
|
||
|
||
import numpy as np
|
||
from fastapi import FastAPI, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
# ASGI application object; the route decorators further down register on it.
app = FastAPI(version="1.0.0", title="oO ML Serving")

# Directory holding one JSON state file per user. Overridable through the
# STATE_DIR environment variable; created at import time if missing.
STATE_DIR = Path(os.environ.get("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(exist_ok=True, parents=True)

ALPHA = 1.0  # exploration coefficient
D = 5  # feature dimension
FEATURE_HISTORY_SIZE = 100  # per-user ring buffer
|
||
|
||
|
||
# ── Per-user in-memory feature history ────────────────────────────────────
|
||
# user_id -> bounded deque of scoring records. Held in process memory only.
_feature_history: dict[str, deque] = {}


def get_feature_history(user_id: str) -> deque:
    """Return the feature-history ring buffer for *user_id*.

    A new, empty deque capped at FEATURE_HISTORY_SIZE entries is created and
    registered the first time a user is seen.
    """
    try:
        return _feature_history[user_id]
    except KeyError:
        ring = deque(maxlen=FEATURE_HISTORY_SIZE)
        _feature_history[user_id] = ring
        return ring
|
||
|
||
|
||
# ── Feature helpers ────────────────────────────────────────────────────────
|
||
|
||
def build_feature_vector(features: dict) -> np.ndarray:
    """Encode a raw feature dict as the 5-dimensional LinUCB context vector.

    Keys read from *features* (all optional):
        hour_of_day   -- hour used for the cyclical encoding (default 12)
        is_overdue    -- truthy/falsy flag (default False)
        task_age_days -- days since the due date (default 0)
        priority      -- priority on a 1-4 scale (default 1)

    Returns:
        float64 array ``[hour_sin, hour_cos, is_overdue, age, priority]``.
        ``age`` and ``priority`` are clamped to their documented ranges
        (0-30 days, 1-4) before normalisation, so both always land in [0, 1].
    """
    hour = features.get("hour_of_day", 12)
    hour_sin = math.sin(2 * math.pi * hour / 24)
    hour_cos = math.cos(2 * math.pi * hour / 24)

    is_overdue = float(bool(features.get("is_overdue", False)))

    # Clamp on both sides: a task that is not yet due can arrive with a
    # negative age, which previously produced a negative feature.
    raw_age = float(features.get("task_age_days", 0))
    age = min(max(raw_age, 0.0), 30.0) / 30.0

    # Same for priority: values outside 1-4 would leave the [0, 1] range.
    raw_priority = float(features.get("priority", 1))
    priority = (min(max(raw_priority, 1.0), 4.0) - 1.0) / 3.0

    return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
|
||
|
||
|
||
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
|
||
|
||
def state_path(user_id: str) -> Path:
    """Return the JSON state file path for *user_id* inside STATE_DIR.

    Every character that is not alphanumeric is replaced by ``_`` so the id
    cannot introduce path separators or other special characters.
    """
    # NOTE(review): ids that differ only in non-alphanumeric characters
    # (e.g. "a-b" and "a_b") map to the same file and so share state.
    cleaned = []
    for ch in user_id:
        cleaned.append(ch if ch.isalnum() else "_")
    filename = "".join(cleaned) + ".json"
    return STATE_DIR / filename
|
||
|
||
|
||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
    """Load the persisted LinUCB state for *user_id*.

    Returns:
        ``(A, b, meta)`` where A is a DxD float64 matrix, b is a float64
        D-vector and meta is a dict of bookkeeping counters. When no state
        file exists, the untrained prior is returned: identity A, zero b,
        empty meta.

    Raises:
        json.JSONDecodeError / KeyError: the state file exists but is not a
        JSON object with "A" and "b" entries. Left to propagate so a damaged
        file is never silently replaced by a fresh state.
    """
    p = state_path(user_id)
    try:
        # Read directly instead of checking exists() first: the file can be
        # removed (e.g. by a reset) between the check and the read.
        text = p.read_text()
    except FileNotFoundError:
        return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}

    raw = json.loads(text)
    A = np.array(raw["A"], dtype=np.float64)
    b = np.array(raw["b"], dtype=np.float64)
    # "or {}" also covers a stored JSON null, which would break meta.get().
    meta = raw.get("meta") or {}
    return A, b, meta
|
||
|
||
|
||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
    """Persist the LinUCB state for *user_id* as JSON.

    The payload is written to a temporary file in the same directory and then
    moved over the target with ``os.replace``. A concurrent reader therefore
    sees either the old file or the new one, never a partially written one.

    Args:
        user_id: owner of the state; mapped to a file via ``state_path``.
        A: DxD matrix, stored as nested lists.
        b: D-vector, stored as a list.
        meta: JSON-serialisable bookkeeping dict.
    """
    import threading  # local import: not among the module-level imports

    p = state_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})

    # pid + thread id makes the temp name unique per concurrent writer. The
    # embedded dots keep it distinct from any "<sanitised id>.json" file.
    tmp = p.with_name(f".{p.name}.{os.getpid()}.{threading.get_ident()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, p)
    except BaseException:
        # Do not leave a stray temp file behind on failure.
        tmp.unlink(missing_ok=True)
        raise
|
||
|
||
|
||
# ── API models ─────────────────────────────────────────────────────────────
|
||
|
||
# Raw per-candidate features as sent by the caller. Every field has a default,
# so an empty object is valid. Documented with comments rather than a class
# docstring so the generated schema stays unchanged.
class CandidateFeatures(BaseModel):
    # Hour value fed into the cyclical time-of-day encoding.
    hour_of_day: int = 12

    # Whether the task is past its due date.
    is_overdue: bool = False

    # Days since the due date.
    task_age_days: float = 0.0

    # Priority on the 1-4 scale described in the module docstring.
    priority: int = 1
|
||
|
||
|
||
# One tip that can be recommended, together with its scoring features.
class Candidate(BaseModel):
    # Identifier echoed back as `tip_id` when this candidate wins.
    id: str

    # Accepted as part of the request; the scoring code in this file does not
    # read content, source or source_id.
    content: str
    source: str
    source_id: Optional[str] = None

    # Falls back to the all-defaults feature set when omitted.
    features: CandidateFeatures = CandidateFeatures()
|
||
|
||
|
||
# Request-level context shared by all candidates of one /score call.
class Context(BaseModel):
    # Hour used for every candidate's time-of-day features in /score.
    hour_of_day: int = 12

    # Accepted but not used by the scoring code in this file.
    day_of_week: int = 0
|
||
|
||
|
||
# Body of POST /score.
class ScoreRequest(BaseModel):
    # Selects which per-user bandit state is loaded and updated.
    user_id: str

    # Tips to rank; /score rejects an empty list with HTTP 422.
    candidates: list[Candidate]

    # Defaults to Context() when the caller omits it.
    context: Context = Context()
|
||
|
||
|
||
# Body returned by POST /score.
class ScoreResponse(BaseModel):
    # `id` of the winning candidate.
    tip_id: str

    # Upper confidence bound of the winning candidate.
    score: float

    # Name of the policy that produced the decision.
    policy: str
|
||
|
||
|
||
# Body of POST /reward: feedback for a previously recommended tip.
class RewardRequest(BaseModel):
    user_id: str
    tip_id: str

    # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
    reward: float

    # Features of the rewarded tip; required (no default).
    features: CandidateFeatures
|
||
|
||
|
||
# Minimal acknowledgement body; used by both POST /reward and POST /reset.
class RewardResponse(BaseModel):
    ok: bool
|
||
|
||
|
||
# ── Endpoints ──────────────────────────────────────────────────────────────
|
||
|
||
# Liveness probe: returns a constant payload and touches no state.
@app.get("/health")
def health():
    payload = {"ok": True}
    return payload
|
||
|
||
|
||
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
    """Pick the candidate with the highest LinUCB upper confidence bound.

    For each candidate the score is ``theta . x + ALPHA * sqrt(x^T A^-1 x)``
    with ``theta = A^-1 b`` taken from the user's persisted state. The winner
    is appended to the in-memory feature history, and the user's pull counter
    and timestamp are persisted.

    Raises:
        HTTPException: 422 when the request has no candidates, or when no
            candidate yields a usable score (e.g. every score is NaN).
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    A, b, meta = load_state(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular A: score with the identity instead of failing the request.
        A_inv = np.identity(D, dtype=np.float64)

    theta = A_inv @ b

    best_id = None
    best_score = -float("inf")
    best_features: dict = {}

    for candidate in req.candidates:
        # The hour comes from the request context, not from the candidate.
        feat_dict = {
            "hour_of_day": req.context.hour_of_day,
            "is_overdue": candidate.features.is_overdue,
            "task_age_days": candidate.features.task_age_days,
            "priority": candidate.features.priority,
        }
        x = build_feature_vector(feat_dict)
        exploit = float(theta @ x)
        # Clamp the radicand: floating-point error can push it marginally
        # below zero, which would make math.sqrt raise ValueError.
        explore = ALPHA * math.sqrt(max(float(x @ A_inv @ x), 0.0))
        ucb = exploit + explore
        if ucb > best_score:
            best_score = ucb
            best_id = candidate.id
            best_features = feat_dict

    if best_id is None:
        # Every comparison failed (NaN scores). Reject before touching any
        # state instead of failing later on response validation.
        raise HTTPException(status_code=422, detail="No candidate produced a valid score")

    # One timestamp shared by the history entry and the persisted meta.
    now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    # Log to feature history ring buffer
    history = get_feature_history(req.user_id)
    history.append({
        "ts": now,
        "features": best_features,
        "score": best_score,
        "tip_id": best_id,
    })

    # Update meta stats
    meta["pulls"] = meta.get("pulls", 0) + 1
    meta["last_updated"] = now
    save_state(req.user_id, A, b, meta)

    return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
|
||
|
||
|
||
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
    """Fold one observed reward into the user's LinUCB state.

    Applies ``A += x x^T`` and ``b += reward * x`` for the feature vector
    built from ``req.features``, updates the cumulative reward, reward count
    and timestamp in meta, and persists the result.

    Raises:
        HTTPException: 422 when the reward or any derived feature is NaN or
            infinite. Such a value would be written into A/b and corrupt
            every later score for this user.
    """
    if not math.isfinite(req.reward):
        raise HTTPException(status_code=422, detail="Reward must be a finite number")

    feat_dict = {
        "hour_of_day": req.features.hour_of_day,
        "is_overdue": req.features.is_overdue,
        "task_age_days": req.features.task_age_days,
        "priority": req.features.priority,
    }
    x = build_feature_vector(feat_dict)
    if not np.isfinite(x).all():
        raise HTTPException(status_code=422, detail="Features must be finite numbers")

    A, b, meta = load_state(req.user_id)
    A += np.outer(x, x)
    b += req.reward * x

    # Track cumulative reward in meta
    meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
    meta["reward_count"] = meta.get("reward_count", 0) + 1
    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    save_state(req.user_id, A, b, meta)
    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
    """Reset per-user bandit state (admin action).

    Deletes the user's persisted state file, if any, and empties the user's
    in-memory feature history. Resetting a user with no state is a no-op that
    still returns ``ok``.
    """
    # missing_ok avoids the exists()/unlink() race: two concurrent resets
    # could otherwise both pass the check and one would raise.
    state_path(user_id).unlink(missing_ok=True)

    history = _feature_history.get(user_id)
    if history is not None:
        history.clear()
    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.get("/stats/{user_id}")
def stats(user_id: str):
    """Return current LinUCB state summary for a user."""
    A, b, meta = load_state(user_id)

    # theta = A^-1 b; reported as all zeros when A cannot be inverted.
    try:
        theta = (np.linalg.inv(A) @ b).tolist()
    except np.linalg.LinAlgError:
        theta = [0.0] * D

    total_reward = meta.get("cumulative_reward", 0.0)
    n_rewards = meta.get("reward_count", 0)
    # Guard the division: a user with no rewards yet reports a mean of 0.0.
    if n_rewards > 0:
        mean_reward = total_reward / n_rewards
    else:
        mean_reward = 0.0

    return {
        "user_id": user_id,
        "pulls": meta.get("pulls", 0),
        "reward_count": n_rewards,
        "cumulative_reward": total_reward,
        "estimated_mean_reward": mean_reward,
        "theta": theta,
        "last_updated": meta.get("last_updated"),
    }
|
||
|
||
|
||
@app.get("/features/{user_id}")
def features(user_id: str):
    """Return recent feature vectors logged at scoring time.

    The response holds the user's id and a list copy of the in-memory ring
    buffer (empty for a user that has never been scored).
    """
    # Read the registry directly instead of calling get_feature_history():
    # that helper allocates a buffer for every unseen id, so a read-only
    # lookup of arbitrary ids would grow memory without bound.
    history = _feature_history.get(user_id, ())
    return {
        "user_id": user_id,
        "history": list(history),
    }
|