Files
oO/ml/serving/main.py
alvis e62c726ea4 feat: M1 admin console — all 10 remaining pages + signal/quality/ops infrastructure
Admin console (issues #63–72):
- Event stream viewer: live-tail ring buffer (500 events) with subject/user filters
- Feature store browser: per-user feature vector history from ml/serving
- Model registry panel: MLflow embed at /admin/models
- Experiment dashboard: LinUCB per-user stats (pulls, reward, θ) + bandit reset
- Recommendation log: per-tip explainability (policy, score, features, latency)
- Reward analytics: daily reaction breakdown + per-policy compare
- Data quality widget: missing-feature rate, stale-token rate, daily completeness
- Ops actions: replay-signal, policy enable/disable; user actions link to Users page
- SQL runner: read-only SELECT runner with saved queries
- Health rollup: fan-out to api/ml/sqlite/event-bus with auto-refresh

Backend:
- tip_scores table: logs features+policy+score+latency at every scoring call (#67)
- saved_queries table: per-admin saved SQL (#71)
- Event bus: 500-event ring buffer + tail() API (#63)
- Admin routes: /events, /tips, /reward-analytics, /data-quality, /health,
  /policies, /replay-signal, /sql, /saved-queries endpoints
- /api/ml/* admin-gated proxy to ml/serving (#64, #66)
- Shadow-policy registry in recommender (#56)

ML serving:
- /reset/{user_id}: clear bandit state + feature history (#66)
- /stats/{user_id}: pulls, cumulative reward, estimated mean, θ (#66)
- /features/{user_id}: last 100 feature vectors logged at scoring time (#64)
- Meta (pulls, rewards) persisted alongside A/b matrices

Web:
- Tip action sheet adds Helpful / Not helpful buttons (#62)
- TipFeedback type extended with helpful/not_helpful actions
- Rewards mapped: helpful=+0.5, not_helpful=−0.5

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 03:56:48 +00:00

257 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
oO ML Serving — Phase 1: LinUCB contextual bandit.
Contract:
POST /score { user_id, candidates, context } → { tip_id, score, policy }
POST /reward { user_id, tip_id, reward, features } → { ok }
POST /reset/{user_id}{ ok }
GET /stats/{user_id}{ pulls, cumulative_reward, estimated_mean, last_updated }
GET /features/{user_id}{ history: [{ ts, features, score }] }
GET /health → { ok }
Features (d=5):
hour_sin, hour_cos — cyclical time-of-day encoding
is_overdue — 0 or 1
task_age_days — days since due date (clipped 030, normalised 01)
priority_norm — Todoist priority 14, normalised to 01
"""
from __future__ import annotations
import json
import math
import os
import time
from collections import deque
from pathlib import Path
from typing import Optional, Deque
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
app = FastAPI(title="oO ML Serving", version="1.0.0")
STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)
ALPHA = 1.0 # exploration coefficient
D = 5 # feature dimension
FEATURE_HISTORY_SIZE = 100 # per-user ring buffer
# ── Per-user in-memory feature history ────────────────────────────────────
_feature_history: dict[str, deque] = {}
def get_feature_history(user_id: str) -> deque:
if user_id not in _feature_history:
_feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE)
return _feature_history[user_id]
# ── Feature helpers ────────────────────────────────────────────────────────
def build_feature_vector(features: dict) -> np.ndarray:
hour = features.get("hour_of_day", 12)
hour_sin = math.sin(2 * math.pi * hour / 24)
hour_cos = math.cos(2 * math.pi * hour / 24)
is_overdue = float(bool(features.get("is_overdue", False)))
age = min(float(features.get("task_age_days", 0)), 30.0) / 30.0
priority = (float(features.get("priority", 1)) - 1.0) / 3.0
return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
def state_path(user_id: str) -> Path:
safe = "".join(c if c.isalnum() else "_" for c in user_id)
return STATE_DIR / f"{safe}.json"
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
"""Returns (A, b, meta). A is DxD, b is D-vector."""
p = state_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
meta = raw.get("meta", {})
return A, b, meta
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
p = state_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
# ── API models ─────────────────────────────────────────────────────────────
class CandidateFeatures(BaseModel):
hour_of_day: int = 12
is_overdue: bool = False
task_age_days: float = 0.0
priority: int = 1
class Candidate(BaseModel):
id: str
content: str
source: str
source_id: Optional[str] = None
features: CandidateFeatures = CandidateFeatures()
class Context(BaseModel):
hour_of_day: int = 12
day_of_week: int = 0
class ScoreRequest(BaseModel):
user_id: str
candidates: list[Candidate]
context: Context = Context()
class ScoreResponse(BaseModel):
tip_id: str
score: float
policy: str
class RewardRequest(BaseModel):
user_id: str
tip_id: str
reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
features: CandidateFeatures
class RewardResponse(BaseModel):
ok: bool
# ── Endpoints ──────────────────────────────────────────────────────────────
@app.get("/health")
def health():
return {"ok": True}
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
A, b, meta = load_state(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
A_inv = np.identity(D, dtype=np.float64)
theta = A_inv @ b
best_id = None
best_score = -float("inf")
best_features: dict = {}
for candidate in req.candidates:
feat_dict = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": candidate.features.is_overdue,
"task_age_days": candidate.features.task_age_days,
"priority": candidate.features.priority,
}
x = build_feature_vector(feat_dict)
exploit = float(theta @ x)
explore = ALPHA * math.sqrt(float(x @ A_inv @ x))
ucb = exploit + explore
if ucb > best_score:
best_score = ucb
best_id = candidate.id
best_features = feat_dict
# Log to feature history ring buffer
history = get_feature_history(req.user_id)
history.append({
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"features": best_features,
"score": best_score,
"tip_id": best_id,
})
# Update meta stats
meta["pulls"] = meta.get("pulls", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
A, b, meta = load_state(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
"task_age_days": req.features.task_age_days,
"priority": req.features.priority,
}
x = build_feature_vector(feat_dict)
A += np.outer(x, x)
b += req.reward * x
# Track cumulative reward in meta
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
meta["reward_count"] = meta.get("reward_count", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return RewardResponse(ok=True)
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
"""Reset per-user bandit state (admin action)."""
p = state_path(user_id)
if p.exists():
p.unlink()
if user_id in _feature_history:
_feature_history[user_id].clear()
return RewardResponse(ok=True)
@app.get("/stats/{user_id}")
def stats(user_id: str):
"""Return current LinUCB state summary for a user."""
A, b, meta = load_state(user_id)
try:
A_inv = np.linalg.inv(A)
theta = (A_inv @ b).tolist()
except np.linalg.LinAlgError:
theta = [0.0] * D
pulls = meta.get("pulls", 0)
cumulative_reward = meta.get("cumulative_reward", 0.0)
reward_count = meta.get("reward_count", 0)
estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0
return {
"user_id": user_id,
"pulls": pulls,
"reward_count": reward_count,
"cumulative_reward": cumulative_reward,
"estimated_mean_reward": estimated_mean,
"theta": theta,
"last_updated": meta.get("last_updated"),
}
@app.get("/features/{user_id}")
def features(user_id: str):
"""Return recent feature vectors logged at scoring time."""
history = get_feature_history(user_id)
return {
"user_id": user_id,
"history": list(history),
}