""" oO ML Serving — Phase 1: LinUCB contextual bandit. Contract: POST /score { user_id, candidates, context } → { tip_id, score, policy } POST /reward { user_id, tip_id, reward, features } → { ok } POST /reset/{user_id} → { ok } GET /stats/{user_id} → { pulls, cumulative_reward, estimated_mean, last_updated } GET /features/{user_id} → { history: [{ ts, features, score }] } GET /health → { ok } Features (d=5): hour_sin, hour_cos — cyclical time-of-day encoding is_overdue — 0 or 1 task_age_days — days since due date (clipped 0–30, normalised 0–1) priority_norm — Todoist priority 1–4, normalised to 0–1 """ from __future__ import annotations import json import math import os import time from collections import deque from pathlib import Path from typing import Optional, Deque import numpy as np from fastapi import FastAPI, HTTPException from pydantic import BaseModel app = FastAPI(title="oO ML Serving", version="1.0.0") STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state")) STATE_DIR.mkdir(parents=True, exist_ok=True) ALPHA = 1.0 # exploration coefficient D = 5 # feature dimension FEATURE_HISTORY_SIZE = 100 # per-user ring buffer # ── Per-user in-memory feature history ──────────────────────────────────── _feature_history: dict[str, deque] = {} def get_feature_history(user_id: str) -> deque: if user_id not in _feature_history: _feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE) return _feature_history[user_id] # ── Feature helpers ──────────────────────────────────────────────────────── def build_feature_vector(features: dict) -> np.ndarray: hour = features.get("hour_of_day", 12) hour_sin = math.sin(2 * math.pi * hour / 24) hour_cos = math.cos(2 * math.pi * hour / 24) is_overdue = float(bool(features.get("is_overdue", False))) age = min(float(features.get("task_age_days", 0)), 30.0) / 30.0 priority = (float(features.get("priority", 1)) - 1.0) / 3.0 return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64) # ── Per-user bandit state (disjoint LinUCB, global arm) ─────────────────── def state_path(user_id: str) -> Path: safe = "".join(c if c.isalnum() else "_" for c in user_id) return STATE_DIR / f"{safe}.json" def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]: """Returns (A, b, meta). A is DxD, b is D-vector.""" p = state_path(user_id) if p.exists(): raw = json.loads(p.read_text()) A = np.array(raw["A"], dtype=np.float64) b = np.array(raw["b"], dtype=np.float64) meta = raw.get("meta", {}) return A, b, meta return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {} def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None: p = state_path(user_id) p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})) # ── API models ───────────────────────────────────────────────────────────── class CandidateFeatures(BaseModel): hour_of_day: int = 12 is_overdue: bool = False task_age_days: float = 0.0 priority: int = 1 class Candidate(BaseModel): id: str content: str source: str source_id: Optional[str] = None features: CandidateFeatures = CandidateFeatures() class Context(BaseModel): hour_of_day: int = 12 day_of_week: int = 0 class ScoreRequest(BaseModel): user_id: str candidates: list[Candidate] context: Context = Context() class ScoreResponse(BaseModel): tip_id: str score: float policy: str class RewardRequest(BaseModel): user_id: str tip_id: str reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss features: CandidateFeatures class RewardResponse(BaseModel): ok: bool # ── Endpoints ────────────────────────────────────────────────────────────── @app.get("/health") def health(): return {"ok": True} @app.post("/score", response_model=ScoreResponse) def score(req: ScoreRequest) -> ScoreResponse: if not req.candidates: raise HTTPException(status_code=422, detail="No candidates") A, b, meta = load_state(req.user_id) try: A_inv = np.linalg.inv(A) except np.linalg.LinAlgError: A_inv = np.identity(D, dtype=np.float64) theta = A_inv @ b best_id = None best_score = -float("inf") best_features: dict = {} for candidate in req.candidates: feat_dict = { "hour_of_day": req.context.hour_of_day, "is_overdue": candidate.features.is_overdue, "task_age_days": candidate.features.task_age_days, "priority": candidate.features.priority, } x = build_feature_vector(feat_dict) exploit = float(theta @ x) explore = ALPHA * math.sqrt(float(x @ A_inv @ x)) ucb = exploit + explore if ucb > best_score: best_score = ucb best_id = candidate.id best_features = feat_dict # Log to feature history ring buffer history = get_feature_history(req.user_id) history.append({ "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), "features": best_features, "score": best_score, "tip_id": best_id, }) # Update meta stats meta["pulls"] = meta.get("pulls", 0) + 1 meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) save_state(req.user_id, A, b, meta) return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1") @app.post("/reward", response_model=RewardResponse) def reward(req: RewardRequest) -> RewardResponse: A, b, meta = load_state(req.user_id) feat_dict = { "hour_of_day": req.features.hour_of_day, "is_overdue": req.features.is_overdue, "task_age_days": req.features.task_age_days, "priority": req.features.priority, } x = build_feature_vector(feat_dict) A += np.outer(x, x) b += req.reward * x # Track cumulative reward in meta meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward meta["reward_count"] = meta.get("reward_count", 0) + 1 meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) save_state(req.user_id, A, b, meta) return RewardResponse(ok=True) @app.post("/reset/{user_id}", response_model=RewardResponse) def reset(user_id: str) -> RewardResponse: """Reset per-user bandit state (admin action).""" p = state_path(user_id) if p.exists(): p.unlink() if user_id in _feature_history: _feature_history[user_id].clear() return RewardResponse(ok=True) @app.get("/stats/{user_id}") def stats(user_id: str): """Return current LinUCB state summary for a user.""" A, b, meta = load_state(user_id) try: A_inv = np.linalg.inv(A) theta = (A_inv @ b).tolist() except np.linalg.LinAlgError: theta = [0.0] * D pulls = meta.get("pulls", 0) cumulative_reward = meta.get("cumulative_reward", 0.0) reward_count = meta.get("reward_count", 0) estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0 return { "user_id": user_id, "pulls": pulls, "reward_count": reward_count, "cumulative_reward": cumulative_reward, "estimated_mean_reward": estimated_mean, "theta": theta, "last_updated": meta.get("last_updated"), } @app.get("/features/{user_id}") def features(user_id: str): """Return recent feature vectors logged at scoring time.""" history = get_feature_history(user_id) return { "user_id": user_id, "history": list(history), }