ML serving: - LinUCB contextual bandit (disjoint, d=5 features: hour_sin/cos, is_overdue, task_age, priority) - /score endpoint replaces stub random; /reward endpoint for online learning - Per-user model state persisted to disk as JSON (survives restarts) - venv at ml/serving/.venv; start with pnpm dev from ml/serving Recommender: - Todoist fetch now extracts features (is_overdue, task_age_days, priority) - RemotePolicy calls ml/serving with 3s timeout; falls back to RandomPolicy - Reward sent to /reward on feedback (done=+1, snooze=0, dismiss=-1) Web Push: - VAPID keys in config; push_subscriptions table in DB - POST/DELETE /api/push/subscribe; GET /api/push/vapid-public-key - Service worker (public/sw.js): push → showNotification, notificationclick → focus/open - "notify me" button on tip page; registers SW + subscribes on permission grant Event bus: - services/api/src/events/bus.ts: typed EventEmitter wrapper - Subjects: signals.tip.served, signals.tip.feedback, signals.task.synced - Same publish/subscribe API NATS JetStream will implement — swap is mechanical Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
173 lines
5.2 KiB
Python
173 lines
5.2 KiB
Python
"""
|
||
oO ML Serving — Phase 1: LinUCB contextual bandit.

Contract:
  POST /score   { user_id, candidates, context } → { tip_id, score, policy }
  POST /reward  { user_id, tip_id, reward, features } → { ok }
  GET  /health  → { ok }

Features (d=5):
  hour_sin, hour_cos  — cyclical time-of-day encoding
  is_overdue          — 0 or 1
  task_age_days       — days since due date (clipped 0–30, normalised 0–1)
  priority_norm       — Todoist priority 1–4, normalised to 0–1
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import math
|
||
import os
|
||
import random
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
import numpy as np
|
||
from fastapi import FastAPI, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
# ASGI application object; the route decorators further down register on it.
app = FastAPI(title="oO ML Serving", version="1.0.0")

# Directory holding one JSON state file per user. Overridable through the
# STATE_DIR environment variable; created eagerly at import time.
STATE_DIR = Path(os.environ.get("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)

ALPHA = 1.0  # exploration coefficient
D = 5  # feature dimension
|
||
|
||
|
||
# ── Feature helpers ────────────────────────────────────────────────────────
|
||
|
||
def build_feature_vector(features: dict) -> np.ndarray:
    """Encode a raw feature mapping as the 5-dimensional context vector.

    Recognised keys (each optional):
        hour_of_day:   hour on a 24-hour clock, default 12; encoded as the
                       (sin, cos) pair of its angle so 23h and 0h are close.
        is_overdue:    truthy/falsy, default False; encoded as 1.0 / 0.0.
        task_age_days: default 0; clamped to [0, 30] then scaled to [0, 1].
        priority:      default 1; clamped to [1, 4] then scaled to [0, 1].

    Returns:
        float64 array ``[hour_sin, hour_cos, is_overdue, age, priority]``.
    """
    hour = features.get("hour_of_day", 12)
    angle = 2 * math.pi * hour / 24

    is_overdue = float(bool(features.get("is_overdue", False)))

    # Clamp on BOTH sides: a task that is not yet due can report a negative
    # age, which would otherwise leak a negative value into a feature that
    # is documented as living in [0, 1].
    age_days = float(features.get("task_age_days", 0))
    age = min(max(age_days, 0.0), 30.0) / 30.0

    # Same reasoning for priority: keep out-of-range inputs inside [0, 1].
    raw_priority = float(features.get("priority", 1))
    priority = (min(max(raw_priority, 1.0), 4.0) - 1.0) / 3.0

    return np.array(
        [math.sin(angle), math.cos(angle), is_overdue, age, priority],
        dtype=np.float64,
    )
|
||
|
||
|
||
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
|
||
|
||
def state_path(user_id: str) -> Path:
    """Return the JSON state file for *user_id* inside ``STATE_DIR``.

    Every character that is not alphanumeric is replaced by ``_``, so the id
    cannot contribute path separators or dots to the file name. Note that
    the mapping is not injective: ids differing only in such characters
    (e.g. ``"a-b"`` and ``"a_b"``) resolve to the same file.
    """
    cleaned = []
    for ch in user_id:
        cleaned.append(ch if ch.isalnum() else "_")
    filename = "".join(cleaned) + ".json"
    return STATE_DIR / filename
|
||
|
||
|
||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
    """Load the persisted LinUCB state ``(A, b)`` for *user_id*.

    Returns:
        ``(A, b)`` where ``A`` is a D×D float64 matrix and ``b`` a length-D
        float64 vector. When no usable state exists the prior
        ``(identity(D), zeros(D))`` is returned instead.

    A state file that is missing yields the prior silently. A file that is
    unparsable, lacks the ``"A"``/``"b"`` keys, has the wrong shape, or
    contains NaN/inf is logged and ignored, so one bad file cannot make every
    later request for that user fail. Other I/O errors (e.g. permissions)
    still propagate.
    """
    import logging  # local import: only needed on the rare discard path

    def _prior() -> tuple[np.ndarray, np.ndarray]:
        return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)

    p = state_path(user_id)
    try:
        # EAFP: read directly instead of exists()-then-read, which can race
        # with a concurrent writer or deletion.
        raw = json.loads(p.read_text())
        A = np.array(raw["A"], dtype=np.float64)
        b = np.array(raw["b"], dtype=np.float64)
    except FileNotFoundError:
        return _prior()
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers JSONDecodeError, bad encodings and ragged arrays.
        logging.getLogger(__name__).warning(
            "Discarding unreadable bandit state %s: %s", p, exc
        )
        return _prior()

    shape_ok = A.shape == (D, D) and b.shape == (D,)
    if not shape_ok or not (np.isfinite(A).all() and np.isfinite(b).all()):
        logging.getLogger(__name__).warning(
            "Discarding invalid bandit state %s (shape or non-finite values)", p
        )
        return _prior()

    return A, b
|
||
|
||
|
||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
    """Persist the LinUCB state ``(A, b)`` for *user_id* as JSON.

    The payload is ``{"A": <nested list>, "b": <list>}``. It is written to a
    uniquely named temporary file in the same directory and then moved over
    the target with ``os.replace``, so a reader (or a crash mid-write) never
    observes a truncated state file.

    Raises:
        OSError: if the temporary file cannot be written or swapped in; the
            temporary file is removed before the error propagates.
    """
    import tempfile  # local import: only this function needs it

    target = state_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist()})

    # Same directory as the target so the final rename stays on one
    # filesystem; the ".tmp" suffix keeps it distinct from "<id>.json" files.
    fd, tmp_name = tempfile.mkstemp(
        dir=target.parent, prefix=f"{target.stem}.", suffix=".tmp"
    )
    try:
        with os.fdopen(fd, "w") as fh:
            fh.write(payload)
        os.replace(tmp_name, target)
    except BaseException:
        # Do not leave stray temp files behind on failure.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
|
||
|
||
|
||
# ── API models ─────────────────────────────────────────────────────────────
|
||
|
||
class CandidateFeatures(BaseModel):
    # Raw (un-normalised) inputs for one candidate. Every field carries a
    # default, so a partial or empty object still validates.
    hour_of_day: int = 12  # hour on a 24-hour clock
    is_overdue: bool = False
    task_age_days: float = 0.0  # days since the due date
    priority: int = 1  # Todoist priority, 1–4
|
||
|
||
|
||
class Candidate(BaseModel):
    # One tip that may be served; `id` is what /score echoes back as tip_id.
    id: str
    content: str
    source: str
    source_id: Optional[str] = None
    # Falls back to an all-defaults feature set when the caller sends none.
    features: CandidateFeatures = CandidateFeatures()
|
||
|
||
|
||
class Context(BaseModel):
    # Request-level context shared by all candidates in one /score call.
    hour_of_day: int = 12  # used by /score for the time-of-day features
    day_of_week: int = 0  # accepted but not read by the scoring code here
|
||
|
||
|
||
class ScoreRequest(BaseModel):
    # Body of POST /score.
    user_id: str  # selects which per-user bandit state is loaded
    candidates: list[Candidate]
    context: Context = Context()  # defaults apply when omitted
|
||
|
||
|
||
class ScoreResponse(BaseModel):
    # Body returned by POST /score.
    tip_id: str  # id of the winning candidate
    score: float  # its upper-confidence-bound value
    policy: str  # identifier of the policy that produced the choice
|
||
|
||
|
||
class RewardRequest(BaseModel):
    # Body of POST /reward.
    user_id: str
    tip_id: str  # carried in the payload; the update itself keys on user_id
    reward: float  # +1 done, 0 snooze, -1 dismiss
    features: CandidateFeatures  # features of the tip the feedback refers to
|
||
|
||
|
||
class RewardResponse(BaseModel):
    # Body returned by POST /reward.
    ok: bool
|
||
|
||
|
||
# ── Endpoints ──────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/health")
def health():
    # Liveness probe: unconditionally reports the service as up.
    status = {"ok": True}
    return status
|
||
|
||
|
||
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
    """Pick the candidate with the highest LinUCB upper confidence bound.

    With ``theta = A⁻¹ b`` taken from the user's persisted state, each
    candidate scores ``theta·x + ALPHA * sqrt(xᵀ A⁻¹ x)``. The hour used for
    the time-of-day features comes from ``req.context``; the remaining
    features come from the candidate itself. On ties the earliest candidate
    wins.

    Raises:
        HTTPException: 422 when ``candidates`` is empty; 500 when no
            candidate yields a comparable score (e.g. every score is NaN).
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    A, b = load_state(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular A: score against the identity rather than failing.
        A_inv = np.identity(D, dtype=np.float64)

    theta = A_inv @ b

    best_id = None
    best_score = -float("inf")

    for candidate in req.candidates:
        x = build_feature_vector(
            {
                "hour_of_day": req.context.hour_of_day,
                "is_overdue": candidate.features.is_overdue,
                "task_age_days": candidate.features.task_age_days,
                "priority": candidate.features.priority,
            }
        )
        exploit = float(theta @ x)
        # Round-off can push the quadratic form marginally below zero, which
        # would make math.sqrt raise a domain error; clamp it first.
        variance = max(float(x @ A_inv @ x), 0.0)
        ucb = exploit + ALPHA * math.sqrt(variance)
        if ucb > best_score:
            best_score = ucb
            best_id = candidate.id

    if best_id is None:
        # Every comparison failed (NaN scores). Report that explicitly
        # instead of letting tip_id=None fail response validation.
        raise HTTPException(
            status_code=500, detail="No candidate produced a usable score"
        )

    return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
|
||
|
||
|
||
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
    """Fold one observed reward into the user's LinUCB state and persist it.

    Applies the update ``A += x xᵀ`` and ``b += reward * x`` where ``x`` is
    the feature vector built from ``req.features``, then saves the state.

    Raises:
        HTTPException: 422 when the reward or the resulting feature vector
            is NaN or infinite. Such a value would be written into the
            persisted state and corrupt every later score for this user.
    """
    if not math.isfinite(req.reward):
        raise HTTPException(status_code=422, detail="Reward must be finite")

    x = build_feature_vector(
        {
            "hour_of_day": req.features.hour_of_day,
            "is_overdue": req.features.is_overdue,
            "task_age_days": req.features.task_age_days,
            "priority": req.features.priority,
        }
    )
    if not np.isfinite(x).all():
        raise HTTPException(status_code=422, detail="Features must be finite")

    # Validate before touching state so a rejected request has no effect.
    A, b = load_state(req.user_id)
    A += np.outer(x, x)
    b += req.reward * x
    save_state(req.user_id, A, b)
    return RewardResponse(ok=True)
|