feat: M1 — LinUCB bandit, RemotePolicy, Web Push, event bus

ML serving: - LinUCB contextual bandit (disjoint, d=5 features: hour_sin/cos, is_overdue, task_age, priority) - /score endpoint replaces stub random; /reward endpoint for online learning - Per-user model state persisted to disk as JSON (survives restarts) - venv at ml/serving/.venv; start with pnpm dev from ml/serving Recommender: - Todoist fetch now extracts features (is_overdue, task_age_days, priority) - RemotePolicy calls ml/serving with 3s timeout; falls back to RandomPolicy - Reward sent to /reward on feedback (done=+1, snooze=0, dismiss=-1) Web Push: - VAPID keys in config; push_subscriptions table in DB - POST/DELETE /api/push/subscribe; GET /api/push/vapid-public-key - Service worker (public/sw.js): push → showNotification, notificationclick → focus/open - "notify me" button on tip page; registers SW + subscribes on permission grant Event bus: - services/api/src/events/bus.ts: typed EventEmitter wrapper - Subjects: signals.tip.served, signals.tip.feedback, signals.task.synced - Same publish/subscribe API NATS JetStream will implement — swap is mechanical Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 14:08:00 +00:00
parent 08dfa1d8c9
commit c7edd92e15
16 changed files with 648 additions and 75 deletions
--- a/ml/serving/main.py
+++ b/ml/serving/main.py
@@ -1,32 +1,101 @@
 """
-oO ML Serving — Phase 0 stub.
-
-Returns a placeholder response that matches the interface the real scorer will implement.
-The recommender service calls this via RemotePolicy (not yet wired in Phase 0).
+oO ML Serving — Phase 1: LinUCB contextual bandit.

 Contract:
-  POST /score
-  Body: { user_id: str, candidates: [{ id: str, content: str, source: str, source_id?: str }] }
-  Response: { tip_id: str, score: float }
+  POST /score   { user_id, candidates, context } → { tip_id, score, policy }
+  POST /reward  { user_id, tip_id, reward, features } → { ok }
+  GET  /health  → { ok }
+
+Features (d=5):
+  hour_sin, hour_cos  — cyclical time-of-day encoding
+  is_overdue          — 0 or 1
+  task_age_days       — days since due date (clipped 0–30, normalised 0–1)
+  priority_norm       — Todoist priority 1–4, normalised to 0–1
 """

+from __future__ import annotations
+
+import json
+import math
+import os
+import random
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-import random

-app = FastAPI(title="oO ML Serving", version="0.0.0")
+app = FastAPI(title="oO ML Serving", version="1.0.0")
+
+# Directory holding one JSON state file per user. Overridable through the
+# STATE_DIR environment variable; created at import time so later writes can
+# assume it exists.
+# NOTE(review): the default lives under /tmp, which many systems clear on
+# reboot — confirm deployments point STATE_DIR at a durable location.
+STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state"))
+STATE_DIR.mkdir(parents=True, exist_ok=True)
+
+ALPHA = 1.0  # exploration coefficient: scales the uncertainty bonus in /score
+D = 5        # feature dimension: length of the vector from build_feature_vector
+
+
+# ── Feature helpers ────────────────────────────────────────────────────────
+
+def build_feature_vector(features: dict) -> np.ndarray:
+    """Encode a raw feature mapping as the 5-component LinUCB input vector.
+
+    Missing keys fall back to neutral defaults (hour 12, not overdue, age 0,
+    priority 1). Age and priority are clamped to their documented ranges
+    before scaling, so a task that is not yet due (negative age) or an
+    out-of-range priority cannot push a component outside 0–1.
+
+    Args:
+        features: Mapping that may contain ``hour_of_day``, ``is_overdue``,
+            ``task_age_days`` and ``priority``.
+
+    Returns:
+        float64 array ``[hour_sin, hour_cos, is_overdue, age, priority]``.
+    """
+    hour = features.get("hour_of_day", 12)
+    # Cyclical encoding: hour 23 and hour 0 end up close together.
+    angle = 2 * math.pi * hour / 24
+    hour_sin = math.sin(angle)
+    hour_cos = math.cos(angle)
+    is_overdue = float(bool(features.get("is_overdue", False)))
+    # Clip to 0–30 days on both sides, then scale to 0–1.
+    raw_age = float(features.get("task_age_days", 0))
+    age = min(max(raw_age, 0.0), 30.0) / 30.0
+    # Clamp to the 1–4 priority range, then scale to 0–1.
+    raw_priority = float(features.get("priority", 1))
+    priority = (min(max(raw_priority, 1.0), 4.0) - 1.0) / 3.0
+    return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
+
+
+# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
+
+def state_path(user_id: str) -> Path:
+    """Return the JSON state file for *user_id* inside STATE_DIR.
+
+    Every character that is not alphanumeric is replaced with an underscore,
+    so the resulting file name cannot contain path separators or dots.
+    NOTE(review): distinct ids that differ only in such characters map to the
+    same file — confirm upstream ids cannot collide this way.
+    """
+    sanitised = [ch if ch.isalnum() else "_" for ch in user_id]
+    filename = "".join(sanitised) + ".json"
+    return STATE_DIR / filename
+
+
+def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
+    """Return the LinUCB state ``(A, b)`` for *user_id*.
+
+    ``A`` is a DxD float64 matrix and ``b`` a D-vector. When no state file
+    exists the untrained prior (identity, zeros) is returned. A file that is
+    unreadable, is not valid JSON, lacks the ``A``/``b`` keys, has the wrong
+    shapes, or contains non-finite numbers is treated the same way, with a
+    warning logged, so one damaged file cannot fail every later request for
+    that user.
+    """
+    # Imported locally so this function does not rely on a module-level
+    # logging import.
+    import logging
+
+    p = state_path(user_id)
+    try:
+        raw = json.loads(p.read_text())
+        A = np.array(raw["A"], dtype=np.float64)
+        b = np.array(raw["b"], dtype=np.float64)
+        if A.shape != (D, D) or b.shape != (D,):
+            raise ValueError(f"unexpected shapes A={A.shape}, b={b.shape}")
+        if not (np.isfinite(A).all() and np.isfinite(b).all()):
+            raise ValueError("non-finite values in stored state")
+        return A, b
+    except FileNotFoundError:
+        # No state saved yet for this user: fall through to the prior.
+        pass
+    except (OSError, ValueError, KeyError, TypeError) as err:
+        # json.JSONDecodeError and ragged-array errors are ValueErrors; a
+        # payload that is not a mapping raises TypeError on raw["A"].
+        logging.getLogger(__name__).warning(
+            "Discarding unusable bandit state %s: %s", p, err
+        )
+    return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)
+
+
+def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
+    """Persist the LinUCB state ``(A, b)`` for *user_id* as JSON.
+
+    The payload is written to a uniquely named temporary file in the same
+    directory and then moved over the target with ``os.replace``, so a reader
+    or a crash never observes a partially written state file.
+
+    Raises:
+        OSError: If the temporary file cannot be written or moved; the
+            temporary file is removed before the error propagates.
+    """
+    # Imported locally so this function does not rely on a module-level
+    # tempfile import.
+    import tempfile
+
+    p = state_path(user_id)
+    payload = json.dumps({"A": A.tolist(), "b": b.tolist()})
+    # Same directory as the target so the final rename stays on one filesystem.
+    fd, tmp_name = tempfile.mkstemp(
+        dir=str(p.parent), prefix=f"{p.stem}.", suffix=".tmp"
+    )
+    try:
+        with os.fdopen(fd, "w") as fh:
+            fh.write(payload)
+        os.replace(tmp_name, p)
+    except BaseException:
+        # Do not leave stray temp files behind when the write or move fails.
+        try:
+            os.unlink(tmp_name)
+        except OSError:
+            pass
+        raise
+
+
+# ── API models ─────────────────────────────────────────────────────────────
+
+# Raw, un-normalised features for one candidate; build_feature_vector turns
+# them into the model input. Every field has a default, so a payload may omit
+# any of them.
+class CandidateFeatures(BaseModel):
+    hour_of_day: int = 12       # read by /reward; /score uses Context.hour_of_day instead
+    is_overdue: bool = False
+    task_age_days: float = 0.0  # days since due date
+    priority: int = 1           # Todoist priority 1–4


 class Candidate(BaseModel):
+    # One scoreable item. /score reads only `id` and `features`; the other
+    # fields are accepted but not used by the handlers in this file.
    id: str
    content: str
    source: str
-    source_id: str | None = None
+    source_id: Optional[str] = None
+    features: CandidateFeatures = CandidateFeatures()
+
+
+# Request-level context shared by all candidates in one /score call.
+class Context(BaseModel):
+    hour_of_day: int = 12  # fed into every candidate's feature vector by /score
+    day_of_week: int = 0   # accepted but not read by /score in this file


 class ScoreRequest(BaseModel):
+    # Body of POST /score: whose model to use, the items to rank, and the
+    # shared context (defaults apply when `context` is omitted).
    user_id: str
    candidates: list[Candidate]
+    context: Context = Context()


 class ScoreResponse(BaseModel):
@@ -35,15 +104,69 @@ class ScoreResponse(BaseModel):
    policy: str


+# Body of POST /reward: feedback for a previously served tip.
+class RewardRequest(BaseModel):
+    user_id: str
+    tip_id: str            # not read by the /reward handler in this file
+    reward: float          # +1 done, 0 snooze, -1 dismiss
+    features: CandidateFeatures  # features used for the model update
+
+
+# Acknowledgement returned by POST /reward.
+class RewardResponse(BaseModel):
+    ok: bool
+
+
+# ── Endpoints ──────────────────────────────────────────────────────────────
+
@app.get("/health")
 def health():
+    # Liveness check: always reports ok and does not touch any bandit state.
    return {"ok": True}


@app.post("/score", response_model=ScoreResponse)
-def score(req: ScoreRequest):
+def score(req: ScoreRequest) -> ScoreResponse:
+    """Pick the candidate with the highest LinUCB upper confidence bound.
+
+    The user's state ``(A, b)`` is loaded, ``theta = A^-1 b`` is the current
+    weight estimate, and each candidate is scored as
+    ``theta·x + ALPHA * sqrt(x·A^-1·x)``. The hour of day comes from the
+    request context, not from the candidate's own features. Ties keep the
+    earliest candidate. This endpoint only reads state; learning happens in
+    /reward.
+
+    Raises:
+        HTTPException: 422 when the request carries no candidates.
+    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")
-    # Stub: random uniform scoring — real model slots in here
-    chosen = random.choice(req.candidates)
-    return ScoreResponse(tip_id=chosen.id, score=1.0, policy="stub-random")
+
+    A, b = load_state(req.user_id)
+    try:
+        A_inv = np.linalg.inv(A)
+    except np.linalg.LinAlgError:
+        # Singular state matrix: fall back to the untrained covariance.
+        A_inv = np.identity(D, dtype=np.float64)
+
+    theta = A_inv @ b
+
+    # Seed with the first candidate so a valid tip_id is returned even if no
+    # score compares greater than -inf (e.g. NaN scores).
+    best_id = req.candidates[0].id
+    best_score = -float("inf")
+
+    for candidate in req.candidates:
+        feat_dict = {
+            "hour_of_day": req.context.hour_of_day,
+            "is_overdue": candidate.features.is_overdue,
+            "task_age_days": candidate.features.task_age_days,
+            "priority": candidate.features.priority,
+        }
+        x = build_feature_vector(feat_dict)
+        exploit = float(theta @ x)
+        # Round-off can leave the quadratic form slightly below zero, which
+        # would make math.sqrt raise; clamp it at zero.
+        variance = max(float(x @ A_inv @ x), 0.0)
+        explore = ALPHA * math.sqrt(variance)
+        ucb = exploit + explore
+        if ucb > best_score:
+            best_score = ucb
+            best_id = candidate.id
+
+    return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
+
+
+@app.post("/reward", response_model=RewardResponse)
+def reward(req: RewardRequest) -> RewardResponse:
+    """Fold one observed reward into the user's LinUCB state and persist it.
+
+    Applies the update ``A += x xᵀ`` and ``b += reward * x``, where ``x`` is
+    built from ``req.features`` (including its ``hour_of_day``).
+    NOTE(review): /score takes the hour from the request context, so callers
+    presumably echo that same hour back in ``features`` — confirm.
+    NOTE(review): load, update and save are not guarded against concurrent
+    rewards for the same user — confirm whether feedback is serialised.
+
+    Raises:
+        HTTPException: 422 when the reward is NaN or infinite, since such a
+            value would be written into the persisted state.
+    """
+    if not math.isfinite(req.reward):
+        raise HTTPException(status_code=422, detail="Reward must be a finite number")
+
+    A, b = load_state(req.user_id)
+    feat_dict = {
+        "hour_of_day": req.features.hour_of_day,
+        "is_overdue": req.features.is_overdue,
+        "task_age_days": req.features.task_age_days,
+        "priority": req.features.priority,
+    }
+    x = build_feature_vector(feat_dict)
+    A += np.outer(x, x)
+    b += req.reward * x
+    save_state(req.user_id, A, b)
+    return RewardResponse(ok=True)