feat: M1 — LinUCB bandit, RemotePolicy, Web Push, event bus

ML serving:
- LinUCB contextual bandit (disjoint, d=5 features: hour_sin/cos, is_overdue, task_age, priority)
- /score endpoint replaces stub random; /reward endpoint for online learning
- Per-user model state persisted to disk as JSON (survives restarts)
- venv at ml/serving/.venv; start with pnpm dev from ml/serving

Recommender:
- Todoist fetch now extracts features (is_overdue, task_age_days, priority)
- RemotePolicy calls ml/serving with 3s timeout; falls back to RandomPolicy
- Reward sent to /reward on feedback (done=+1, snooze=0, dismiss=-1)

Web Push:
- VAPID keys in config; push_subscriptions table in DB
- POST/DELETE /api/push/subscribe; GET /api/push/vapid-public-key
- Service worker (public/sw.js): push → showNotification, notificationclick → focus/open
- "notify me" button on tip page; registers SW + subscribes on permission grant

Event bus:
- services/api/src/events/bus.ts: typed EventEmitter wrapper
- Subjects: signals.tip.served, signals.tip.feedback, signals.task.synced
- Same publish/subscribe API NATS JetStream will implement — swap is mechanical

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-15 14:08:00 +00:00
parent 08dfa1d8c9
commit c7edd92e15
16 changed files with 648 additions and 75 deletions

View File

@@ -1,32 +1,101 @@
"""
oO ML Serving — Phase 0 stub.
Returns a placeholder response that matches the interface the real scorer will implement.
The recommender service calls this via RemotePolicy (not yet wired in Phase 0).
oO ML Serving — Phase 1: LinUCB contextual bandit.
Contract:
POST /score
Body: { user_id: str, candidates: [{ id: str, content: str, source: str, source_id?: str }] }
Response: { tip_id: str, score: float }
POST /score { user_id, candidates, context } → { tip_id, score, policy }
POST /reward { user_id, tip_id, reward, features } → { ok }
GET /health → { ok }
Features (d=5):
hour_sin, hour_cos — cyclical time-of-day encoding
is_overdue — 0 or 1
task_age_days — days since due date (clipped 030, normalised 01)
priority_norm — Todoist priority 14, normalised to 01
"""
from __future__ import annotations
import json
import math
import os
import random
from pathlib import Path
from typing import Optional
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import random
app = FastAPI(title="oO ML Serving", version="0.0.0")
app = FastAPI(title="oO ML Serving", version="1.0.0")
STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)
ALPHA = 1.0 # exploration coefficient
D = 5 # feature dimension
# ── Feature helpers ────────────────────────────────────────────────────────
def build_feature_vector(features: dict) -> np.ndarray:
hour = features.get("hour_of_day", 12)
hour_sin = math.sin(2 * math.pi * hour / 24)
hour_cos = math.cos(2 * math.pi * hour / 24)
is_overdue = float(bool(features.get("is_overdue", False)))
age = min(float(features.get("task_age_days", 0)), 30.0) / 30.0
priority = (float(features.get("priority", 1)) - 1.0) / 3.0
return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
def state_path(user_id: str) -> Path:
safe = "".join(c if c.isalnum() else "_" for c in user_id)
return STATE_DIR / f"{safe}.json"
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
"""Returns (A, b). A is DxD, b is D-vector."""
p = state_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
return A, b
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64)
def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
p = state_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist()}))
# ── API models ─────────────────────────────────────────────────────────────
class CandidateFeatures(BaseModel):
hour_of_day: int = 12
is_overdue: bool = False
task_age_days: float = 0.0
priority: int = 1
class Candidate(BaseModel):
id: str
content: str
source: str
source_id: str | None = None
source_id: Optional[str] = None
features: CandidateFeatures = CandidateFeatures()
class Context(BaseModel):
hour_of_day: int = 12
day_of_week: int = 0
class ScoreRequest(BaseModel):
user_id: str
candidates: list[Candidate]
context: Context = Context()
class ScoreResponse(BaseModel):
@@ -35,15 +104,69 @@ class ScoreResponse(BaseModel):
policy: str
class RewardRequest(BaseModel):
user_id: str
tip_id: str
reward: float # +1 done, 0 snooze, -1 dismiss
features: CandidateFeatures
class RewardResponse(BaseModel):
ok: bool
# ── Endpoints ──────────────────────────────────────────────────────────────
@app.get("/health")
def health():
return {"ok": True}
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest):
def score(req: ScoreRequest) -> ScoreResponse:
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
# Stub: random uniform scoring — real model slots in here
chosen = random.choice(req.candidates)
return ScoreResponse(tip_id=chosen.id, score=1.0, policy="stub-random")
A, b = load_state(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
A_inv = np.identity(D, dtype=np.float64)
theta = A_inv @ b
best_id = None
best_score = -float("inf")
for candidate in req.candidates:
feat_dict = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": candidate.features.is_overdue,
"task_age_days": candidate.features.task_age_days,
"priority": candidate.features.priority,
}
x = build_feature_vector(feat_dict)
exploit = float(theta @ x)
explore = ALPHA * math.sqrt(float(x @ A_inv @ x))
ucb = exploit + explore
if ucb > best_score:
best_score = ucb
best_id = candidate.id
return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
A, b = load_state(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
"task_age_days": req.features.task_age_days,
"priority": req.features.priority,
}
x = build_feature_vector(feat_dict)
A += np.outer(x, x)
b += req.reward * x
save_state(req.user_id, A, b)
return RewardResponse(ok=True)