feat: M1 — LinUCB bandit, RemotePolicy, Web Push, event bus
ML serving:
- LinUCB contextual bandit (disjoint, d=5 features: hour_sin/cos, is_overdue, task_age, priority)
- /score endpoint replaces the stub random policy; /reward endpoint for online learning
- Per-user model state persisted to disk as JSON (survives restarts)
- venv at ml/serving/.venv; start with `pnpm dev` from ml/serving

Recommender:
- Todoist fetch now extracts features (is_overdue, task_age_days, priority)
- RemotePolicy calls ml/serving with a 3s timeout; falls back to RandomPolicy
- Reward sent to /reward on feedback (done=+1, snooze=0, dismiss=-1)

Web Push:
- VAPID keys in config; push_subscriptions table in DB
- POST/DELETE /api/push/subscribe; GET /api/push/vapid-public-key
- Service worker (public/sw.js): push → showNotification, notificationclick → focus/open
- "notify me" button on tip page; registers the SW and subscribes on permission grant

Event bus:
- services/api/src/events/bus.ts: typed EventEmitter wrapper
- Subjects: signals.tip.served, signals.tip.feedback, signals.task.synced
- Same publish/subscribe API that NATS JetStream will implement — the swap is mechanical

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,32 +1,101 @@
|
||||
"""
|
||||
oO ML Serving — Phase 0 stub.
|
||||
|
||||
Returns a placeholder response that matches the interface the real scorer will implement.
|
||||
The recommender service calls this via RemotePolicy (not yet wired in Phase 0).
|
||||
oO ML Serving — Phase 1: LinUCB contextual bandit.
|
||||
|
||||
Contract:
|
||||
POST /score
|
||||
Body: { user_id: str, candidates: [{ id: str, content: str, source: str, source_id?: str }] }
|
||||
Response: { tip_id: str, score: float }
|
||||
POST /score { user_id, candidates, context } → { tip_id, score, policy }
|
||||
POST /reward { user_id, tip_id, reward, features } → { ok }
|
||||
GET /health → { ok }
|
||||
|
||||
Features (d=5):
|
||||
hour_sin, hour_cos — cyclical time-of-day encoding
|
||||
is_overdue — 0 or 1
|
||||
task_age_days — days since due date (clipped 0–30, normalised 0–1)
|
||||
priority_norm — Todoist priority 1–4, normalised to 0–1
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
import random
|
||||
|
||||
# Single application instance. The earlier Phase 0 instance (version 0.0.0)
# was immediately overwritten by this one, so it has been dropped.
app = FastAPI(title="oO ML Serving", version="1.0.0")

# Directory holding one JSON state file per user. Overridable through the
# STATE_DIR environment variable; created at import time if it is missing.
STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)

ALPHA = 1.0  # exploration coefficient
D = 5  # feature dimension
|
||||
|
||||
|
||||
# ── Feature helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def build_feature_vector(features: dict) -> np.ndarray:
    """Encode a raw feature mapping as the five-element context vector.

    Args:
        features: Mapping that may contain ``hour_of_day``, ``is_overdue``,
            ``task_age_days`` and ``priority``. Missing keys default to
            12, False, 0 and 1 respectively.

    Returns:
        A ``float64`` array
        ``[hour_sin, hour_cos, is_overdue, age, priority]`` where ``age`` and
        ``priority`` are normalised to the closed interval [0, 1].
    """
    hour = features.get("hour_of_day", 12)
    # Cyclical encoding so that 23:00 and 00:00 end up close together.
    angle = 2 * math.pi * hour / 24
    hour_sin = math.sin(angle)
    hour_cos = math.cos(angle)

    is_overdue = float(bool(features.get("is_overdue", False)))

    # Clamp on BOTH sides: a task that is not yet due can report a negative
    # age, which previously leaked through as a negative feature value.
    raw_age = float(features.get("task_age_days", 0))
    age = min(max(raw_age, 0.0), 30.0) / 30.0

    # Priorities outside the documented 1-4 range are pinned to the nearest
    # valid value so the normalised feature never leaves [0, 1].
    raw_priority = float(features.get("priority", 1))
    priority = (min(max(raw_priority, 1.0), 4.0) - 1.0) / 3.0

    return np.array(
        [hour_sin, hour_cos, is_overdue, age, priority],
        dtype=np.float64,
    )
|
||||
|
||||
|
||||
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
|
||||
|
||||
def state_path(user_id: str) -> Path:
    """Return the JSON state file path for ``user_id`` inside ``STATE_DIR``.

    Every character for which ``str.isalnum`` is false is replaced by an
    underscore, so the result is always a single file name directly under
    ``STATE_DIR``.
    """
    # NOTE(review): the mapping is not injective - e.g. "a-b" and "a_b" both
    # resolve to "a_b.json" - so such user ids would share one state file.
    sanitized = []
    for ch in user_id:
        sanitized.append(ch if ch.isalnum() else "_")
    filename = "".join(sanitized) + ".json"
    return STATE_DIR / filename
|
||||
|
||||
|
||||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray]:
    """Load the persisted LinUCB state for a user, or return a fresh prior.

    Args:
        user_id: Identifier whose state file (see ``state_path``) is read.

    Returns:
        ``(A, b)`` where ``A`` is a ``D x D`` float64 matrix and ``b`` is a
        length-``D`` float64 vector. When no state file exists, or the file
        cannot be used, the prior ``(identity, zeros)`` is returned.
    """
    import logging

    fresh = (np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64))
    path = state_path(user_id)

    # EAFP: read directly instead of exists()-then-read, which could race
    # with a concurrent writer or deletion.
    try:
        raw = json.loads(path.read_text())
        A = np.array(raw["A"], dtype=np.float64)
        b = np.array(raw["b"], dtype=np.float64)
    except FileNotFoundError:
        return fresh
    except (KeyError, TypeError, ValueError) as err:
        # Truncated / malformed JSON or an unexpected schema: a broken file
        # must not take the endpoint down for this user on every request.
        logging.getLogger(__name__).warning(
            "Discarding unreadable bandit state %s: %s", path, err
        )
        return fresh

    # A state written with a different feature dimension, or containing
    # NaN/inf, would break or poison the linear algebra downstream.
    usable = (
        A.shape == (D, D)
        and b.shape == (D,)
        and bool(np.isfinite(A).all())
        and bool(np.isfinite(b).all())
    )
    if not usable:
        logging.getLogger(__name__).warning(
            "Discarding invalid bandit state %s (A%s, b%s)", path, A.shape, b.shape
        )
        return fresh

    return A, b
|
||||
|
||||
|
||||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray) -> None:
    """Persist a user's LinUCB state as JSON without exposing partial writes.

    Args:
        user_id: Identifier whose state file (see ``state_path``) is written.
        A: Design matrix, serialised via ``tolist()`` under key ``"A"``.
        b: Reward vector, serialised via ``tolist()`` under key ``"b"``.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
    """
    import tempfile

    target = state_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist()})

    # Write to a uniquely named sibling file and rename it over the target.
    # Writing the target in place would truncate it first, so a crash (or a
    # concurrent reader) could observe an empty or half-written JSON file.
    fd, tmp_name = tempfile.mkstemp(
        dir=str(target.parent),
        prefix=f"{target.name}.",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        os.replace(tmp_name, target)
    except Exception:
        # Do not leave stray temp files behind when the write fails.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
# ── API models ─────────────────────────────────────────────────────────────
|
||||
|
||||
class CandidateFeatures(BaseModel):
    """Raw, un-normalised features describing one candidate task."""

    # Hour used for the sin/cos encoding when this model is sent to /reward.
    # /score takes the hour from the request context instead.
    hour_of_day: int = 12

    # Whether the task is past its due date.
    is_overdue: bool = False

    # Task age in days; normalised by build_feature_vector.
    task_age_days: float = 0.0

    # Task priority; normalised by build_feature_vector as (priority - 1) / 3.
    priority: int = 1
|
||||
|
||||
|
||||
class Candidate(BaseModel):
    """One tip that /score may choose among."""

    # Identifier echoed back as ``tip_id`` when this candidate wins.
    id: str

    # Text of the tip; not read by the scoring code in this file.
    content: str

    # Name of the upstream source; not read by the scoring code in this file.
    source: str

    # Optional identifier within the source. Declared once - the previous
    # ``str | None`` spelling duplicated this same field.
    source_id: Optional[str] = None

    # Per-candidate features; defaults apply when the caller omits them.
    features: CandidateFeatures = CandidateFeatures()
|
||||
|
||||
|
||||
class Context(BaseModel):
    """Request-level context shared by every candidate in a /score call."""

    # Hour fed into the cyclical time-of-day features; defaults to noon.
    hour_of_day: int = 12

    # Accepted on the wire, but not read by the scoring code in this file.
    day_of_week: int = 0
|
||||
|
||||
|
||||
class ScoreRequest(BaseModel):
    """Body of ``POST /score``."""

    # Selects which per-user bandit state is loaded.
    user_id: str

    # Tips to rank; the endpoint answers 422 when this list is empty.
    candidates: list[Candidate]

    # Shared context; defaults apply when the caller omits it.
    context: Context = Context()
|
||||
|
||||
|
||||
class ScoreResponse(BaseModel):
|
||||
@@ -35,15 +104,69 @@ class ScoreResponse(BaseModel):
|
||||
policy: str
|
||||
|
||||
|
||||
class RewardRequest(BaseModel):
    """Body of ``POST /reward``: feedback for a previously served tip."""

    # Selects which per-user bandit state is updated.
    user_id: str

    # Identifier of the tip the feedback refers to; the update itself does
    # not read it.
    tip_id: str

    # +1 done, 0 snooze, -1 dismiss
    reward: float

    # Features of the served tip, re-encoded to form the update vector.
    features: CandidateFeatures
|
||||
|
||||
|
||||
class RewardResponse(BaseModel):
    """Acknowledgement returned by ``POST /reward``."""

    # Set to True by the endpoint after the state has been saved.
    ok: bool
|
||||
|
||||
|
||||
# ── Endpoints ──────────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/health")
def health():
    """Liveness probe; always answers with ``{"ok": True}``."""
    payload = {"ok": True}
    return payload
|
||||
|
||||
|
||||
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
    """Pick the candidate with the highest LinUCB upper confidence bound.

    For each candidate the score is ``theta . x + ALPHA * sqrt(x^T A^-1 x)``
    with ``theta = A^-1 b`` taken from the user's persisted state. The first
    candidate reaching the maximum wins ties.

    Args:
        req: User id, candidate tips and the shared request context.

    Returns:
        The winning candidate's id, its UCB score and the policy tag
        ``"linucb-v1"``.

    Raises:
        HTTPException: 422 when ``req.candidates`` is empty.
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    # The Phase 0 stub (random.choice + early return) used to sit here and
    # would have short-circuited the bandit; it has been removed.
    A, b = load_state(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular matrix: fall back to the identity rather than failing.
        A_inv = np.identity(D, dtype=np.float64)

    theta = A_inv @ b

    # The hour comes from the request context, so it is the same for every
    # candidate; read it once outside the loop.
    hour_of_day = req.context.hour_of_day

    best_id = None
    best_score = -float("inf")

    for candidate in req.candidates:
        x = build_feature_vector(
            {
                "hour_of_day": hour_of_day,
                "is_overdue": candidate.features.is_overdue,
                "task_age_days": candidate.features.task_age_days,
                "priority": candidate.features.priority,
            }
        )
        exploit = float(theta @ x)
        # The quadratic form is non-negative for a positive-definite A, but a
        # persisted state that is not positive definite (or round-off) can
        # push it below zero, and math.sqrt would then raise ValueError.
        variance = max(float(x @ A_inv @ x), 0.0)
        ucb = exploit + ALPHA * math.sqrt(variance)
        if ucb > best_score:
            best_score = ucb
            best_id = candidate.id

    return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
|
||||
|
||||
|
||||
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
    """Apply one online LinUCB update for a user and persist the new state.

    The update is ``A += x x^T`` and ``b += reward * x`` where ``x`` is the
    encoded feature vector of the tip that received feedback.

    Args:
        req: User id, tip id, scalar reward and the tip's raw features.

    Returns:
        ``RewardResponse(ok=True)`` once the state has been saved.

    Raises:
        HTTPException: 422 when the reward or any encoded feature is NaN or
            infinite.
    """
    # A single NaN/inf would be added into A or b and then written to disk,
    # permanently corrupting this user's model - reject it up front.
    if not math.isfinite(req.reward):
        raise HTTPException(status_code=422, detail="Reward must be finite")

    x = build_feature_vector(
        {
            "hour_of_day": req.features.hour_of_day,
            "is_overdue": req.features.is_overdue,
            "task_age_days": req.features.task_age_days,
            "priority": req.features.priority,
        }
    )
    if not bool(np.isfinite(x).all()):
        raise HTTPException(status_code=422, detail="Features must be finite")

    A, b = load_state(req.user_id)
    A += np.outer(x, x)
    b += req.reward * x
    save_state(req.user_id, A, b)
    return RewardResponse(ok=True)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "uvicorn main:app --reload --port 8000",
|
||||
"start": "uvicorn main:app --port 8000"
|
||||
"dev": ".venv/bin/uvicorn main:app --reload --port 8000",
|
||||
"start": ".venv/bin/uvicorn main:app --port 8000"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
fastapi==0.115.6
|
||||
uvicorn[standard]==0.32.1
|
||||
pydantic==2.10.4
|
||||
numpy>=1.26.0
|
||||
|
||||
Reference in New Issue
Block a user