Files
oO/ml/serving/main.py
alvis 2d7cf217a9 feat(ml): egreedy-v2 shadow policy — D=12 with profile features (#99)
Ship the scaffolding for #99 (phase B.3 of #81):

- ml/serving: add /score/egreedy/v2, /reward/egreedy/v2, /stats/egreedy/v2
  endpoints (D=12). New feature dims: completion/dismiss rates, mean dwell
  (clipped 10min), preferred-hour alignment (cosine, 1-dim), tip volume (log).
  Separate state file per user (_egreedy_v2.json). /reset clears v2 state too.
- ADR-0012: documents D=7→12 dimension change, normalization choices, shadow
  rollout protocol, and promotion gate (offline sim win per ADR-0002).
- recommender.ts: register egreedy-v2-shadow in shadow-policy map (disabled by
  default). When enabled, calls /score/egreedy/v2 fire-and-forget and publishes
  shadow:egreedy-v2-shadow serve signal. No reward to shadow — sim is the gate.
- sim runner/personas: personas carry synthetic profile_features per persona;
  _call_score/_call_reward thread profile_features through (None-safe for v1/linucb).
- 18 new Python tests; all 56 Python + 170 TS tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 10:00:38 +00:00

778 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
oO ML Serving — Phase 1: LinUCB contextual bandit.
Contract:
POST /score LinUCB d=5 (baseline, kept as shadow-eligible)
POST /score/egreedy ε-greedy v1, d=7 (active — ADR-0007)
POST /score/egreedy/v2 ε-greedy v2, d=12, profile features (shadow — ADR-0012)
POST /reward, /reward/egreedy, /reward/egreedy/v2
GET /stats/{user_id} LinUCB stats
GET /stats/egreedy/{user_id} ε-greedy v1 stats
GET /stats/egreedy/v2/{user_id} ε-greedy v2 stats
POST /reset/{user_id} clear all per-user bandit state
GET /features/{user_id} last 100 scored feature vectors
POST /generate LLM tip candidates via LiteLLM
GET /health { ok }
Features (d=5):
hour_sin, hour_cos — cyclical time-of-day encoding
is_overdue — 0 or 1
task_age_days — days since due date (clipped 030, normalised 01)
priority_norm — Todoist priority 14, normalised to 01
"""
from __future__ import annotations
import json
import math
import os
import time
from collections import deque
from pathlib import Path
from typing import Optional, Deque
import httpx
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from prompts import get_prompt
app = FastAPI(title="oO ML Serving", version="1.0.0")
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000")
LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev")
STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)
ALPHA = 1.0 # LinUCB exploration coefficient
D = 5 # LinUCB feature dimension
D7 = 7 # ε-greedy v1 feature dimension (adds day-of-week cyclical encoding)
D12 = 12 # ε-greedy v2 feature dimension (adds 5 profile features — ADR-0012)
EPSILON = 0.1 # ε-greedy exploration rate
FEATURE_HISTORY_SIZE = 100 # per-user ring buffer
# ── Per-user in-memory feature history ────────────────────────────────────
_feature_history: dict[str, deque] = {}
def get_feature_history(user_id: str) -> deque:
if user_id not in _feature_history:
_feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE)
return _feature_history[user_id]
# ── Feature helpers ────────────────────────────────────────────────────────
def build_feature_vector(features: dict) -> np.ndarray:
hour = features.get("hour_of_day", 12)
hour_sin = math.sin(2 * math.pi * hour / 24)
hour_cos = math.cos(2 * math.pi * hour / 24)
is_overdue = float(bool(features.get("is_overdue", False)))
age = min(float(features.get("task_age_days", 0)), 30.0) / 30.0
priority = (float(features.get("priority", 1)) - 1.0) / 3.0
return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
# ── LinUCB state helpers ───────────────────────────────────────────────────
def state_path(user_id: str) -> Path:
safe = "".join(c if c.isalnum() else "_" for c in user_id)
return STATE_DIR / f"{safe}.json"
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
"""Returns (A, b, meta). A is DxD, b is D-vector."""
p = state_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
meta = raw.get("meta", {})
return A, b, meta
return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
p = state_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
# ── ε-greedy state helpers (d=7, extended features) ───────────────────────
def build_feature_vector_7(features: dict, day_of_week: int = 0) -> np.ndarray:
"""d=7: base 5 features + day-of-week cyclical encoding."""
base = build_feature_vector(features)
dow_sin = math.sin(2 * math.pi * day_of_week / 7)
dow_cos = math.cos(2 * math.pi * day_of_week / 7)
return np.append(base, [dow_sin, dow_cos])
def state7_path(user_id: str) -> Path:
safe = "".join(c if c.isalnum() else "_" for c in user_id)
return STATE_DIR / f"{safe}_egreedy.json"
def load_state7(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
"""Returns (A, b, meta) for ε-greedy d=7 policy."""
p = state7_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
return A, b, raw.get("meta", {})
return np.identity(D7, dtype=np.float64), np.zeros(D7, dtype=np.float64), {}
def save_state7(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
p = state7_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
# ── ε-greedy v2 state helpers (d=12, profile features — ADR-0012) ─────────
#
# Normalization choices (see ADR-0012):
# completion_rate_30d — already 01, passthrough; null → 0
# dismiss_rate_30d — already 01, passthrough; null → 0
# mean_dwell_ms_30d — clipped to [0, 600_000 ms] (10 min), then /600_000
# preferred_hour — circular alignment with context hour:
# (cos(2π·(now pref)/24) + 1) / 2 → 01
# captures "is the user's habitual peak near now?"
# null → 0.5 (neutral)
# tip_volume_30d — log1p(n) / log1p(100), clipped to [0, 1]
_DWELL_CLIP_MS = 600_000.0 # 10 minutes
_VOLUME_LOG_MAX = math.log1p(100.0)
def _profile_value(profile: Optional[dict], key: str) -> Optional[float]:
if not profile:
return None
v = profile.get(key)
if v is None:
return None
try:
return float(v)
except (TypeError, ValueError):
return None
def _norm_rate(v: Optional[float]) -> float:
return 0.0 if v is None else max(0.0, min(1.0, v))
def _norm_dwell(v: Optional[float]) -> float:
if v is None:
return 0.0
return max(0.0, min(1.0, v / _DWELL_CLIP_MS))
def _norm_volume(v: Optional[float]) -> float:
if v is None or v <= 0:
return 0.0
return min(1.0, math.log1p(float(v)) / _VOLUME_LOG_MAX)
def _norm_preferred_hour(pref: Optional[float], now_hour: int) -> float:
if pref is None:
return 0.5 # neutral when the user has no established peak yet
delta = (float(pref) - float(now_hour)) * (2.0 * math.pi / 24.0)
return (math.cos(delta) + 1.0) / 2.0
def build_feature_vector_12(
features: dict,
day_of_week: int = 0,
profile: Optional[dict] = None,
) -> np.ndarray:
"""d=12: egreedy-v1's 7 dims + 5 normalized profile features (ADR-0012)."""
base7 = build_feature_vector_7(features, day_of_week)
now_hour = int(features.get("hour_of_day", 12))
profile_dims = np.array(
[
_norm_rate(_profile_value(profile, "completion_rate_30d")),
_norm_rate(_profile_value(profile, "dismiss_rate_30d")),
_norm_dwell(_profile_value(profile, "mean_dwell_ms_30d")),
_norm_preferred_hour(_profile_value(profile, "preferred_hour"), now_hour),
_norm_volume(_profile_value(profile, "tip_volume_30d")),
],
dtype=np.float64,
)
return np.concatenate([base7, profile_dims])
def state12_path(user_id: str) -> Path:
safe = "".join(c if c.isalnum() else "_" for c in user_id)
return STATE_DIR / f"{safe}_egreedy_v2.json"
def load_state12(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
p = state12_path(user_id)
if p.exists():
raw = json.loads(p.read_text())
A = np.array(raw["A"], dtype=np.float64)
b = np.array(raw["b"], dtype=np.float64)
return A, b, raw.get("meta", {})
return np.identity(D12, dtype=np.float64), np.zeros(D12, dtype=np.float64), {}
def save_state12(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
p = state12_path(user_id)
p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta}))
# ── API models ─────────────────────────────────────────────────────────────
class CandidateFeatures(BaseModel):
hour_of_day: int = 12
is_overdue: bool = False
task_age_days: float = 0.0
priority: int = 1
class Candidate(BaseModel):
id: str
content: str
source: str
source_id: Optional[str] = None
features: CandidateFeatures = CandidateFeatures()
class Context(BaseModel):
hour_of_day: int = 12
day_of_week: int = 0
class ScoreRequest(BaseModel):
user_id: str
candidates: list[Candidate]
context: Context = Context()
# User-level features computed by the API (#81 phase A). Accepted, logged,
# but not yet consumed by the bandit — extending the feature vector
# changes `D` and resets every user's learned state, which is a deliberate
# follow-up (phase B), not a side effect of this PR.
profile_features: Optional[dict] = None
class ScoreResponse(BaseModel):
tip_id: str
score: float
policy: str
class RewardRequest(BaseModel):
user_id: str
tip_id: str
reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
features: CandidateFeatures
day_of_week: int = 0 # included so egreedy can train dow features correctly
# Profile features at the time the tip was served. Ignored by /reward and
# /reward/egreedy; consumed by /reward/egreedy/v2 so the ridge update uses
# the same feature vector as the matching /score/egreedy/v2 call.
profile_features: Optional[dict] = None
class RewardResponse(BaseModel):
ok: bool
class PromptContext(BaseModel):
tasks: list[dict] = []
hour_of_day: int = 12
day_of_week: int = 0
extra: dict = {}
class GenerateRequest(BaseModel):
user_id: str
context: PromptContext = PromptContext()
n: int = 3
prompt_version: Optional[str] = None # None → server default (env DEFAULT_PROMPT_VERSION)
# User-level features (#81 phase A). Accepted by the contract; not yet
# injected into the prompt — that's a #84-style prompt-design decision.
profile_features: Optional[dict] = None
class TipCandidate(BaseModel):
id: str
content: str
source: str = "llm"
rationale: Optional[str] = None
class GenerateResponse(BaseModel):
candidates: list[TipCandidate]
model: str
prompt_version: str
prompt_tokens: int = 0
completion_tokens: int = 0
# ── Endpoints ──────────────────────────────────────────────────────────────
@app.get("/health")
def health():
return {"ok": True}
_RETRY_SUFFIX = (
"\n\nYour previous response was not valid JSON. "
"Reply ONLY with the JSON array — no prose, no markdown fences."
)
_MAX_GENERATE_RETRIES = 2
def _parse_llm_json(raw: str) -> list[dict]:
"""Strip markdown fences and parse JSON array. Raises ValueError on failure."""
text = raw.strip()
if text.startswith("```"):
parts = text.split("```")
text = parts[1] if len(parts) > 1 else text
if text.startswith("json"):
text = text[4:]
return json.loads(text)
@app.post("/generate", response_model=GenerateResponse)
async def generate(req: GenerateRequest) -> GenerateResponse:
"""Generate tip candidates via LiteLLM → tip-generator alias.
Retries up to _MAX_GENERATE_RETRIES times on malformed JSON, appending
a correction hint to the conversation so the model can self-correct.
"""
try:
prompt_template = get_prompt(req.prompt_version)
except KeyError as e:
raise HTTPException(status_code=422, detail=f"Unknown prompt_version: {e.args[0]}")
user_msg = prompt_template.build_user(req.context, req.n)
messages: list[dict] = [
{"role": "system", "content": prompt_template.system},
{"role": "user", "content": user_msg},
]
headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
last_parse_error: str = ""
last_raw: str = ""
total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
model_used = "tip-generator"
async with httpx.AsyncClient(timeout=30.0) as client:
for attempt in range(1 + _MAX_GENERATE_RETRIES):
payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
try:
resp = await client.post(
f"{LITELLM_URL}/chat/completions",
json=payload,
headers=headers,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
raise HTTPException(status_code=502, detail=f"LiteLLM error: {e.response.text}")
except httpx.RequestError as e:
raise HTTPException(status_code=503, detail=f"LiteLLM unreachable: {e}")
data = resp.json()
usage = data.get("usage", {})
total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
model_used = data.get("model", "tip-generator")
last_raw = data["choices"][0]["message"]["content"]
try:
items = _parse_llm_json(last_raw)
break
except (json.JSONDecodeError, ValueError) as e:
last_parse_error = str(e)
# Feed the bad reply back so the model can self-correct
messages.append({"role": "assistant", "content": last_raw})
messages.append({"role": "user", "content": _RETRY_SUFFIX})
else:
raise HTTPException(
status_code=502,
detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
f"{last_parse_error}\n{last_raw[:200]}",
)
candidates = [
TipCandidate(
id=item.get("id", f"tip-{i}"),
content=item.get("content", ""),
rationale=item.get("rationale"),
)
for i, item in enumerate(items)
]
return GenerateResponse(
candidates=candidates,
model=model_used,
prompt_version=prompt_template.version,
prompt_tokens=total_usage["prompt_tokens"],
completion_tokens=total_usage["completion_tokens"],
)
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
A, b, meta = load_state(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
A_inv = np.identity(D, dtype=np.float64)
theta = A_inv @ b
best_id = None
best_score = -float("inf")
best_features: dict = {}
for candidate in req.candidates:
feat_dict = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": candidate.features.is_overdue,
"task_age_days": candidate.features.task_age_days,
"priority": candidate.features.priority,
}
x = build_feature_vector(feat_dict)
exploit = float(theta @ x)
explore = ALPHA * math.sqrt(float(x @ A_inv @ x))
ucb = exploit + explore
if ucb > best_score:
best_score = ucb
best_id = candidate.id
best_features = feat_dict
# Log to feature history ring buffer
history = get_feature_history(req.user_id)
history.append({
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"features": best_features,
"score": best_score,
"tip_id": best_id,
})
# Update meta stats
meta["pulls"] = meta.get("pulls", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return ScoreResponse(tip_id=best_id, score=best_score, policy="linucb-v1")
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
A, b, meta = load_state(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
"task_age_days": req.features.task_age_days,
"priority": req.features.priority,
}
x = build_feature_vector(feat_dict)
A += np.outer(x, x)
b += req.reward * x
# Track cumulative reward in meta
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
meta["reward_count"] = meta.get("reward_count", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state(req.user_id, A, b, meta)
return RewardResponse(ok=True)
@app.post("/score/egreedy", response_model=ScoreResponse)
def score_egreedy(req: ScoreRequest) -> ScoreResponse:
"""ε-greedy policy with d=7 features (adds day-of-week encoding).
Exploration: pick uniformly at random with probability ε.
Exploitation: pick argmax of linear payoff estimate θ·x.
Differs from LinUCB in: no UCB bonus, richer feature space.
"""
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
A, b, meta = load_state7(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
A_inv = np.identity(D7, dtype=np.float64)
theta = A_inv @ b
dow = req.context.day_of_week
exploring = np.random.random() < EPSILON
if exploring:
chosen = req.candidates[np.random.randint(len(req.candidates))]
feat_dict = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": chosen.features.is_overdue,
"task_age_days": chosen.features.task_age_days,
"priority": chosen.features.priority,
}
x = build_feature_vector_7(feat_dict, dow)
best_score = float(theta @ x)
best_id = chosen.id
else:
best_id = None
best_score = -float("inf")
feat_dict = {}
for candidate in req.candidates:
fd = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": candidate.features.is_overdue,
"task_age_days": candidate.features.task_age_days,
"priority": candidate.features.priority,
}
x = build_feature_vector_7(fd, dow)
s = float(theta @ x)
if s > best_score:
best_score = s
best_id = candidate.id
feat_dict = fd
history = get_feature_history(req.user_id)
history.append({
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"features": {**feat_dict, "day_of_week": dow, "exploring": exploring},
"score": best_score,
"tip_id": best_id,
"policy": "egreedy-v1",
})
meta["pulls"] = meta.get("pulls", 0) + 1
meta["explore_count"] = meta.get("explore_count", 0) + int(exploring)
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state7(req.user_id, A, b, meta)
return ScoreResponse(tip_id=best_id, score=best_score, policy="egreedy-v1")
@app.post("/reward/egreedy", response_model=RewardResponse)
def reward_egreedy(req: RewardRequest) -> RewardResponse:
"""Update ε-greedy ridge estimator with observed reward."""
A, b, meta = load_state7(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
"task_age_days": req.features.task_age_days,
"priority": req.features.priority,
}
x = build_feature_vector_7(feat_dict, day_of_week=req.day_of_week)
A += np.outer(x, x)
b += req.reward * x
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
meta["reward_count"] = meta.get("reward_count", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state7(req.user_id, A, b, meta)
return RewardResponse(ok=True)
@app.post("/score/egreedy/v2", response_model=ScoreResponse)
def score_egreedy_v2(req: ScoreRequest) -> ScoreResponse:
"""ε-greedy v2 — d=12, adds 5 normalized profile features (ADR-0012).
Shadow-only until offline sim + rollout per ADR-0002 completes.
Accepts the same ScoreRequest shape as v1; `profile_features` drives the
extra 5 dims (defaults: zeros for rates/volume/dwell, 0.5 neutral for
preferred_hour alignment).
"""
if not req.candidates:
raise HTTPException(status_code=422, detail="No candidates")
A, b, meta = load_state12(req.user_id)
try:
A_inv = np.linalg.inv(A)
except np.linalg.LinAlgError:
A_inv = np.identity(D12, dtype=np.float64)
theta = A_inv @ b
dow = req.context.day_of_week
exploring = np.random.random() < EPSILON
if exploring:
chosen = req.candidates[np.random.randint(len(req.candidates))]
feat_dict = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": chosen.features.is_overdue,
"task_age_days": chosen.features.task_age_days,
"priority": chosen.features.priority,
}
x = build_feature_vector_12(feat_dict, dow, req.profile_features)
best_score = float(theta @ x)
best_id = chosen.id
else:
best_id = None
best_score = -float("inf")
feat_dict = {}
for candidate in req.candidates:
fd = {
"hour_of_day": req.context.hour_of_day,
"is_overdue": candidate.features.is_overdue,
"task_age_days": candidate.features.task_age_days,
"priority": candidate.features.priority,
}
x = build_feature_vector_12(fd, dow, req.profile_features)
s = float(theta @ x)
if s > best_score:
best_score = s
best_id = candidate.id
feat_dict = fd
history = get_feature_history(req.user_id)
history.append({
"ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"features": {**feat_dict, "day_of_week": dow, "exploring": exploring},
"score": best_score,
"tip_id": best_id,
"policy": "egreedy-v2",
})
meta["pulls"] = meta.get("pulls", 0) + 1
meta["explore_count"] = meta.get("explore_count", 0) + int(exploring)
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state12(req.user_id, A, b, meta)
return ScoreResponse(tip_id=best_id, score=best_score, policy="egreedy-v2")
@app.post("/reward/egreedy/v2", response_model=RewardResponse)
def reward_egreedy_v2(req: RewardRequest) -> RewardResponse:
"""Update ε-greedy v2 ridge estimator using the d=12 feature vector."""
A, b, meta = load_state12(req.user_id)
feat_dict = {
"hour_of_day": req.features.hour_of_day,
"is_overdue": req.features.is_overdue,
"task_age_days": req.features.task_age_days,
"priority": req.features.priority,
}
x = build_feature_vector_12(feat_dict, req.day_of_week, req.profile_features)
A += np.outer(x, x)
b += req.reward * x
meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward
meta["reward_count"] = meta.get("reward_count", 0) + 1
meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
save_state12(req.user_id, A, b, meta)
return RewardResponse(ok=True)
@app.get("/stats/egreedy/v2/{user_id}")
def stats_egreedy_v2(user_id: str):
"""ε-greedy v2 policy stats — pulls, cumulative reward, θ vector."""
A, b, meta = load_state12(user_id)
try:
theta = (np.linalg.inv(A) @ b).tolist()
except np.linalg.LinAlgError:
theta = [0.0] * D12
pulls = meta.get("pulls", 0)
cumulative_reward = meta.get("cumulative_reward", 0.0)
reward_count = meta.get("reward_count", 0)
explore_count = meta.get("explore_count", 0)
return {
"user_id": user_id,
"policy": "egreedy-v2",
"pulls": pulls,
"reward_count": reward_count,
"cumulative_reward": cumulative_reward,
"estimated_mean_reward": cumulative_reward / reward_count if reward_count > 0 else 0.0,
"exploration_rate": explore_count / pulls if pulls > 0 else 0.0,
"theta": theta,
"feature_labels": [
"hour_sin", "hour_cos", "is_overdue", "task_age", "priority",
"dow_sin", "dow_cos",
"completion_rate_30d", "dismiss_rate_30d", "mean_dwell_norm",
"preferred_hour_alignment", "tip_volume_norm",
],
"last_updated": meta.get("last_updated"),
}
@app.get("/stats/egreedy/{user_id}")
def stats_egreedy(user_id: str):
"""ε-greedy policy stats — pulls, cumulative reward, θ vector."""
A, b, meta = load_state7(user_id)
try:
theta = (np.linalg.inv(A) @ b).tolist()
except np.linalg.LinAlgError:
theta = [0.0] * D7
pulls = meta.get("pulls", 0)
cumulative_reward = meta.get("cumulative_reward", 0.0)
reward_count = meta.get("reward_count", 0)
explore_count = meta.get("explore_count", 0)
return {
"user_id": user_id,
"policy": "egreedy-v1",
"pulls": pulls,
"reward_count": reward_count,
"cumulative_reward": cumulative_reward,
"estimated_mean_reward": cumulative_reward / reward_count if reward_count > 0 else 0.0,
"exploration_rate": explore_count / pulls if pulls > 0 else 0.0,
"theta": theta,
"feature_labels": ["hour_sin", "hour_cos", "is_overdue", "task_age", "priority", "dow_sin", "dow_cos"],
"last_updated": meta.get("last_updated"),
}
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
"""Reset per-user bandit state (admin action)."""
p = state_path(user_id)
if p.exists():
p.unlink()
p7 = state7_path(user_id)
if p7.exists():
p7.unlink()
p12 = state12_path(user_id)
if p12.exists():
p12.unlink()
if user_id in _feature_history:
_feature_history[user_id].clear()
return RewardResponse(ok=True)
@app.get("/stats/{user_id}")
def stats(user_id: str):
"""Return current LinUCB state summary for a user."""
A, b, meta = load_state(user_id)
try:
A_inv = np.linalg.inv(A)
theta = (A_inv @ b).tolist()
except np.linalg.LinAlgError:
theta = [0.0] * D
pulls = meta.get("pulls", 0)
cumulative_reward = meta.get("cumulative_reward", 0.0)
reward_count = meta.get("reward_count", 0)
estimated_mean = cumulative_reward / reward_count if reward_count > 0 else 0.0
return {
"user_id": user_id,
"pulls": pulls,
"reward_count": reward_count,
"cumulative_reward": cumulative_reward,
"estimated_mean_reward": estimated_mean,
"theta": theta,
"last_updated": meta.get("last_updated"),
}
@app.get("/features/{user_id}")
def features(user_id: str):
"""Return recent feature vectors logged at scoring time."""
history = get_feature_history(user_id)
return {
"user_id": user_id,
"history": list(history),
}