Drop all four Airflow containers (db, init, webserver, scheduler) from the mlops compose profile, leaving MLflow as the sole mlops service. Remove AIRFLOW_* env vars, config fields, health-check entries, DAG trigger code in admin/bench routes, the airflow_dag_run_id schema column, Airflow nav links and DAG-run links in the admin UI, the two Airflow DAG files (bench_dag.py, sim_dag.py), and all related docs/ADR references. Simulations now run exclusively via the subprocess path. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1016 lines
36 KiB
Python
1016 lines
36 KiB
Python
"""
|
||
oO ML Serving — Phase 1: LinUCB contextual bandit.

Contract:
  POST /score                       LinUCB d=5 (baseline, kept as shadow-eligible)
  POST /score/egreedy               ε-greedy v1, d=7 (active — ADR-0007)
  POST /score/egreedy/v2            ε-greedy v2, d=12, profile features (shadow — ADR-0012)
  POST /reward, /reward/egreedy, /reward/egreedy/v2
  GET  /stats/{user_id}             LinUCB stats
  GET  /stats/egreedy/{user_id}     ε-greedy v1 stats
  GET  /stats/egreedy/v2/{user_id}  ε-greedy v2 stats
  POST /reset/{user_id}             clear all per-user bandit state
  GET  /features/{user_id}          last 100 scored feature vectors
  POST /generate                    LLM tip candidates via LiteLLM
  GET  /health                      { ok }

Features (d=5):
  hour_sin, hour_cos — cyclical time-of-day encoding
  is_overdue         — 0 or 1
  task_age_days      — days since due date (clipped 0–30, normalised 0–1)
  priority_norm      — Todoist priority 1–4, normalised to 0–1
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import math
|
||
import os
|
||
import sys
|
||
import time
|
||
from collections import deque
|
||
from contextlib import asynccontextmanager
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Optional, Deque
|
||
|
||
import httpx
|
||
import numpy as np
|
||
import sentry_sdk
|
||
import structlog
|
||
import structlog.contextvars
|
||
from fastapi import FastAPI, HTTPException, Request
|
||
from pydantic import BaseModel
|
||
from starlette.middleware.base import BaseHTTPMiddleware
|
||
|
||
import logging_config
|
||
import nats_consumer
|
||
from prompts import get_prompt, build_orchestrator_messages
|
||
|
||
# Ensure `ml.agents` can be imported no matter where the process is launched.
#   * Docker (WORKDIR=/app/ml/serving, PYTHONPATH=/app): /app is already on the path.
#   * Local dev (started from ml/serving/): the repo root sits two directories up.
_repo_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
if _repo_root not in sys.path:
    # Prepend so the repo checkout wins over any installed copy.
    sys.path.insert(0, _repo_root)
|
||
|
||
from ml.agents.base import AgentInput # noqa: E402
|
||
from ml.agents.registry import get_agent, all_agents # noqa: E402
|
||
|
||
# Logging must be configured before the first logger is created.
logging_config.configure()

# Sentry is opt-in: it is only initialised when SENTRY_DSN is set and non-empty.
_SENTRY_DSN = os.environ.get("SENTRY_DSN")
if _SENTRY_DSN:
    sentry_sdk.init(
        dsn=_SENTRY_DSN,
        environment=os.environ.get("ENV", "development"),
    )

# Module-wide structured logger.
log = structlog.get_logger()
|
||
|
||
|
||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start the NATS consumer, then stop it on shutdown.

    The consumer is started with ``STATE_DIR`` before the app begins serving.
    ``stop()`` runs in a ``finally`` clause so it is still awaited when the
    context is exited by an exception (or cancellation) thrown in at ``yield``.
    """
    await nats_consumer.start(STATE_DIR)
    try:
        yield
    finally:
        await nats_consumer.stop()
|
||
|
||
|
||
# The ASGI application; `lifespan` manages the NATS consumer around serving.
app = FastAPI(
    title="oO ML Serving",
    version="1.0.0",
    lifespan=lifespan,
)
|
||
|
||
|
||
class _TracingMiddleware(BaseHTTPMiddleware):
    """Bind the incoming ``traceparent`` trace id to the structlog context."""

    async def dispatch(self, request: Request, call_next):
        """Reset log context, bind ``trace_id`` when available, then continue.

        The header is split on ``-``; a trace id is bound only when there are
        exactly four fields and the second one is 32 characters long.
        """
        # Start every request with a clean context so ids never leak across requests.
        structlog.contextvars.clear_contextvars()
        header = request.headers.get("traceparent", "")
        if header:
            fields = header.split("-")
            if len(fields) == 4 and len(fields[1]) == 32:
                structlog.contextvars.bind_contextvars(trace_id=fields[1])
        return await call_next(request)
|
||
|
||
|
||
app.add_middleware(_TracingMiddleware)

# LiteLLM endpoint and bearer key used for /chat/completions calls.
LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
# NOTE(review): the fallback key looks like a development credential — confirm
# that deployed environments always set LITELLM_MASTER_KEY.
LITELLM_MASTER_KEY = os.environ.get("LITELLM_MASTER_KEY", "sk-oo-dev")

# Directory holding the per-user bandit state files; created at import time.
STATE_DIR = Path(os.environ.get("STATE_DIR", "/tmp/oo-bandit-state"))
STATE_DIR.mkdir(parents=True, exist_ok=True)

ALPHA = 1.0  # LinUCB exploration coefficient
D = 5  # LinUCB feature dimension
D7 = 7  # ε-greedy v1 feature dimension (adds day-of-week cyclical encoding)
D12 = 12  # ε-greedy v2 feature dimension (adds 5 profile features — ADR-0012)
EPSILON = 0.1  # ε-greedy exploration rate
FEATURE_HISTORY_SIZE = 100  # per-user ring buffer
|
||
|
||
|
||
# ── Per-user in-memory feature history ────────────────────────────────────
# Maps user_id -> bounded deque of recently scored records (process-local).
_feature_history: dict[str, deque] = {}


def get_feature_history(user_id: str) -> deque:
    """Return the history buffer for *user_id*, creating it on first use.

    New buffers are deques capped at ``FEATURE_HISTORY_SIZE`` entries, so the
    oldest record is dropped automatically once the cap is reached.
    """
    try:
        return _feature_history[user_id]
    except KeyError:
        buffer = _feature_history[user_id] = deque(maxlen=FEATURE_HISTORY_SIZE)
        return buffer
|
||
|
||
|
||
# ── Feature helpers ────────────────────────────────────────────────────────
|
||
|
||
def build_feature_vector(features: dict) -> np.ndarray:
    """Build the d=5 LinUCB feature vector from a raw feature dict.

    Args:
        features: mapping with optional keys ``hour_of_day`` (default 12),
            ``is_overdue`` (default False), ``task_age_days`` (default 0) and
            ``priority`` (default 1).

    Returns:
        float64 array ``[hour_sin, hour_cos, is_overdue, age, priority]``.
    """
    hour = features.get("hour_of_day", 12)
    # Cyclical encoding so 23:00 and 00:00 end up close together.
    hour_sin = math.sin(2 * math.pi * hour / 24)
    hour_cos = math.cos(2 * math.pi * hour / 24)
    is_overdue = float(bool(features.get("is_overdue", False)))
    # Clip to [0, 30] days before scaling to [0, 1]. The lower bound matters:
    # a negative age would otherwise produce an unbounded negative feature,
    # contradicting the documented "clipped 0–30" contract.
    raw_age = float(features.get("task_age_days", 0))
    age = min(max(raw_age, 0.0), 30.0) / 30.0
    # Priority 1–4 mapped onto 0–1.
    priority = (float(features.get("priority", 1)) - 1.0) / 3.0
    return np.array([hour_sin, hour_cos, is_overdue, age, priority], dtype=np.float64)
|
||
|
||
|
||
# ── Per-user bandit state (disjoint LinUCB, global arm) ───────────────────
|
||
|
||
# ── LinUCB state helpers ───────────────────────────────────────────────────
|
||
|
||
def state_path(user_id: str) -> Path:
    """Return the LinUCB state file path for *user_id* inside ``STATE_DIR``.

    Every non-alphanumeric character of the id is replaced by ``_`` so the id
    can be used as a file name.
    """
    sanitized = "".join([ch if ch.isalnum() else "_" for ch in user_id])
    return STATE_DIR / (sanitized + ".json")
|
||
|
||
|
||
def load_state(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
    """Load the LinUCB state ``(A, b, meta)`` for *user_id*.

    When no state file exists yet, a fresh state is returned: ``A`` is the
    D×D identity, ``b`` a zero D-vector and ``meta`` an empty dict.
    """
    path = state_path(user_id)
    if not path.exists():
        return np.identity(D, dtype=np.float64), np.zeros(D, dtype=np.float64), {}

    stored = json.loads(path.read_text())
    matrix = np.array(stored["A"], dtype=np.float64)
    vector = np.array(stored["b"], dtype=np.float64)
    return matrix, vector, stored.get("meta", {})
|
||
|
||
|
||
def save_state(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
    """Persist the LinUCB state for *user_id* as JSON, replacing the file atomically.

    The payload is written to a uniquely named sibling file and then moved
    over the target with ``os.replace``. A reader therefore sees either the
    previous complete file or the new complete file, never a truncated one
    (which would make every later load fail on JSON parsing).

    Args:
        user_id: owner of the state.
        A: design matrix, stored as nested lists.
        b: reward vector, stored as a list.
        meta: JSON-serialisable bookkeeping dict.
    """
    import uuid  # function-scope: not among this module's top-level imports

    target = state_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})
    # The ".tmp" suffix keeps scratch files from looking like "*.json" state.
    scratch = target.with_name(f"{target.name}.{uuid.uuid4().hex}.tmp")
    try:
        scratch.write_text(payload)
        os.replace(scratch, target)
    except BaseException:
        # Do not leave a stray scratch file behind on failure.
        scratch.unlink(missing_ok=True)
        raise
|
||
|
||
|
||
# ── ε-greedy state helpers (d=7, extended features) ───────────────────────
|
||
|
||
def build_feature_vector_7(features: dict, day_of_week: int = 0) -> np.ndarray:
    """Build the d=7 vector: the 5 base features plus cyclical day-of-week.

    The two extra dimensions are ``sin`` and ``cos`` of ``2π·day_of_week/7``,
    appended after the output of ``build_feature_vector``.
    """
    base = build_feature_vector(features)
    weekday_terms = [
        math.sin(2 * math.pi * day_of_week / 7),
        math.cos(2 * math.pi * day_of_week / 7),
    ]
    return np.append(base, weekday_terms)
|
||
|
||
|
||
def state7_path(user_id: str) -> Path:
    """Return the ε-greedy v1 state file path for *user_id* inside ``STATE_DIR``.

    Non-alphanumeric characters in the id are replaced by ``_``; the file name
    carries an ``_egreedy`` suffix.
    """
    sanitized = "".join([ch if ch.isalnum() else "_" for ch in user_id])
    return STATE_DIR / (sanitized + "_egreedy.json")
|
||
|
||
|
||
def load_state7(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
    """Load ``(A, b, meta)`` for the ε-greedy d=7 policy.

    A missing state file yields a fresh state: D7×D7 identity, zero D7-vector
    and an empty meta dict.
    """
    path = state7_path(user_id)
    if not path.exists():
        return np.identity(D7, dtype=np.float64), np.zeros(D7, dtype=np.float64), {}

    stored = json.loads(path.read_text())
    matrix = np.array(stored["A"], dtype=np.float64)
    vector = np.array(stored["b"], dtype=np.float64)
    return matrix, vector, stored.get("meta", {})
|
||
|
||
|
||
def save_state7(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
    """Persist the ε-greedy v1 state for *user_id* as JSON, replacing the file atomically.

    The payload goes to a uniquely named sibling file that is then moved over
    the target with ``os.replace``, so an interrupted write can never leave a
    truncated state file that later fails to parse.

    Args:
        user_id: owner of the state.
        A: design matrix, stored as nested lists.
        b: reward vector, stored as a list.
        meta: JSON-serialisable bookkeeping dict.
    """
    import uuid  # function-scope: not among this module's top-level imports

    target = state7_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})
    # The ".tmp" suffix keeps scratch files from looking like "*.json" state.
    scratch = target.with_name(f"{target.name}.{uuid.uuid4().hex}.tmp")
    try:
        scratch.write_text(payload)
        os.replace(scratch, target)
    except BaseException:
        # Do not leave a stray scratch file behind on failure.
        scratch.unlink(missing_ok=True)
        raise
|
||
|
||
|
||
# ── ε-greedy v2 state helpers (d=12, profile features — ADR-0012) ─────────
#
# Normalization choices (see ADR-0012):
#   completion_rate_30d — already 0–1, passthrough; null → 0
#   dismiss_rate_30d    — already 0–1, passthrough; null → 0
#   mean_dwell_ms_30d   — clipped to [0, 600_000 ms] (10 min), then /600_000
#   preferred_hour      — circular alignment with context hour:
#                           (cos(2π·(now − pref)/24) + 1) / 2  → 0–1
#                         captures "is the user's habitual peak near now?"
#                         null → 0.5 (neutral)
#   tip_volume_30d      — log1p(n) / log1p(100), clipped to [0, 1]

# Upper clip for mean dwell time: 10 minutes expressed in milliseconds.
_DWELL_CLIP_MS = 10 * 60 * 1000.0
# Denominator for the volume feature; a volume of 100 maps to exactly 1.0.
_VOLUME_LOG_MAX = math.log1p(100.0)
|
||
|
||
|
||
def _profile_value(profile: Optional[dict], key: str) -> Optional[float]:
    """Read *key* from *profile* as a finite float, or return ``None``.

    ``None`` is returned when the profile is missing or empty, the key is
    absent or null, the value cannot be converted to ``float``, or the
    converted value is NaN or infinite. Rejecting non-finite values matters
    because the normalisers downstream would otherwise propagate NaN into the
    feature vector or raise on an infinite input.
    """
    if not profile:
        return None
    raw = profile.get(key)
    if raw is None:
        return None
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return None
    return value if math.isfinite(value) else None
|
||
|
||
|
||
def _norm_rate(v: Optional[float]) -> float:
    """Clamp a rate to ``[0, 1]``; a missing value (``None``) maps to 0.0."""
    if v is None:
        return 0.0
    capped = min(1.0, v)
    return max(0.0, capped)
|
||
|
||
|
||
def _norm_dwell(v: Optional[float]) -> float:
    """Scale a dwell time in ms by ``_DWELL_CLIP_MS`` and clamp to ``[0, 1]``.

    A missing value (``None``) maps to 0.0.
    """
    return 0.0 if v is None else max(0.0, min(1.0, v / _DWELL_CLIP_MS))
|
||
|
||
|
||
def _norm_volume(v: Optional[float]) -> float:
    """Log-scale a tip count into ``[0, 1]``.

    Missing or non-positive values map to 0.0; otherwise the result is
    ``log1p(v) / _VOLUME_LOG_MAX`` capped at 1.0.
    """
    if v is not None and v > 0:
        return min(1.0, math.log1p(float(v)) / _VOLUME_LOG_MAX)
    return 0.0
|
||
|
||
|
||
def _norm_preferred_hour(pref: Optional[float], now_hour: int) -> float:
    """Score how closely *now_hour* matches the preferred hour on a 24h circle.

    Returns ``(cos(Δ) + 1) / 2`` where Δ is the hour difference converted to
    radians: 1.0 when the hours coincide, 0.0 when they are 12 hours apart.
    A missing preference yields the neutral value 0.5.
    """
    if pref is None:
        return 0.5  # neutral when the user has no established peak yet
    hour_gap = float(pref) - float(now_hour)
    angle = hour_gap * (2.0 * math.pi / 24.0)
    return (math.cos(angle) + 1.0) / 2.0
|
||
|
||
|
||
def build_feature_vector_12(
    features: dict,
    day_of_week: int = 0,
    profile: Optional[dict] = None,
) -> np.ndarray:
    """Build the d=12 vector: egreedy-v1's 7 dims + 5 normalized profile features (ADR-0012).

    The profile dimensions are appended in this order: completion rate,
    dismiss rate, mean dwell, preferred-hour alignment, tip volume. Missing
    profile values fall back to each normaliser's default.
    """
    base7 = build_feature_vector_7(features, day_of_week)
    now_hour = int(features.get("hour_of_day", 12))

    def _read(key: str) -> Optional[float]:
        # Shorthand for pulling one numeric value out of the profile.
        return _profile_value(profile, key)

    profile_dims = np.array(
        [
            _norm_rate(_read("completion_rate_30d")),
            _norm_rate(_read("dismiss_rate_30d")),
            _norm_dwell(_read("mean_dwell_ms_30d")),
            _norm_preferred_hour(_read("preferred_hour"), now_hour),
            _norm_volume(_read("tip_volume_30d")),
        ],
        dtype=np.float64,
    )
    return np.concatenate([base7, profile_dims])
|
||
|
||
|
||
def state12_path(user_id: str) -> Path:
    """Return the ε-greedy v2 state file path for *user_id* inside ``STATE_DIR``.

    Non-alphanumeric characters in the id are replaced by ``_``; the file name
    carries an ``_egreedy_v2`` suffix.
    """
    sanitized = "".join([ch if ch.isalnum() else "_" for ch in user_id])
    return STATE_DIR / (sanitized + "_egreedy_v2.json")
|
||
|
||
|
||
def load_state12(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]:
    """Load ``(A, b, meta)`` for the ε-greedy v2 (d=12) policy.

    A missing state file yields a fresh state: D12×D12 identity, zero
    D12-vector and an empty meta dict.
    """
    path = state12_path(user_id)
    if not path.exists():
        return np.identity(D12, dtype=np.float64), np.zeros(D12, dtype=np.float64), {}

    stored = json.loads(path.read_text())
    matrix = np.array(stored["A"], dtype=np.float64)
    vector = np.array(stored["b"], dtype=np.float64)
    return matrix, vector, stored.get("meta", {})
|
||
|
||
|
||
def save_state12(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None:
    """Persist the ε-greedy v2 state for *user_id* as JSON, replacing the file atomically.

    The payload goes to a uniquely named sibling file that is then moved over
    the target with ``os.replace``, so an interrupted write can never leave a
    truncated state file that later fails to parse.

    Args:
        user_id: owner of the state.
        A: design matrix, stored as nested lists.
        b: reward vector, stored as a list.
        meta: JSON-serialisable bookkeeping dict.
    """
    import uuid  # function-scope: not among this module's top-level imports

    target = state12_path(user_id)
    payload = json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})
    # The ".tmp" suffix keeps scratch files from looking like "*.json" state.
    scratch = target.with_name(f"{target.name}.{uuid.uuid4().hex}.tmp")
    try:
        scratch.write_text(payload)
        os.replace(scratch, target)
    except BaseException:
        # Do not leave a stray scratch file behind on failure.
        scratch.unlink(missing_ok=True)
        raise
|
||
|
||
|
||
# ── API models ─────────────────────────────────────────────────────────────
|
||
|
||
class CandidateFeatures(BaseModel):
    # Raw per-candidate inputs to the feature builders. Every field has a
    # default, so a candidate may omit its features entirely.
    hour_of_day: int = 12
    is_overdue: bool = False
    task_age_days: float = 0.0
    priority: int = 1
|
||
|
||
|
||
class Candidate(BaseModel):
    # One tip candidate submitted for scoring; `id` is echoed back as the
    # `tip_id` of the winning candidate.
    id: str
    content: str
    source: str
    source_id: Optional[str] = None
    features: CandidateFeatures = CandidateFeatures()
|
||
|
||
|
||
class Context(BaseModel):
    # Request-level time context shared by all candidates of a score call.
    hour_of_day: int = 12
    day_of_week: int = 0
|
||
|
||
|
||
class ScoreRequest(BaseModel):
    # Payload shared by /score, /score/egreedy and /score/egreedy/v2.
    user_id: str
    candidates: list[Candidate]
    context: Context = Context()
    # User-level features computed by the API (#81 phase A). /score and
    # /score/egreedy accept but do not use them; /score/egreedy/v2 feeds them
    # into the d=12 feature vector. Extending the vector of an existing policy
    # changes its dimension and resets every user's learned state, so that is
    # a deliberate follow-up, never a side effect.
    profile_features: Optional[dict] = None
|
||
|
||
|
||
class ScoreResponse(BaseModel):
    # Winning candidate id, its score, and the name of the policy that chose it.
    tip_id: str
    score: float
    policy: str
|
||
|
||
|
||
class RewardRequest(BaseModel):
    # Feedback for a previously served tip; shared by all /reward endpoints.
    user_id: str
    tip_id: str
    # Reward scale: +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss
    reward: float
    features: CandidateFeatures
    # Included so the ε-greedy policies can train their day-of-week features.
    day_of_week: int = 0
    # Profile features at the time the tip was served. Ignored by /reward and
    # /reward/egreedy; consumed by /reward/egreedy/v2 so the ridge update uses
    # the same feature vector as the matching /score/egreedy/v2 call.
    profile_features: Optional[dict] = None
|
||
|
||
|
||
class RewardResponse(BaseModel):
    # Acknowledgement returned by the /reward endpoints.
    ok: bool
|
||
|
||
|
||
class PromptContext(BaseModel):
    # Context handed to the prompt template when building the user message.
    tasks: list[dict] = []
    hour_of_day: int = 12
    day_of_week: int = 0
    extra: dict = {}
    # Filled in by /generate from the request's top-level profile_features.
    profile_features: Optional[dict] = None
|
||
|
||
|
||
class GenerateRequest(BaseModel):
    # Payload for POST /generate.
    user_id: str
    context: PromptContext = PromptContext()
    n: int = 3
    # None → server default (env DEFAULT_PROMPT_VERSION)
    prompt_version: Optional[str] = None
    # User-level features (#81 phase A). Accepted by the contract; how they
    # appear in the prompt is a #84-style prompt-design decision.
    profile_features: Optional[dict] = None
|
||
|
||
|
||
class TipCandidate(BaseModel):
    # One LLM-generated tip candidate returned by /generate.
    id: str
    content: str
    source: str = "llm"
    rationale: Optional[str] = None
|
||
|
||
|
||
class GenerateResponse(BaseModel):
    # Result of /generate; token counts are summed over all LLM attempts.
    candidates: list[TipCandidate]
    model: str
    prompt_version: str
    prompt_tokens: int = 0
    completion_tokens: int = 0
|
||
|
||
|
||
# ── Multi-agent models ─────────────────────────────────────────────────────
|
||
|
||
class AgentComputeRequest(BaseModel):
    # Input for POST /agents/{agent_id}/compute.
    user_id: str
    tasks: list[dict] = []
    profile: dict[str, Optional[float]] = {}
    feedback_history: list[dict] = []
    # ISO 8601 timestamp; when omitted the endpoint uses the current UTC time.
    now_iso: Optional[str] = None
|
||
|
||
|
||
class AgentComputeResponse(BaseModel):
    # Mirror of the agent output fields returned by a sub-agent's compute().
    user_id: str
    agent_id: str
    prompt_text: str
    signals_snapshot: dict
    computed_at: str
    expires_at: str
    agent_version: str
|
||
|
||
|
||
class AgentOutputSnippet(BaseModel):
    # A pre-computed agent prompt snippet passed to /recommend.
    agent_id: str
    prompt_text: str
|
||
|
||
|
||
class RecommendRequest(BaseModel):
    # Input for POST /recommend.
    user_id: str
    agent_outputs: list[AgentOutputSnippet] = []
    tasks: list[dict] = []
    hour_of_day: int = 12
    day_of_week: int = 0
|
||
|
||
|
||
class TipResult(BaseModel):
    # The single tip produced by the orchestrator in /recommend.
    id: str
    content: str
    source: str = "llm"
    kind: str = "advice"
    rationale: Optional[str] = None
|
||
|
||
|
||
class RecommendResponse(BaseModel):
    # Result of /recommend; token counts are summed over all LLM attempts.
    tip: TipResult
    model: str
    prompt_tokens: int = 0
    completion_tokens: int = 0
|
||
|
||
|
||
# ── Endpoints ──────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/health")
def health():
    """Return a liveness payload with registered agent ids and NATS status.

    ``nats.enabled`` reflects whether ``nats_consumer.NATS_URL`` is set;
    ``nats.consumers`` is the consumer module's health mapping as-is.
    """
    registered_agents = [agent.agent_id for agent in all_agents()]
    nats_status = {
        "enabled": bool(nats_consumer.NATS_URL),
        "consumers": nats_consumer.consumer_health,
    }
    return {"ok": True, "agents": registered_agents, "nats": nats_status}
|
||
|
||
|
||
# Correction hint appended as a user message when a reply that should have
# been a JSON array could not be parsed (used by /generate).
_RETRY_SUFFIX = (
    "\n\nYour previous response was not valid JSON. "
    "Reply ONLY with the JSON array — no prose, no markdown fences."
)

# Same hint for endpoints that expect a single JSON object (used by /recommend).
_RETRY_SUFFIX_OBJ = (
    "\n\nYour previous response was not valid JSON. "
    "Reply ONLY with the JSON object — no prose, no markdown fences."
)
|
||
|
||
|
||
@app.post("/agents/{agent_id}/compute", response_model=AgentComputeResponse)
async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentComputeResponse:
    """Run a single sub-agent for a user and return its prompt snippet.

    Called by the precompute pipeline for each (user_id, agent_id) pair.
    The caller is responsible for persisting the result to agent_outputs via the
    TypeScript API callback.

    Raises:
        HTTPException: 404 when *agent_id* is not registered, 422 when
            ``now_iso`` is not a parseable ISO 8601 timestamp, 500 when the
            agent's ``compute`` raises.
    """
    try:
        agent = get_agent(agent_id)
    except KeyError:
        raise HTTPException(status_code=404, detail=f"Unknown agent: {agent_id!r}") from None

    if req.now_iso:
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
            now = datetime.fromisoformat(req.now_iso.replace("Z", "+00:00"))
        except ValueError as exc:
            # Bad client input is a validation error, not a server fault.
            raise HTTPException(
                status_code=422, detail=f"Invalid now_iso: {req.now_iso!r}"
            ) from exc
    else:
        now = datetime.now(timezone.utc)
    if now.tzinfo is None:
        # Naive timestamps are interpreted as UTC.
        now = now.replace(tzinfo=timezone.utc)

    inp = AgentInput(
        user_id=req.user_id,
        tasks=req.tasks,
        profile=req.profile,
        feedback_history=req.feedback_history,
        now=now,
    )
    try:
        output = agent.compute(inp)
    except Exception as exc:
        # Endpoint boundary: log, then surface any agent failure as a 500.
        log.error("agent_compute_failed", agent_id=agent_id, user_id=req.user_id, error=str(exc))
        raise HTTPException(status_code=500, detail=f"Agent compute failed: {exc}") from exc

    log.info("agent_computed", agent_id=agent_id, user_id=req.user_id, expires_at=output.expires_at)
    return AgentComputeResponse(
        user_id=output.user_id,
        agent_id=output.agent_id,
        prompt_text=output.prompt_text,
        signals_snapshot=output.signals_snapshot,
        computed_at=output.computed_at,
        expires_at=output.expires_at,
        agent_version=output.agent_version,
    )
|
||
|
||
|
||
@app.post("/recommend", response_model=RecommendResponse)
async def recommend(req: RecommendRequest) -> RecommendResponse:
    """Orchestrator: combine pre-computed agent outputs into one tip via LLM.

    Called in real time when a user requests a tip. agent_outputs should be
    the fresh rows from agent_outputs table (fetched by the TypeScript recommender
    before calling this endpoint). Handling of an empty agent_outputs list is
    delegated to ``build_orchestrator_messages``.

    The reply must be a JSON object (or an array whose first element is one).
    On an unparseable or wrongly shaped reply the conversation is extended
    with the bad reply plus a correction hint, up to ``_MAX_GENERATE_RETRIES``
    extra attempts. Token usage is summed over all attempts.

    Raises:
        HTTPException: 502 on a LiteLLM HTTP error, a malformed LiteLLM
            response body, or invalid JSON after all retries; 503 when
            LiteLLM is unreachable.
    """
    messages = build_orchestrator_messages(
        agent_outputs=[s.model_dump() for s in req.agent_outputs],
        tasks=req.tasks,
        hour_of_day=req.hour_of_day,
        day_of_week=req.day_of_week,
    )
    headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
    last_raw = ""
    last_parse_error = ""
    total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
    model_used = "tip-generator"

    async with httpx.AsyncClient(timeout=30.0) as client:
        for _attempt in range(1 + _MAX_GENERATE_RETRIES):
            payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
            try:
                resp = await client.post(
                    f"{LITELLM_URL}/chat/completions", json=payload, headers=headers
                )
                resp.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise HTTPException(status_code=502, detail=f"LiteLLM error: {e.response.text}") from e
            except httpx.RequestError as e:
                raise HTTPException(status_code=503, detail=f"LiteLLM unreachable: {e}") from e

            try:
                data = resp.json()
                content = data["choices"][0]["message"]["content"]
                usage = data.get("usage") or {}
                model_used = data.get("model", "tip-generator")
            except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
                # The proxy answered 2xx but not with a chat-completion body.
                raise HTTPException(
                    status_code=502, detail=f"LiteLLM returned a malformed response: {e}"
                ) from e
            total_usage["prompt_tokens"] += usage.get("prompt_tokens") or 0
            total_usage["completion_tokens"] += usage.get("completion_tokens") or 0
            # Null/non-text content becomes "", which fails parsing and triggers a retry.
            last_raw = content if isinstance(content, str) else ""

            try:
                text = last_raw.strip()
                if text.startswith("```"):
                    # Keep only what sits between the first pair of fences.
                    parts = text.split("```")
                    text = parts[1] if len(parts) > 1 else text
                if text.startswith("json"):
                    # Drop the language tag of a ```json fence.
                    text = text[4:]
                parsed = json.loads(text)
                candidate = parsed[0] if isinstance(parsed, list) else parsed
                if not isinstance(candidate, dict):
                    # Valid JSON of the wrong shape is retried like a parse failure.
                    raise ValueError(
                        f"expected a JSON object, got {type(candidate).__name__}"
                    )
                item: dict = candidate
                break
            except (json.JSONDecodeError, ValueError, IndexError) as exc:
                last_parse_error = str(exc)
                # Feed the bad reply back so the model can self-correct.
                messages.append({"role": "assistant", "content": last_raw})
                messages.append({"role": "user", "content": _RETRY_SUFFIX_OBJ})
        else:
            # Every attempt failed to produce a usable JSON object.
            raise HTTPException(
                status_code=502,
                detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
                f"{last_parse_error}\n{last_raw[:200]}",
            )

    tip = TipResult(
        id=item.get("id", f"tip-{req.user_id[:8]}"),
        content=item.get("content", ""),
        rationale=item.get("rationale"),
    )
    log.info(
        "recommend_served",
        user_id=req.user_id,
        agent_count=len(req.agent_outputs),
        tip_id=tip.id,
    )
    return RecommendResponse(
        tip=tip,
        model=model_used,
        prompt_tokens=total_usage["prompt_tokens"],
        completion_tokens=total_usage["completion_tokens"],
    )
|
||
|
||
# Extra LLM attempts after the first one when the reply is not valid JSON;
# the endpoints loop over `1 + _MAX_GENERATE_RETRIES` attempts in total.
_MAX_GENERATE_RETRIES = 2
|
||
|
||
|
||
def _parse_llm_json(raw: str) -> list[dict]:
    """Strip markdown fences and parse a JSON array of objects.

    Args:
        raw: the model reply, optionally wrapped in a fenced block with or
            without a ``json`` language tag.

    Returns:
        The parsed list of dicts (possibly empty).

    Raises:
        ValueError: when the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or when the parsed value is not a
            list whose elements are all objects. Callers treat both the same
            way: as a malformed reply worth retrying.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Keep only what sits between the first pair of fences.
        parts = text.split("```")
        text = parts[1] if len(parts) > 1 else text
    if text.startswith("json"):
        # Drop the language tag of a ```json fence.
        text = text[4:]
    parsed = json.loads(text)
    if not isinstance(parsed, list) or not all(isinstance(entry, dict) for entry in parsed):
        raise ValueError("expected a JSON array of objects")
    return parsed
|
||
|
||
|
||
@app.post("/generate", response_model=GenerateResponse)
async def generate(req: GenerateRequest) -> GenerateResponse:
    """Generate tip candidates via LiteLLM → tip-generator alias.

    Retries up to _MAX_GENERATE_RETRIES times on malformed JSON, appending
    a correction hint to the conversation so the model can self-correct.
    A reply that parses but is not an array of objects counts as malformed
    too. Token usage is summed over all attempts.

    Raises:
        HTTPException: 422 for an unknown ``prompt_version``; 502 on a LiteLLM
            HTTP error, a malformed LiteLLM response body, or invalid JSON
            after all retries; 503 when LiteLLM is unreachable.
    """
    try:
        prompt_template = get_prompt(req.prompt_version)
    except KeyError as e:
        raise HTTPException(status_code=422, detail=f"Unknown prompt_version: {e.args[0]}") from e
    # The template reads profile features from the context, so copy them over.
    ctx = req.context.model_copy(update={"profile_features": req.profile_features})
    user_msg = prompt_template.build_user(ctx, req.n)
    messages: list[dict] = [
        {"role": "system", "content": prompt_template.system},
        {"role": "user", "content": user_msg},
    ]
    headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
    last_parse_error: str = ""
    last_raw: str = ""
    total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
    model_used = "tip-generator"

    async with httpx.AsyncClient(timeout=30.0) as client:
        for _attempt in range(1 + _MAX_GENERATE_RETRIES):
            payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
            try:
                resp = await client.post(
                    f"{LITELLM_URL}/chat/completions",
                    json=payload,
                    headers=headers,
                )
                resp.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise HTTPException(status_code=502, detail=f"LiteLLM error: {e.response.text}") from e
            except httpx.RequestError as e:
                raise HTTPException(status_code=503, detail=f"LiteLLM unreachable: {e}") from e

            try:
                data = resp.json()
                content = data["choices"][0]["message"]["content"]
                usage = data.get("usage") or {}
                model_used = data.get("model", "tip-generator")
            except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
                # The proxy answered 2xx but not with a chat-completion body.
                raise HTTPException(
                    status_code=502, detail=f"LiteLLM returned a malformed response: {e}"
                ) from e
            total_usage["prompt_tokens"] += usage.get("prompt_tokens") or 0
            total_usage["completion_tokens"] += usage.get("completion_tokens") or 0
            # Null/non-text content becomes "", which fails parsing and triggers a retry.
            last_raw = content if isinstance(content, str) else ""

            try:
                items = _parse_llm_json(last_raw)
                if not isinstance(items, list) or not all(
                    isinstance(entry, dict) for entry in items
                ):
                    # Valid JSON of the wrong shape is retried like a parse failure.
                    raise ValueError("expected a JSON array of objects")
                break
            except (json.JSONDecodeError, ValueError) as e:
                last_parse_error = str(e)
                # Feed the bad reply back so the model can self-correct
                messages.append({"role": "assistant", "content": last_raw})
                messages.append({"role": "user", "content": _RETRY_SUFFIX})
        else:
            # Every attempt failed to produce a usable JSON array.
            raise HTTPException(
                status_code=502,
                detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
                f"{last_parse_error}\n{last_raw[:200]}",
            )

    candidates = [
        TipCandidate(
            id=item.get("id", f"tip-{i}"),
            content=item.get("content", ""),
            rationale=item.get("rationale"),
        )
        for i, item in enumerate(items)
    ]

    return GenerateResponse(
        candidates=candidates,
        model=model_used,
        prompt_version=prompt_template.version,
        prompt_tokens=total_usage["prompt_tokens"],
        completion_tokens=total_usage["completion_tokens"],
    )
|
||
|
||
|
||
@app.post("/score", response_model=ScoreResponse)
def score(req: ScoreRequest) -> ScoreResponse:
    """Pick the candidate with the highest LinUCB upper confidence bound.

    For each candidate the score is ``θ·x + ALPHA·sqrt(xᵀ A⁻¹ x)`` with
    ``θ = A⁻¹ b``; ties keep the earliest candidate. The winner is appended to
    the user's feature history and the pull counter is persisted.

    Raises:
        HTTPException: 422 when the request has no candidates.
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    A, b, meta = load_state(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular matrix: fall back to the identity as the inverse.
        A_inv = np.identity(D, dtype=np.float64)
    theta = A_inv @ b

    winner_id = None
    winner_ucb = -float("inf")
    winner_features: dict = {}
    for cand in req.candidates:
        cand_features = {
            "hour_of_day": req.context.hour_of_day,
            "is_overdue": cand.features.is_overdue,
            "task_age_days": cand.features.task_age_days,
            "priority": cand.features.priority,
        }
        x = build_feature_vector(cand_features)
        exploit = float(theta @ x)
        explore = ALPHA * math.sqrt(float(x @ A_inv @ x))
        ucb = exploit + explore
        if ucb > winner_ucb:
            winner_id, winner_ucb, winner_features = cand.id, ucb, cand_features

    # Log to feature history ring buffer
    get_feature_history(req.user_id).append(
        {
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "features": winner_features,
            "score": winner_ucb,
            "tip_id": winner_id,
        }
    )

    # Update meta stats; A and b are unchanged by scoring.
    meta["pulls"] = meta.get("pulls", 0) + 1
    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    save_state(req.user_id, A, b, meta)

    return ScoreResponse(tip_id=winner_id, score=winner_ucb, policy="linucb-v1")
|
||
|
||
|
||
@app.post("/reward", response_model=RewardResponse)
def reward(req: RewardRequest) -> RewardResponse:
    """Apply an observed reward to the user's LinUCB state.

    Performs the updates ``A += x xᵀ`` and ``b += reward · x`` for the d=5
    feature vector built from the request, then persists the state together
    with the running reward statistics.
    """
    A, b, meta = load_state(req.user_id)
    observed = req.features
    x = build_feature_vector(
        {
            "hour_of_day": observed.hour_of_day,
            "is_overdue": observed.is_overdue,
            "task_age_days": observed.task_age_days,
            "priority": observed.priority,
        }
    )
    A += np.outer(x, x)
    b += req.reward * x

    # Track cumulative reward in meta
    meta.update(
        cumulative_reward=meta.get("cumulative_reward", 0.0) + req.reward,
        reward_count=meta.get("reward_count", 0) + 1,
        last_updated=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    )

    save_state(req.user_id, A, b, meta)
    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.post("/score/egreedy", response_model=ScoreResponse)
def score_egreedy(req: ScoreRequest) -> ScoreResponse:
    """ε-greedy policy with d=7 features (adds day-of-week encoding).

    Exploration: pick uniformly at random with probability ε.
    Exploitation: pick argmax of linear payoff estimate θ·x.
    Differs from LinUCB in: no UCB bonus, richer feature space.

    The chosen candidate is appended to the user's feature history and the
    pull / exploration counters are persisted.

    Raises:
        HTTPException: 422 when the request has no candidates.
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    A, b, meta = load_state7(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular matrix: fall back to the identity as the inverse.
        A_inv = np.identity(D7, dtype=np.float64)
    theta = A_inv @ b

    dow = req.context.day_of_week
    # Cast to a plain bool: the comparison yields numpy.bool_, which would
    # otherwise end up in the history record, where standard JSON encoding
    # cannot serialise it.
    exploring = bool(np.random.random() < EPSILON)

    def _features_for(cand: Candidate) -> dict:
        # Request-level hour combined with the candidate's own task features.
        return {
            "hour_of_day": req.context.hour_of_day,
            "is_overdue": cand.features.is_overdue,
            "task_age_days": cand.features.task_age_days,
            "priority": cand.features.priority,
        }

    if exploring:
        chosen = req.candidates[np.random.randint(len(req.candidates))]
        feat_dict = _features_for(chosen)
        best_score = float(theta @ build_feature_vector_7(feat_dict, dow))
        best_id = chosen.id
    else:
        best_id = None
        best_score = -float("inf")
        feat_dict = {}
        for candidate in req.candidates:
            cand_features = _features_for(candidate)
            estimate = float(theta @ build_feature_vector_7(cand_features, dow))
            if estimate > best_score:  # strict: ties keep the earliest candidate
                best_id, best_score, feat_dict = candidate.id, estimate, cand_features

    get_feature_history(req.user_id).append(
        {
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "features": {**feat_dict, "day_of_week": dow, "exploring": exploring},
            "score": best_score,
            "tip_id": best_id,
            "policy": "egreedy-v1",
        }
    )

    meta["pulls"] = meta.get("pulls", 0) + 1
    meta["explore_count"] = meta.get("explore_count", 0) + int(exploring)
    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    save_state7(req.user_id, A, b, meta)

    return ScoreResponse(tip_id=best_id, score=best_score, policy="egreedy-v1")
|
||
|
||
|
||
@app.post("/reward/egreedy", response_model=RewardResponse)
def reward_egreedy(req: RewardRequest) -> RewardResponse:
    """Update ε-greedy ridge estimator with observed reward.

    Performs ``A += x xᵀ`` and ``b += reward · x`` for the d=7 vector built
    from the request features and ``day_of_week``, then persists the state
    together with the running reward statistics.
    """
    A, b, meta = load_state7(req.user_id)
    observed = req.features
    x = build_feature_vector_7(
        {
            "hour_of_day": observed.hour_of_day,
            "is_overdue": observed.is_overdue,
            "task_age_days": observed.task_age_days,
            "priority": observed.priority,
        },
        day_of_week=req.day_of_week,
    )
    A += np.outer(x, x)
    b += req.reward * x

    meta.update(
        cumulative_reward=meta.get("cumulative_reward", 0.0) + req.reward,
        reward_count=meta.get("reward_count", 0) + 1,
        last_updated=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    )
    save_state7(req.user_id, A, b, meta)
    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.post("/score/egreedy/v2", response_model=ScoreResponse)
def score_egreedy_v2(req: ScoreRequest) -> ScoreResponse:
    """ε-greedy v2 — d=12, adds 5 normalized profile features (ADR-0012).

    Shadow-only until offline sim + rollout per ADR-0002 completes.
    Accepts the same ScoreRequest shape as v1; `profile_features` drives the
    extra 5 dims (defaults: zeros for rates/volume/dwell, 0.5 neutral for
    preferred_hour alignment).

    The chosen candidate is appended to the user's feature history and the
    pull / exploration counters are persisted.

    Raises:
        HTTPException: 422 when the request has no candidates.
    """
    if not req.candidates:
        raise HTTPException(status_code=422, detail="No candidates")

    A, b, meta = load_state12(req.user_id)
    try:
        A_inv = np.linalg.inv(A)
    except np.linalg.LinAlgError:
        # Singular matrix: fall back to the identity as the inverse.
        A_inv = np.identity(D12, dtype=np.float64)
    theta = A_inv @ b

    dow = req.context.day_of_week
    # Cast to a plain bool: the comparison yields numpy.bool_, which would
    # otherwise end up in the history record, where standard JSON encoding
    # cannot serialise it.
    exploring = bool(np.random.random() < EPSILON)

    def _features_for(cand: Candidate) -> dict:
        # Request-level hour combined with the candidate's own task features.
        return {
            "hour_of_day": req.context.hour_of_day,
            "is_overdue": cand.features.is_overdue,
            "task_age_days": cand.features.task_age_days,
            "priority": cand.features.priority,
        }

    if exploring:
        chosen = req.candidates[np.random.randint(len(req.candidates))]
        feat_dict = _features_for(chosen)
        x = build_feature_vector_12(feat_dict, dow, req.profile_features)
        best_score = float(theta @ x)
        best_id = chosen.id
    else:
        best_id = None
        best_score = -float("inf")
        feat_dict = {}
        for candidate in req.candidates:
            cand_features = _features_for(candidate)
            x = build_feature_vector_12(cand_features, dow, req.profile_features)
            estimate = float(theta @ x)
            if estimate > best_score:  # strict: ties keep the earliest candidate
                best_id, best_score, feat_dict = candidate.id, estimate, cand_features

    get_feature_history(req.user_id).append(
        {
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "features": {**feat_dict, "day_of_week": dow, "exploring": exploring},
            "score": best_score,
            "tip_id": best_id,
            "policy": "egreedy-v2",
        }
    )

    meta["pulls"] = meta.get("pulls", 0) + 1
    meta["explore_count"] = meta.get("explore_count", 0) + int(exploring)
    meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    save_state12(req.user_id, A, b, meta)

    return ScoreResponse(tip_id=best_id, score=best_score, policy="egreedy-v2")
|
||
|
||
|
||
@app.post("/reward/egreedy/v2", response_model=RewardResponse)
def reward_egreedy_v2(req: RewardRequest) -> RewardResponse:
    """Update ε-greedy v2 ridge estimator using the d=12 feature vector.

    Performs ``A += x xᵀ`` and ``b += reward · x`` where ``x`` is built from
    the request features, ``day_of_week`` and ``profile_features``, then
    persists the state together with the running reward statistics.
    """
    A, b, meta = load_state12(req.user_id)
    observed = req.features
    x = build_feature_vector_12(
        {
            "hour_of_day": observed.hour_of_day,
            "is_overdue": observed.is_overdue,
            "task_age_days": observed.task_age_days,
            "priority": observed.priority,
        },
        req.day_of_week,
        req.profile_features,
    )
    A += np.outer(x, x)
    b += req.reward * x

    meta.update(
        cumulative_reward=meta.get("cumulative_reward", 0.0) + req.reward,
        reward_count=meta.get("reward_count", 0) + 1,
        last_updated=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    )
    save_state12(req.user_id, A, b, meta)
    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.get("/stats/egreedy/v2/{user_id}")
def stats_egreedy_v2(user_id: str):
    """Summarise the ε-greedy v2 policy state for one user.

    Reports pull/reward counters from the stored metadata, the derived mean
    reward and exploration rate (both 0.0 when their denominator is zero),
    and the weight vector θ = A⁻¹ b. If ``A`` cannot be inverted, θ falls
    back to a zero vector of length ``D12``.
    """
    A, b, meta = load_state12(user_id)

    try:
        weights = (np.linalg.inv(A) @ b).tolist()
    except np.linalg.LinAlgError:
        weights = [0.0] * D12

    n_pulls = meta.get("pulls", 0)
    reward_total = meta.get("cumulative_reward", 0.0)
    n_rewards = meta.get("reward_count", 0)
    n_explored = meta.get("explore_count", 0)

    # Guard both ratios against users with no pulls / no rewards yet.
    if n_rewards > 0:
        mean_reward = reward_total / n_rewards
    else:
        mean_reward = 0.0
    if n_pulls > 0:
        explore_rate = n_explored / n_pulls
    else:
        explore_rate = 0.0

    labels = [
        "hour_sin", "hour_cos", "is_overdue", "task_age", "priority",
        "dow_sin", "dow_cos",
        "completion_rate_30d", "dismiss_rate_30d", "mean_dwell_norm",
        "preferred_hour_alignment", "tip_volume_norm",
    ]

    return {
        "user_id": user_id,
        "policy": "egreedy-v2",
        "pulls": n_pulls,
        "reward_count": n_rewards,
        "cumulative_reward": reward_total,
        "estimated_mean_reward": mean_reward,
        "exploration_rate": explore_rate,
        "theta": weights,
        "feature_labels": labels,
        "last_updated": meta.get("last_updated"),
    }
|
||
|
||
|
||
@app.get("/stats/egreedy/{user_id}")
def stats_egreedy(user_id: str):
    """Summarise the ε-greedy v1 policy state for one user.

    Returns the stored pull/reward counters, the derived mean reward and
    exploration rate (0.0 when there is nothing to divide by), and the
    weight vector θ = A⁻¹ b. A non-invertible ``A`` yields a zero vector of
    length ``D7`` instead of an error.
    """
    A, b, meta = load_state7(user_id)

    try:
        weights = (np.linalg.inv(A) @ b).tolist()
    except np.linalg.LinAlgError:
        weights = [0.0] * D7

    n_pulls = meta.get("pulls", 0)
    reward_total = meta.get("cumulative_reward", 0.0)
    n_rewards = meta.get("reward_count", 0)
    n_explored = meta.get("explore_count", 0)

    # Avoid ZeroDivisionError for users that have not been scored/rewarded.
    mean_reward = 0.0
    if n_rewards > 0:
        mean_reward = reward_total / n_rewards
    explore_rate = 0.0
    if n_pulls > 0:
        explore_rate = n_explored / n_pulls

    summary = {
        "user_id": user_id,
        "policy": "egreedy-v1",
        "pulls": n_pulls,
        "reward_count": n_rewards,
        "cumulative_reward": reward_total,
        "estimated_mean_reward": mean_reward,
        "exploration_rate": explore_rate,
        "theta": weights,
        "feature_labels": [
            "hour_sin",
            "hour_cos",
            "is_overdue",
            "task_age",
            "priority",
            "dow_sin",
            "dow_cos",
        ],
        "last_updated": meta.get("last_updated"),
    }
    return summary
|
||
|
||
|
||
@app.post("/reset/{user_id}", response_model=RewardResponse)
def reset(user_id: str) -> RewardResponse:
    """Reset per-user bandit state (admin action).

    Removes the user's persisted state files for all three policies (the
    paths returned by ``state_path``, ``state7_path`` and ``state12_path``)
    and empties the user's in-memory feature history, if one exists.

    A state file that is already absent is not an error: the reset is
    idempotent and always acknowledges with ``ok=True``.
    """
    # Resolve and delete one policy at a time, in the original order.
    for locate in (state_path, state7_path, state12_path):
        path = locate(user_id)
        try:
            # EAFP instead of exists()+unlink(): two overlapping resets for
            # the same user could both pass the exists() check and the loser
            # would fail with FileNotFoundError (surfacing as a 500).
            path.unlink()
        except FileNotFoundError:
            # Nothing persisted for this policy — already in the reset state.
            pass

    history = _feature_history.get(user_id)
    if history is not None:
        history.clear()

    return RewardResponse(ok=True)
|
||
|
||
|
||
@app.get("/stats/{user_id}")
def stats(user_id: str):
    """Return current LinUCB state summary for a user.

    Includes the stored pull/reward counters, the mean reward per recorded
    reward (0.0 when none have been recorded) and the weight vector
    θ = A⁻¹ b. When ``A`` is not invertible, θ is reported as a zero vector
    of length ``D``.
    """
    A, b, meta = load_state(user_id)

    try:
        weights = (np.linalg.inv(A) @ b).tolist()
    except np.linalg.LinAlgError:
        weights = [0.0] * D

    n_pulls = meta.get("pulls", 0)
    reward_total = meta.get("cumulative_reward", 0.0)
    n_rewards = meta.get("reward_count", 0)

    # No rewards yet -> report 0.0 rather than dividing by zero.
    if n_rewards > 0:
        mean_reward = reward_total / n_rewards
    else:
        mean_reward = 0.0

    return {
        "user_id": user_id,
        "pulls": n_pulls,
        "reward_count": n_rewards,
        "cumulative_reward": reward_total,
        "estimated_mean_reward": mean_reward,
        "theta": weights,
        "last_updated": meta.get("last_updated"),
    }
|
||
|
||
|
||
@app.get("/features/{user_id}")
def features(user_id: str):
    """Return the feature-history entries recorded for a user at scoring time.

    The history container is copied into a plain list so the response holds
    a snapshot rather than the live object.
    """
    snapshot = list(get_feature_history(user_id))
    return {"user_id": user_id, "history": snapshot}
|