From 2d7cf217a92accf4e5d3e32a2f5a30be0d749bab Mon Sep 17 00:00:00 2001 From: alvis Date: Sat, 25 Apr 2026 10:00:38 +0000 Subject: [PATCH] =?UTF-8?q?feat(ml):=20egreedy-v2=20shadow=20policy=20?= =?UTF-8?q?=E2=80=94=20D=3D12=20with=20profile=20features=20(#99)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ship the scaffolding for #99 (phase B.3 of #81): - ml/serving: add /score/egreedy/v2, /reward/egreedy/v2, /stats/egreedy/v2 endpoints (D=12). New feature dims: completion/dismiss rates, mean dwell (clipped 10min), preferred-hour alignment (cosine, 1-dim), tip volume (log). Separate state file per user (_egreedy_v2.json). /reset clears v2 state too. - ADR-0012: documents D=7→12 dimension change, normalization choices, shadow rollout protocol, and promotion gate (offline sim win per ADR-0002). - recommender.ts: register egreedy-v2-shadow in shadow-policy map (disabled by default). When enabled, calls /score/egreedy/v2 fire-and-forget and publishes shadow:egreedy-v2-shadow serve signal. No reward to shadow — sim is the gate. - sim runner/personas: personas carry synthetic profile_features per persona; _call_score/_call_reward thread profile_features through (None-safe for v1/linucb). - 18 new Python tests; all 56 Python + 170 TS tests pass. Co-Authored-By: Claude Sonnet 4.6 --- docs/adr/0012-egreedy-v2-profile-features.md | 108 +++++++++ ml/experiments/sim/personas.py | 44 ++++ ml/experiments/sim/runner.py | 36 ++- ml/serving/main.py | 241 ++++++++++++++++++- ml/serving/tests/test_score.py | 180 +++++++++++++- services/api/src/routes/recommender.ts | 40 ++- 6 files changed, 629 insertions(+), 20 deletions(-) create mode 100644 docs/adr/0012-egreedy-v2-profile-features.md diff --git a/docs/adr/0012-egreedy-v2-profile-features.md b/docs/adr/0012-egreedy-v2-profile-features.md new file mode 100644 index 0000000..27d0958 --- /dev/null +++ b/docs/adr/0012-egreedy-v2-profile-features.md @@ -0,0 +1,108 @@ +# ADR-0012 — ε-greedy v2: profile features in the bandit (D=7→12) + +**Status:** Accepted +**Date:** 2026-04-25 +**Issue:** #99 + +## Context + +ADR-0011 shipped a 5-feature user-profile registry (completion rate, dismiss rate, +mean dwell, preferred hour, tip volume). `POST /score` and `POST /score/egreedy` +already receive a `profile_features` dict on every call but **ignore it** — the +comment in `ml/serving/main.py` explains why: extending the feature vector changes +`D`, which resets every user's learned `A`/`b` matrices and discards accumulated +signal. That loss requires a deliberate shadow-first rollout per ADR-0002, not an +in-place update. + +This ADR authorises `egreedy-v2`, which extends the active `egreedy-v1` (D=7) with +the 5 profile features (D=12) and defines how it ships safely. + +## Decision + +### New policy: egreedy-v2 (D=12) + +Feature vector layout: + +| idx | name | encoding | +|-----|------|----------| +| 0–1 | hour_sin, hour_cos | cyclical, current hour | +| 2 | is_overdue | 0/1 | +| 3 | task_age_norm | age_days / 30, clipped 0–1 | +| 4 | priority_norm | (p − 1) / 3 | +| 5–6 | dow_sin, dow_cos | cyclical, day of week | +| 7 | completion_rate_30d | raw (already 0–1); null → 0 | +| 8 | dismiss_rate_30d | raw (already 0–1); null → 0 | +| 9 | mean_dwell_norm | dwell_ms / 600_000, clipped 0–1; null → 0 | +| 10 | preferred_hour_alignment | `(cos(2π(pref − now)/24) + 1) / 2`; null → 0.5 (neutral) | +| 11 | tip_volume_norm | `log1p(n) / log1p(100)`, clipped 0–1; null → 0 | + +**Normalization rationale:** +- Rates are already in [0, 1]; no transform needed. +- Dwell clips at 10 min — anything beyond that carries diminishing signal. +- `preferred_hour` needs circular continuity; one-dimension approximation using + cosine alignment with the current hour. At null (no established peak) we use + 0.5 (the midpoint/neutral) rather than 0 (misleading "polar-opposite hour"). +- `tip_volume` uses log-scale because engagement counts are heavy-tailed. + +### Rollout sequence (per ADR-0002) + +1. **Shadow** (this ADR) — `egreedy-v2-shadow` registered in the recommender's + shadow-policy map (disabled by default). Admin enables via `/admin/policies`. + - Calls `/score/egreedy/v2` fire-and-forget alongside the active `egreedy-v1` call. + - Publishes `signals.tip.served` with `policy: shadow:egreedy-v2-shadow` for logging. + - **No reward delivery to shadow** — live shadow collects decision-agreement + exposure only; reward measurement uses offline simulation. + - State files: `{user}_egreedy_v2.json` — isolated from v1's `{user}_egreedy.json`. + +2. **Offline sim** — run `runner.py --policies egreedy-v1 egreedy-v2 --n-rounds 20` + using the `rule` judge and persona-level profile features (synthetic values in + `personas.py`). Gate: v2 mean reward ≥ v1 mean reward. + +3. **Promote** — if sim gate passes, change the `remotePolicy()` call in + `recommender.ts` from `/score/egreedy` to `/score/egreedy/v2` and change reward + delivery to `/reward/egreedy/v2`. No DB migration; old per-user v1 state files + are left on disk (available for rollback; clean up after 30 days). + +### State-file migration + +No migration of `A`/`b` matrices from v1 → v2. A D×D→D'×D' transform would +require assumptions about the new dimensions that we cannot justify without data. +v2 starts from the identity prior and learns from scratch in shadow/sim. The reward +penalty from cold-start is the correct price for the dimension extension. + +### Admin control + +`GET /api/admin/policies` surfaces `egreedy-v2-shadow` with `active: false`. +Toggle via `POST /api/admin/policies/egreedy-v2-shadow/toggle`. + +## Consequences + +**Good:** +- Profile features (preferred hour, completion/dismiss rates, volume) allow the + bandit to personalise timing recommendations beyond what the candidate-level + features encode. +- Normalization is deterministic, bounded [0, 1], and numerically stable; no + scaling artefacts as the population grows. +- Shadow-first rollout protects real users from a cold-start regression. + +**Trade-offs:** +- Cold-start: v2 state files begin from the identity prior. During shadow, + v2 makes random-ish decisions for early users. This is expected and intentional. +- Synthetic persona profiles in `personas.py` approximate real user distributions; + the offline sim is evidence, not proof. The promotion gate requires the sim to + run after v2 has accumulated enough behavioral data (suggest ≥100 shadow calls + per policy per user before running the final sim). +- The one-dim preferred-hour encoding loses some circular information compared to + two-dim sin/cos. If preferred-hour alignment becomes a dominant signal, revisit + with D=13 in a follow-up ADR. + +## Alternatives considered + +**Warm-start via projection** — project v1's 7-dim theta into D=12 by padding +with zeros. Rejected: zero initialization for the profile dims is equivalent, and +projecting theta without the corresponding `A` matrix cannot be done correctly. + +**D=13 with two preferred-hour dims** — cleaner circular encoding, but contradicts +the D=12 target in the issue spec and complicates the sim comparison. Deferred. + +**In-place v1 promotion without shadow** — violates ADR-0002. diff --git a/ml/experiments/sim/personas.py b/ml/experiments/sim/personas.py index 8c43805..f4212b9 100644 --- a/ml/experiments/sim/personas.py +++ b/ml/experiments/sim/personas.py @@ -1,5 +1,6 @@ """Synthetic user personas for simulation.""" +import math from dataclasses import dataclass @@ -13,6 +14,24 @@ class Persona: morning_active: bool # higher engagement hours 6–10 evening_active: bool # higher engagement hours 18–22 recency_bias: float # 0–1: prefers recently-due tasks + # Synthetic profile features for egreedy-v2 sim (ADR-0012). + # Values represent what a typical user of this persona would have + # accumulated after a few weeks of app use. + _completion_rate: float = 0.3 + _dismiss_rate: float = 0.2 + _mean_dwell_ms: float = 60_000.0 # ms + _preferred_hour: float = 12.0 # 0–23 + _tip_volume_30d: float = 15.0 + + def profile_features(self, now_hour: int | None = None) -> dict: + """Return profile_features dict compatible with the ml/serving API.""" + return { + "completion_rate_30d": self._completion_rate, + "dismiss_rate_30d": self._dismiss_rate, + "mean_dwell_ms_30d": self._mean_dwell_ms, + "preferred_hour": self._preferred_hour, + "tip_volume_30d": self._tip_volume_30d, + } PERSONAS: list[Persona] = [ @@ -27,6 +46,11 @@ PERSONAS: list[Persona] = [ morning_active=True, evening_active=False, recency_bias=0.3, + _completion_rate=0.55, + _dismiss_rate=0.10, + _mean_dwell_ms=45_000.0, + _preferred_hour=8.0, + _tip_volume_30d=22.0, ), Persona( name="evening-relaxed", @@ -39,6 +63,11 @@ PERSONAS: list[Persona] = [ morning_active=False, evening_active=True, recency_bias=0.5, + _completion_rate=0.30, + _dismiss_rate=0.25, + _mean_dwell_ms=90_000.0, + _preferred_hour=20.0, + _tip_volume_30d=12.0, ), Persona( name="low-priority-first", @@ -51,6 +80,11 @@ PERSONAS: list[Persona] = [ morning_active=True, evening_active=False, recency_bias=0.7, + _completion_rate=0.40, + _dismiss_rate=0.15, + _mean_dwell_ms=30_000.0, + _preferred_hour=9.0, + _tip_volume_30d=18.0, ), Persona( name="consistent-responder", @@ -63,6 +97,11 @@ PERSONAS: list[Persona] = [ morning_active=True, evening_active=True, recency_bias=0.5, + _completion_rate=0.50, + _dismiss_rate=0.10, + _mean_dwell_ms=60_000.0, + _preferred_hour=12.0, + _tip_volume_30d=30.0, ), Persona( name="overdue-ignorer", @@ -75,5 +114,10 @@ PERSONAS: list[Persona] = [ morning_active=False, evening_active=True, recency_bias=0.2, + _completion_rate=0.20, + _dismiss_rate=0.40, + _mean_dwell_ms=120_000.0, + _preferred_hour=19.0, + _tip_volume_30d=10.0, ), ] diff --git a/ml/experiments/sim/runner.py b/ml/experiments/sim/runner.py index da65079..42e94c8 100644 --- a/ml/experiments/sim/runner.py +++ b/ml/experiments/sim/runner.py @@ -43,19 +43,22 @@ from task_generator import generate_task_pool POLICY_SCORE_ENDPOINTS: dict[str, str] = { "linucb-v1": "/score", "egreedy-v1": "/score/egreedy", + "egreedy-v2": "/score/egreedy/v2", } POLICY_REWARD_ENDPOINTS: dict[str, str] = { "linucb-v1": "/reward", "egreedy-v1": "/reward/egreedy", + "egreedy-v2": "/reward/egreedy/v2", } def _call_score( client: httpx.Client, ml_url: str, policy: str, user_id: str, tasks: list[dict], hour: int, dow: int, + profile_features: dict | None = None, ) -> dict | None: endpoint = POLICY_SCORE_ENDPOINTS.get(policy, "/score") - body = { + body: dict = { "user_id": user_id, "candidates": [ { @@ -72,6 +75,8 @@ def _call_score( ], "context": {"hour_of_day": hour, "day_of_week": dow}, } + if profile_features is not None: + body["profile_features"] = profile_features try: r = client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0) r.raise_for_status() @@ -85,15 +90,17 @@ def _call_reward( client: httpx.Client, ml_url: str, policy: str, user_id: str, tip_id: str, reward: float, features: dict, day_of_week: int = 0, + profile_features: dict | None = None, ) -> None: endpoint = POLICY_REWARD_ENDPOINTS.get(policy, "/reward") + body: dict = { + "user_id": user_id, "tip_id": tip_id, "reward": reward, + "features": features, "day_of_week": day_of_week, + } + if profile_features is not None: + body["profile_features"] = profile_features try: - client.post( - f"{ml_url}{endpoint}", - json={"user_id": user_id, "tip_id": tip_id, "reward": reward, - "features": features, "day_of_week": day_of_week}, - timeout=5.0, - ) + client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0) except Exception as e: print(f" [warn] reward {policy}: {e}", file=sys.stderr) @@ -133,9 +140,13 @@ def run_simulation( seed_tasks = rnd * 997 + abs(hash(user_id)) % 997 tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks) + # Per-persona profile features for v2 (synthetic for sim — see ADR-0012) + profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None + for policy in policies: p_user = f"{user_id}-{policy}" - scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow) + scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow, + profile_features=profile) if not scored: continue tip_id = scored.get("tip_id") @@ -149,7 +160,7 @@ def run_simulation( "is_overdue": tip["features"]["is_overdue"], "task_age_days": tip["features"]["task_age_days"], "priority": tip["features"]["priority"], - }, day_of_week=dow) + }, day_of_week=dow, profile_features=profile) acc[policy]["total_reward"] += reward acc[policy]["n_pulls"] += 1 @@ -208,9 +219,12 @@ def run_score_phase( seed_tasks = rnd * 997 + abs(hash(user_id)) % 997 tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks) + profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None + for policy in policies: p_user = f"{user_id}-{policy}" - scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow) + scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow, + profile_features=profile) if not scored: continue tip_id = scored.get("tip_id") @@ -229,6 +243,7 @@ def run_score_phase( "tip_features": tip["features"], "tip_content": tip["content"], "ml_score": scored.get("score"), + "profile_features": profile, }) judgment_requests.append({ @@ -368,6 +383,7 @@ def run_reward_phase(plan_path: str, out_path: str, ml_url: str) -> dict: session["tip_id"], reward, {"hour_of_day": rnd_data["hour"], **session["tip_features"]}, day_of_week=rnd_data["dow"], + profile_features=session.get("profile_features"), ) p = session["policy"] diff --git a/ml/serving/main.py b/ml/serving/main.py index 4cdb6e5..473f043 100644 --- a/ml/serving/main.py +++ b/ml/serving/main.py @@ -2,12 +2,17 @@ oO ML Serving — Phase 1: LinUCB contextual bandit. Contract: - POST /score { user_id, candidates, context } → { tip_id, score, policy } - POST /reward { user_id, tip_id, reward, features } → { ok } - POST /reset/{user_id} → { ok } - GET /stats/{user_id} → { pulls, cumulative_reward, estimated_mean, last_updated } - GET /features/{user_id} → { history: [{ ts, features, score }] } - GET /health → { ok } + POST /score LinUCB d=5 (baseline, kept as shadow-eligible) + POST /score/egreedy ε-greedy v1, d=7 (active — ADR-0007) + POST /score/egreedy/v2 ε-greedy v2, d=12, profile features (shadow — ADR-0012) + POST /reward, /reward/egreedy, /reward/egreedy/v2 + GET /stats/{user_id} LinUCB stats + GET /stats/egreedy/{user_id} ε-greedy v1 stats + GET /stats/egreedy/v2/{user_id} ε-greedy v2 stats + POST /reset/{user_id} clear all per-user bandit state + GET /features/{user_id} last 100 scored feature vectors + POST /generate LLM tip candidates via LiteLLM + GET /health { ok } Features (d=5): hour_sin, hour_cos — cyclical time-of-day encoding @@ -43,7 +48,8 @@ STATE_DIR.mkdir(parents=True, exist_ok=True) ALPHA = 1.0 # LinUCB exploration coefficient D = 5 # LinUCB feature dimension -D7 = 7 # ε-greedy feature dimension (adds day-of-week cyclical encoding) +D7 = 7 # ε-greedy v1 feature dimension (adds day-of-week cyclical encoding) +D12 = 12 # ε-greedy v2 feature dimension (adds 5 profile features — ADR-0012) EPSILON = 0.1 # ε-greedy exploration rate FEATURE_HISTORY_SIZE = 100 # per-user ring buffer @@ -126,6 +132,98 @@ def save_state7(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None: p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})) +# ── ε-greedy v2 state helpers (d=12, profile features — ADR-0012) ───────── +# +# Normalization choices (see ADR-0012): +# completion_rate_30d — already 0–1, passthrough; null → 0 +# dismiss_rate_30d — already 0–1, passthrough; null → 0 +# mean_dwell_ms_30d — clipped to [0, 600_000 ms] (10 min), then /600_000 +# preferred_hour — circular alignment with context hour: +# (cos(2π·(now − pref)/24) + 1) / 2 → 0–1 +# captures "is the user's habitual peak near now?" +# null → 0.5 (neutral) +# tip_volume_30d — log1p(n) / log1p(100), clipped to [0, 1] + +_DWELL_CLIP_MS = 600_000.0 # 10 minutes +_VOLUME_LOG_MAX = math.log1p(100.0) + + +def _profile_value(profile: Optional[dict], key: str) -> Optional[float]: + if not profile: + return None + v = profile.get(key) + if v is None: + return None + try: + return float(v) + except (TypeError, ValueError): + return None + + +def _norm_rate(v: Optional[float]) -> float: + return 0.0 if v is None else max(0.0, min(1.0, v)) + + +def _norm_dwell(v: Optional[float]) -> float: + if v is None: + return 0.0 + return max(0.0, min(1.0, v / _DWELL_CLIP_MS)) + + +def _norm_volume(v: Optional[float]) -> float: + if v is None or v <= 0: + return 0.0 + return min(1.0, math.log1p(float(v)) / _VOLUME_LOG_MAX) + + +def _norm_preferred_hour(pref: Optional[float], now_hour: int) -> float: + if pref is None: + return 0.5 # neutral when the user has no established peak yet + delta = (float(pref) - float(now_hour)) * (2.0 * math.pi / 24.0) + return (math.cos(delta) + 1.0) / 2.0 + + +def build_feature_vector_12( + features: dict, + day_of_week: int = 0, + profile: Optional[dict] = None, +) -> np.ndarray: + """d=12: egreedy-v1's 7 dims + 5 normalized profile features (ADR-0012).""" + base7 = build_feature_vector_7(features, day_of_week) + now_hour = int(features.get("hour_of_day", 12)) + profile_dims = np.array( + [ + _norm_rate(_profile_value(profile, "completion_rate_30d")), + _norm_rate(_profile_value(profile, "dismiss_rate_30d")), + _norm_dwell(_profile_value(profile, "mean_dwell_ms_30d")), + _norm_preferred_hour(_profile_value(profile, "preferred_hour"), now_hour), + _norm_volume(_profile_value(profile, "tip_volume_30d")), + ], + dtype=np.float64, + ) + return np.concatenate([base7, profile_dims]) + + +def state12_path(user_id: str) -> Path: + safe = "".join(c if c.isalnum() else "_" for c in user_id) + return STATE_DIR / f"{safe}_egreedy_v2.json" + + +def load_state12(user_id: str) -> tuple[np.ndarray, np.ndarray, dict]: + p = state12_path(user_id) + if p.exists(): + raw = json.loads(p.read_text()) + A = np.array(raw["A"], dtype=np.float64) + b = np.array(raw["b"], dtype=np.float64) + return A, b, raw.get("meta", {}) + return np.identity(D12, dtype=np.float64), np.zeros(D12, dtype=np.float64), {} + + +def save_state12(user_id: str, A: np.ndarray, b: np.ndarray, meta: dict) -> None: + p = state12_path(user_id) + p.write_text(json.dumps({"A": A.tolist(), "b": b.tolist(), "meta": meta})) + + # ── API models ───────────────────────────────────────────────────────────── class CandidateFeatures(BaseModel): @@ -171,6 +269,10 @@ class RewardRequest(BaseModel): reward: float # +1 done, +0.5 helpful, 0 snooze, -0.5 not_helpful, -1 dismiss features: CandidateFeatures day_of_week: int = 0 # included so egreedy can train dow features correctly + # Profile features at the time the tip was served. Ignored by /reward and + # /reward/egreedy; consumed by /reward/egreedy/v2 so the ridge update uses + # the same feature vector as the matching /score/egreedy/v2 call. + profile_features: Optional[dict] = None class RewardResponse(BaseModel): @@ -472,6 +574,128 @@ def reward_egreedy(req: RewardRequest) -> RewardResponse: return RewardResponse(ok=True) +@app.post("/score/egreedy/v2", response_model=ScoreResponse) +def score_egreedy_v2(req: ScoreRequest) -> ScoreResponse: + """ε-greedy v2 — d=12, adds 5 normalized profile features (ADR-0012). + + Shadow-only until offline sim + rollout per ADR-0002 completes. + Accepts the same ScoreRequest shape as v1; `profile_features` drives the + extra 5 dims (defaults: zeros for rates/volume/dwell, 0.5 neutral for + preferred_hour alignment). + """ + if not req.candidates: + raise HTTPException(status_code=422, detail="No candidates") + + A, b, meta = load_state12(req.user_id) + try: + A_inv = np.linalg.inv(A) + except np.linalg.LinAlgError: + A_inv = np.identity(D12, dtype=np.float64) + theta = A_inv @ b + + dow = req.context.day_of_week + exploring = np.random.random() < EPSILON + + if exploring: + chosen = req.candidates[np.random.randint(len(req.candidates))] + feat_dict = { + "hour_of_day": req.context.hour_of_day, + "is_overdue": chosen.features.is_overdue, + "task_age_days": chosen.features.task_age_days, + "priority": chosen.features.priority, + } + x = build_feature_vector_12(feat_dict, dow, req.profile_features) + best_score = float(theta @ x) + best_id = chosen.id + else: + best_id = None + best_score = -float("inf") + feat_dict = {} + for candidate in req.candidates: + fd = { + "hour_of_day": req.context.hour_of_day, + "is_overdue": candidate.features.is_overdue, + "task_age_days": candidate.features.task_age_days, + "priority": candidate.features.priority, + } + x = build_feature_vector_12(fd, dow, req.profile_features) + s = float(theta @ x) + if s > best_score: + best_score = s + best_id = candidate.id + feat_dict = fd + + history = get_feature_history(req.user_id) + history.append({ + "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "features": {**feat_dict, "day_of_week": dow, "exploring": exploring}, + "score": best_score, + "tip_id": best_id, + "policy": "egreedy-v2", + }) + + meta["pulls"] = meta.get("pulls", 0) + 1 + meta["explore_count"] = meta.get("explore_count", 0) + int(exploring) + meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + save_state12(req.user_id, A, b, meta) + + return ScoreResponse(tip_id=best_id, score=best_score, policy="egreedy-v2") + + +@app.post("/reward/egreedy/v2", response_model=RewardResponse) +def reward_egreedy_v2(req: RewardRequest) -> RewardResponse: + """Update ε-greedy v2 ridge estimator using the d=12 feature vector.""" + A, b, meta = load_state12(req.user_id) + feat_dict = { + "hour_of_day": req.features.hour_of_day, + "is_overdue": req.features.is_overdue, + "task_age_days": req.features.task_age_days, + "priority": req.features.priority, + } + x = build_feature_vector_12(feat_dict, req.day_of_week, req.profile_features) + A += np.outer(x, x) + b += req.reward * x + + meta["cumulative_reward"] = meta.get("cumulative_reward", 0.0) + req.reward + meta["reward_count"] = meta.get("reward_count", 0) + 1 + meta["last_updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + save_state12(req.user_id, A, b, meta) + return RewardResponse(ok=True) + + +@app.get("/stats/egreedy/v2/{user_id}") +def stats_egreedy_v2(user_id: str): + """ε-greedy v2 policy stats — pulls, cumulative reward, θ vector.""" + A, b, meta = load_state12(user_id) + try: + theta = (np.linalg.inv(A) @ b).tolist() + except np.linalg.LinAlgError: + theta = [0.0] * D12 + + pulls = meta.get("pulls", 0) + cumulative_reward = meta.get("cumulative_reward", 0.0) + reward_count = meta.get("reward_count", 0) + explore_count = meta.get("explore_count", 0) + + return { + "user_id": user_id, + "policy": "egreedy-v2", + "pulls": pulls, + "reward_count": reward_count, + "cumulative_reward": cumulative_reward, + "estimated_mean_reward": cumulative_reward / reward_count if reward_count > 0 else 0.0, + "exploration_rate": explore_count / pulls if pulls > 0 else 0.0, + "theta": theta, + "feature_labels": [ + "hour_sin", "hour_cos", "is_overdue", "task_age", "priority", + "dow_sin", "dow_cos", + "completion_rate_30d", "dismiss_rate_30d", "mean_dwell_norm", + "preferred_hour_alignment", "tip_volume_norm", + ], + "last_updated": meta.get("last_updated"), + } + + @app.get("/stats/egreedy/{user_id}") def stats_egreedy(user_id: str): """ε-greedy policy stats — pulls, cumulative reward, θ vector.""" @@ -509,6 +733,9 @@ def reset(user_id: str) -> RewardResponse: p7 = state7_path(user_id) if p7.exists(): p7.unlink() + p12 = state12_path(user_id) + if p12.exists(): + p12.unlink() if user_id in _feature_history: _feature_history[user_id].clear() return RewardResponse(ok=True) diff --git a/ml/serving/tests/test_score.py b/ml/serving/tests/test_score.py index 9111f0a..e32b1b5 100644 --- a/ml/serving/tests/test_score.py +++ b/ml/serving/tests/test_score.py @@ -6,7 +6,15 @@ import math import pytest from httpx import AsyncClient, ASGITransport -from main import app, build_feature_vector +from main import ( + app, + build_feature_vector, + build_feature_vector_12, + _norm_dwell, + _norm_preferred_hour, + _norm_rate, + _norm_volume, +) class TestFeatureVector: @@ -243,6 +251,176 @@ async def test_stats_for_fresh_user(): assert body["estimated_mean_reward"] == 0.0 +class TestV2Normalization: + def test_rate_passthrough(self): + assert _norm_rate(0.0) == 0.0 + assert _norm_rate(0.42) == 0.42 + assert _norm_rate(1.0) == 1.0 + + def test_rate_none_zero(self): + assert _norm_rate(None) == 0.0 + + def test_rate_clipped(self): + assert _norm_rate(1.5) == 1.0 + assert _norm_rate(-0.1) == 0.0 + + def test_dwell_none_zero(self): + assert _norm_dwell(None) == 0.0 + + def test_dwell_scales_to_0_1(self): + assert _norm_dwell(0) == 0.0 + # 600_000 ms (10 min) is the clip ceiling + assert _norm_dwell(600_000) == 1.0 + assert _norm_dwell(1_200_000) == 1.0 + assert _norm_dwell(60_000) == pytest.approx(0.1) + + def test_volume_monotonic_and_clipped(self): + assert _norm_volume(None) == 0.0 + assert _norm_volume(0) == 0.0 + assert _norm_volume(10) < _norm_volume(100) + # 100 tips ≈ full saturation + assert _norm_volume(100) == pytest.approx(1.0) + assert _norm_volume(10_000) == 1.0 + + def test_preferred_hour_alignment(self): + # Exact match → 1.0 + assert _norm_preferred_hour(9, 9) == pytest.approx(1.0) + # 12h opposite → 0.0 + assert _norm_preferred_hour(21, 9) == pytest.approx(0.0, abs=1e-10) + # 6h off → 0.5 (cos(π/2) = 0, scaled to 0.5) + assert _norm_preferred_hour(15, 9) == pytest.approx(0.5, abs=1e-10) + + def test_preferred_hour_null_neutral(self): + # Null preference → neutral 0.5 rather than misleading "alignment at 0" + assert _norm_preferred_hour(None, 9) == 0.5 + + +class TestFeatureVector12: + def test_shape(self): + v = build_feature_vector_12( + {"hour_of_day": 9, "is_overdue": True, "task_age_days": 2, "priority": 3}, + day_of_week=2, + profile={ + "completion_rate_30d": 0.5, + "dismiss_rate_30d": 0.1, + "mean_dwell_ms_30d": 60_000, + "preferred_hour": 9, + "tip_volume_30d": 20, + }, + ) + assert v.shape == (12,) + + def test_first_seven_match_v1(self): + """v2 must reduce to v1-style features on the first 7 dims so rollout + behaviour is predictable when profile is absent.""" + from main import build_feature_vector_7 + feat = {"hour_of_day": 14, "is_overdue": True, "task_age_days": 5, "priority": 2} + v1 = build_feature_vector_7(feat, day_of_week=3) + v2 = build_feature_vector_12(feat, day_of_week=3, profile=None) + assert (v1 == v2[:7]).all() + + def test_missing_profile_defaults(self): + v = build_feature_vector_12({"hour_of_day": 9}, day_of_week=0, profile=None) + # completion, dismiss, dwell, volume → 0; preferred_hour → 0.5 neutral + assert v[7] == 0.0 + assert v[8] == 0.0 + assert v[9] == 0.0 + assert v[10] == pytest.approx(0.5) + assert v[11] == 0.0 + + +@pytest.mark.asyncio +async def test_score_egreedy_v2_returns_candidate(): + payload = { + "user_id": "v2-user", + "candidates": [ + {"id": "t:a", "content": "A", "source": "todoist", + "features": {"is_overdue": True, "task_age_days": 2, "priority": 3}}, + {"id": "t:b", "content": "B", "source": "todoist", + "features": {"is_overdue": False, "task_age_days": 0, "priority": 1}}, + ], + "context": {"hour_of_day": 9, "day_of_week": 1}, + "profile_features": { + "completion_rate_30d": 0.4, + "dismiss_rate_30d": 0.1, + "mean_dwell_ms_30d": 45_000, + "preferred_hour": 9, + "tip_volume_30d": 8, + }, + } + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + r = await client.post("/score/egreedy/v2", json=payload) + assert r.status_code == 200 + body = r.json() + assert body["tip_id"] in {"t:a", "t:b"} + assert body["policy"] == "egreedy-v2" + + +@pytest.mark.asyncio +async def test_score_egreedy_v2_accepts_missing_profile(): + payload = { + "user_id": "v2-no-profile", + "candidates": [ + {"id": "t:solo", "content": "Solo", "source": "todoist", + "features": {"is_overdue": False, "task_age_days": 0, "priority": 1}}, + ], + "context": {"hour_of_day": 10, "day_of_week": 0}, + } + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + r = await client.post("/score/egreedy/v2", json=payload) + assert r.status_code == 200 + assert r.json()["tip_id"] == "t:solo" + + +@pytest.mark.asyncio +async def test_reward_egreedy_v2_updates_stats(): + user_id = "v2-reward-stats" + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + r0 = await client.get(f"/stats/egreedy/v2/{user_id}") + before = r0.json()["cumulative_reward"] + + await client.post("/reward/egreedy/v2", json={ + "user_id": user_id, + "tip_id": "t:r", + "reward": 1.0, + "features": {"hour_of_day": 9, "is_overdue": True, "task_age_days": 2, "priority": 3}, + "day_of_week": 1, + "profile_features": { + "completion_rate_30d": 0.3, + "dismiss_rate_30d": 0.2, + "mean_dwell_ms_30d": 30_000, + "preferred_hour": 9, + "tip_volume_30d": 5, + }, + }) + r1 = await client.get(f"/stats/egreedy/v2/{user_id}") + body = r1.json() + assert body["cumulative_reward"] == pytest.approx(before + 1.0) + assert body["policy"] == "egreedy-v2" + assert len(body["theta"]) == 12 + assert len(body["feature_labels"]) == 12 + + +@pytest.mark.asyncio +async def test_reset_clears_v2_state(): + user_id = "v2-reset" + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + await client.post("/score/egreedy/v2", json={ + "user_id": user_id, + "candidates": [ + {"id": "t:v2r", "content": "x", "source": "todoist", + "features": {"is_overdue": False, "task_age_days": 0, "priority": 1}}, + ], + "context": {"hour_of_day": 10, "day_of_week": 0}, + }) + r0 = await client.get(f"/stats/egreedy/v2/{user_id}") + assert r0.json()["pulls"] >= 1 + + await client.post(f"/reset/{user_id}") + r1 = await client.get(f"/stats/egreedy/v2/{user_id}") + assert r1.json()["pulls"] == 0 + + @pytest.mark.asyncio async def test_reward_negative_value(): """Dismissing a tip should decrease cumulative_reward.""" diff --git a/services/api/src/routes/recommender.ts b/services/api/src/routes/recommender.ts index 40dbac5..30bfc05 100644 --- a/services/api/src/routes/recommender.ts +++ b/services/api/src/routes/recommender.ts @@ -47,8 +47,8 @@ export const _clearCandidateCacheForTests = () => { // Shadow-policy registry // --------------------------------------------------------------------------- const shadowPolicies = new Map([ - // Example: enable random as a shadow baseline - // ('random-shadow', { active: true }), + // egreedy-v2 (D=12, profile features) — disabled until sim gate per ADR-0012 + ['egreedy-v2-shadow', { active: false }], ]); export function getShadowPolicies() { @@ -296,6 +296,42 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re policy: `shadow:${name}`, servedAt, }); + } else if (name === 'egreedy-v2-shadow') { + // Call v2 endpoint with the same payload used for the active policy. + // No reward is delivered — offline sim is the reward measurement for shadow. + void (async () => { + try { + const body = { + user_id: req.userId!, + candidates: allCandidates.map((t) => ({ + id: t.id, + content: t.content, + source: t.source, + source_id: t.sourceId ?? null, + features: t.features, + })), + context: { hour_of_day: hour, day_of_week: dayOfWeek }, + profile_features: profile, + }; + const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: AbortSignal.timeout(3000), + }); + if (res.ok) { + const data = (await res.json()) as { tip_id: string }; + bus.publish('signals.tip.served', { + userId: req.userId!, + tipId: data.tip_id, + policy: `shadow:${name}`, + servedAt, + }); + } + } catch { + // shadow is best-effort + } + })(); } }