feat(ml): egreedy-v2 shadow policy — D=12 with profile features (#99)

Ship the scaffolding for #99 (phase B.3 of #81):

- ml/serving: add /score/egreedy/v2, /reward/egreedy/v2, /stats/egreedy/v2
  endpoints (D=12). New feature dims: completion/dismiss rates, mean dwell
  (clipped 10min), preferred-hour alignment (cosine, 1-dim), tip volume (log).
  Separate state file per user (_egreedy_v2.json). /reset clears v2 state too.
- ADR-0012: documents D=7→12 dimension change, normalization choices, shadow
  rollout protocol, and promotion gate (offline sim win per ADR-0002).
- recommender.ts: register egreedy-v2-shadow in shadow-policy map (disabled by
  default). When enabled, calls /score/egreedy/v2 fire-and-forget and publishes
  shadow:egreedy-v2-shadow serve signal. No reward to shadow — sim is the gate.
- sim runner/personas: personas carry synthetic profile_features per persona;
  _call_score/_call_reward thread profile_features through (None-safe for v1/linucb).
- 18 new Python tests; all 56 Python + 170 TS tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 10:00:38 +00:00
parent b8113d4bda
commit 2d7cf217a9
6 changed files with 629 additions and 20 deletions

View File

@@ -1,5 +1,6 @@
"""Synthetic user personas for simulation."""
import math
from dataclasses import dataclass
@@ -13,6 +14,24 @@ class Persona:
morning_active: bool # higher engagement hours 6–10
evening_active: bool # higher engagement hours 18–22
recency_bias: float # 0–1: prefers recently-due tasks
# Synthetic profile features for egreedy-v2 sim (ADR-0012).
# Values represent what a typical user of this persona would have
# accumulated after a few weeks of app use.
_completion_rate: float = 0.3
_dismiss_rate: float = 0.2
_mean_dwell_ms: float = 60_000.0 # ms
_preferred_hour: float = 12.0 # 0–23
_tip_volume_30d: float = 15.0
def profile_features(self, now_hour: int | None = None) -> dict:
"""Return profile_features dict compatible with the ml/serving API."""
return {
"completion_rate_30d": self._completion_rate,
"dismiss_rate_30d": self._dismiss_rate,
"mean_dwell_ms_30d": self._mean_dwell_ms,
"preferred_hour": self._preferred_hour,
"tip_volume_30d": self._tip_volume_30d,
}
PERSONAS: list[Persona] = [
@@ -27,6 +46,11 @@ PERSONAS: list[Persona] = [
morning_active=True,
evening_active=False,
recency_bias=0.3,
_completion_rate=0.55,
_dismiss_rate=0.10,
_mean_dwell_ms=45_000.0,
_preferred_hour=8.0,
_tip_volume_30d=22.0,
),
Persona(
name="evening-relaxed",
@@ -39,6 +63,11 @@ PERSONAS: list[Persona] = [
morning_active=False,
evening_active=True,
recency_bias=0.5,
_completion_rate=0.30,
_dismiss_rate=0.25,
_mean_dwell_ms=90_000.0,
_preferred_hour=20.0,
_tip_volume_30d=12.0,
),
Persona(
name="low-priority-first",
@@ -51,6 +80,11 @@ PERSONAS: list[Persona] = [
morning_active=True,
evening_active=False,
recency_bias=0.7,
_completion_rate=0.40,
_dismiss_rate=0.15,
_mean_dwell_ms=30_000.0,
_preferred_hour=9.0,
_tip_volume_30d=18.0,
),
Persona(
name="consistent-responder",
@@ -63,6 +97,11 @@ PERSONAS: list[Persona] = [
morning_active=True,
evening_active=True,
recency_bias=0.5,
_completion_rate=0.50,
_dismiss_rate=0.10,
_mean_dwell_ms=60_000.0,
_preferred_hour=12.0,
_tip_volume_30d=30.0,
),
Persona(
name="overdue-ignorer",
@@ -75,5 +114,10 @@ PERSONAS: list[Persona] = [
morning_active=False,
evening_active=True,
recency_bias=0.2,
_completion_rate=0.20,
_dismiss_rate=0.40,
_mean_dwell_ms=120_000.0,
_preferred_hour=19.0,
_tip_volume_30d=10.0,
),
]

View File

@@ -43,19 +43,22 @@ from task_generator import generate_task_pool
# Score endpoint path for each policy name. _call_score looks the policy up
# here and falls back to "/score" when the name is not listed.
POLICY_SCORE_ENDPOINTS: dict[str, str] = {
"linucb-v1": "/score",
"egreedy-v1": "/score/egreedy",
"egreedy-v2": "/score/egreedy/v2",
}
# Reward endpoint path for each policy name. _call_reward looks the policy up
# here and falls back to "/reward" when the name is not listed.
POLICY_REWARD_ENDPOINTS: dict[str, str] = {
"linucb-v1": "/reward",
"egreedy-v1": "/reward/egreedy",
"egreedy-v2": "/reward/egreedy/v2",
}
def _call_score(
client: httpx.Client, ml_url: str, policy: str,
user_id: str, tasks: list[dict], hour: int, dow: int,
profile_features: dict | None = None,
) -> dict | None:
endpoint = POLICY_SCORE_ENDPOINTS.get(policy, "/score")
body = {
body: dict = {
"user_id": user_id,
"candidates": [
{
@@ -72,6 +75,8 @@ def _call_score(
],
"context": {"hour_of_day": hour, "day_of_week": dow},
}
if profile_features is not None:
body["profile_features"] = profile_features
try:
r = client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0)
r.raise_for_status()
@@ -85,15 +90,17 @@ def _call_reward(
client: httpx.Client, ml_url: str, policy: str,
user_id: str, tip_id: str, reward: float, features: dict,
day_of_week: int = 0,
profile_features: dict | None = None,
) -> None:
endpoint = POLICY_REWARD_ENDPOINTS.get(policy, "/reward")
body: dict = {
"user_id": user_id, "tip_id": tip_id, "reward": reward,
"features": features, "day_of_week": day_of_week,
}
if profile_features is not None:
body["profile_features"] = profile_features
try:
client.post(
f"{ml_url}{endpoint}",
json={"user_id": user_id, "tip_id": tip_id, "reward": reward,
"features": features, "day_of_week": day_of_week},
timeout=5.0,
)
client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0)
except Exception as e:
print(f" [warn] reward {policy}: {e}", file=sys.stderr)
@@ -133,9 +140,13 @@ def run_simulation(
seed_tasks = rnd * 997 + abs(hash(user_id)) % 997
tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks)
# Per-persona profile features for v2 (synthetic for sim — see ADR-0012)
profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None
for policy in policies:
p_user = f"{user_id}-{policy}"
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow)
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow,
profile_features=profile)
if not scored:
continue
tip_id = scored.get("tip_id")
@@ -149,7 +160,7 @@ def run_simulation(
"is_overdue": tip["features"]["is_overdue"],
"task_age_days": tip["features"]["task_age_days"],
"priority": tip["features"]["priority"],
}, day_of_week=dow)
}, day_of_week=dow, profile_features=profile)
acc[policy]["total_reward"] += reward
acc[policy]["n_pulls"] += 1
@@ -208,9 +219,12 @@ def run_score_phase(
seed_tasks = rnd * 997 + abs(hash(user_id)) % 997
tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks)
profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None
for policy in policies:
p_user = f"{user_id}-{policy}"
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow)
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow,
profile_features=profile)
if not scored:
continue
tip_id = scored.get("tip_id")
@@ -229,6 +243,7 @@ def run_score_phase(
"tip_features": tip["features"],
"tip_content": tip["content"],
"ml_score": scored.get("score"),
"profile_features": profile,
})
judgment_requests.append({
@@ -368,6 +383,7 @@ def run_reward_phase(plan_path: str, out_path: str, ml_url: str) -> dict:
session["tip_id"], reward,
{"hour_of_day": rnd_data["hour"], **session["tip_features"]},
day_of_week=rnd_data["dow"],
profile_features=session.get("profile_features"),
)
p = session["policy"]