feat(ml): egreedy-v2 shadow policy — D=12 with profile features (#99)
Ship the scaffolding for #99 (phase B.3 of #81):

- ml/serving: add /score/egreedy/v2, /reward/egreedy/v2, /stats/egreedy/v2 endpoints (D=12). New feature dims: completion/dismiss rates, mean dwell (clipped at 10 min), preferred-hour alignment (cosine, 1-dim), tip volume (log). Separate state file per user (_egreedy_v2.json). /reset clears v2 state too.
- ADR-0012: documents the D=7→12 dimension change, normalization choices, shadow rollout protocol, and promotion gate (offline sim win per ADR-0002).
- recommender.ts: register egreedy-v2-shadow in the shadow-policy map (disabled by default). When enabled, it calls /score/egreedy/v2 fire-and-forget and publishes the shadow:egreedy-v2-shadow serve signal. No reward is sent to the shadow policy — the sim is the gate.
- sim runner/personas: each persona carries synthetic profile_features; _call_score/_call_reward thread profile_features through (None-safe for v1/linucb).
- 18 new Python tests; all 56 Python + 170 TS tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,19 +43,22 @@ from task_generator import generate_task_pool
|
||||
POLICY_SCORE_ENDPOINTS: dict[str, str] = {
|
||||
"linucb-v1": "/score",
|
||||
"egreedy-v1": "/score/egreedy",
|
||||
"egreedy-v2": "/score/egreedy/v2",
|
||||
}
|
||||
POLICY_REWARD_ENDPOINTS: dict[str, str] = {
|
||||
"linucb-v1": "/reward",
|
||||
"egreedy-v1": "/reward/egreedy",
|
||||
"egreedy-v2": "/reward/egreedy/v2",
|
||||
}
|
||||
|
||||
|
||||
def _call_score(
|
||||
client: httpx.Client, ml_url: str, policy: str,
|
||||
user_id: str, tasks: list[dict], hour: int, dow: int,
|
||||
profile_features: dict | None = None,
|
||||
) -> dict | None:
|
||||
endpoint = POLICY_SCORE_ENDPOINTS.get(policy, "/score")
|
||||
body = {
|
||||
body: dict = {
|
||||
"user_id": user_id,
|
||||
"candidates": [
|
||||
{
|
||||
@@ -72,6 +75,8 @@ def _call_score(
|
||||
],
|
||||
"context": {"hour_of_day": hour, "day_of_week": dow},
|
||||
}
|
||||
if profile_features is not None:
|
||||
body["profile_features"] = profile_features
|
||||
try:
|
||||
r = client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0)
|
||||
r.raise_for_status()
|
||||
@@ -85,15 +90,17 @@ def _call_reward(
|
||||
client: httpx.Client, ml_url: str, policy: str,
|
||||
user_id: str, tip_id: str, reward: float, features: dict,
|
||||
day_of_week: int = 0,
|
||||
profile_features: dict | None = None,
|
||||
) -> None:
|
||||
endpoint = POLICY_REWARD_ENDPOINTS.get(policy, "/reward")
|
||||
body: dict = {
|
||||
"user_id": user_id, "tip_id": tip_id, "reward": reward,
|
||||
"features": features, "day_of_week": day_of_week,
|
||||
}
|
||||
if profile_features is not None:
|
||||
body["profile_features"] = profile_features
|
||||
try:
|
||||
client.post(
|
||||
f"{ml_url}{endpoint}",
|
||||
json={"user_id": user_id, "tip_id": tip_id, "reward": reward,
|
||||
"features": features, "day_of_week": day_of_week},
|
||||
timeout=5.0,
|
||||
)
|
||||
client.post(f"{ml_url}{endpoint}", json=body, timeout=5.0)
|
||||
except Exception as e:
|
||||
print(f" [warn] reward {policy}: {e}", file=sys.stderr)
|
||||
|
||||
@@ -133,9 +140,13 @@ def run_simulation(
|
||||
seed_tasks = rnd * 997 + abs(hash(user_id)) % 997
|
||||
tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks)
|
||||
|
||||
# Per-persona profile features for v2 (synthetic for sim — see ADR-0012)
|
||||
profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None
|
||||
|
||||
for policy in policies:
|
||||
p_user = f"{user_id}-{policy}"
|
||||
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow)
|
||||
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow,
|
||||
profile_features=profile)
|
||||
if not scored:
|
||||
continue
|
||||
tip_id = scored.get("tip_id")
|
||||
@@ -149,7 +160,7 @@ def run_simulation(
|
||||
"is_overdue": tip["features"]["is_overdue"],
|
||||
"task_age_days": tip["features"]["task_age_days"],
|
||||
"priority": tip["features"]["priority"],
|
||||
}, day_of_week=dow)
|
||||
}, day_of_week=dow, profile_features=profile)
|
||||
|
||||
acc[policy]["total_reward"] += reward
|
||||
acc[policy]["n_pulls"] += 1
|
||||
@@ -208,9 +219,12 @@ def run_score_phase(
|
||||
seed_tasks = rnd * 997 + abs(hash(user_id)) % 997
|
||||
tasks = generate_task_pool(n=tasks_per_round, seed=seed_tasks)
|
||||
|
||||
profile = persona.profile_features(hour) if hasattr(persona, "profile_features") else None
|
||||
|
||||
for policy in policies:
|
||||
p_user = f"{user_id}-{policy}"
|
||||
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow)
|
||||
scored = _call_score(client, ml_url, policy, p_user, tasks, hour, dow,
|
||||
profile_features=profile)
|
||||
if not scored:
|
||||
continue
|
||||
tip_id = scored.get("tip_id")
|
||||
@@ -229,6 +243,7 @@ def run_score_phase(
|
||||
"tip_features": tip["features"],
|
||||
"tip_content": tip["content"],
|
||||
"ml_score": scored.get("score"),
|
||||
"profile_features": profile,
|
||||
})
|
||||
|
||||
judgment_requests.append({
|
||||
@@ -368,6 +383,7 @@ def run_reward_phase(plan_path: str, out_path: str, ml_url: str) -> dict:
|
||||
session["tip_id"], reward,
|
||||
{"hour_of_day": rnd_data["hour"], **session["tip_features"]},
|
||||
day_of_week=rnd_data["dow"],
|
||||
profile_features=session.get("profile_features"),
|
||||
)
|
||||
|
||||
p = session["policy"]
|
||||
|
||||
Reference in New Issue
Block a user