""" LLM-based user reaction judge. Uses Claude Haiku when ANTHROPIC_API_KEY is set; falls back to a deterministic persona-based rule when it is not. """ from __future__ import annotations import os import random from personas import Persona ACTIONS = ["done", "snooze", "dismiss"] # Reward is NOT a fixed map anymore — it depends on action + simulated dwell time. # Use infer_reward() to compute the final reward after simulating dwell. _BASE_REWARDS: dict[str, float] = { "done": 1.0, # placeholder; real reward computed from dwell "snooze": 0.1, "dismiss": -1.0, } def infer_reward(action: str, dwell_ms: int) -> float: """Mirror of production inferReward() in recommender.ts.""" if action == "dismiss": return -1.0 if action == "snooze": return 0.1 # done — dwell-based if dwell_ms < 15_000: return -0.3 # stale / reflex done if dwell_ms < 120_000: return 1.0 # magic zone if dwell_ms < 600_000: return 0.6 # good return 0.3 # eventually done _HOUR_PERIODS = { (5, 10): "morning", (10, 14): "midday", (14, 18): "afternoon", (18, 22): "evening", } def _period(hour: int) -> str: for (lo, hi), name in _HOUR_PERIODS.items(): if lo <= hour < hi: return name return "night" # ── Deterministic judge ──────────────────────────────────────────────────── def _engagement_score(persona: Persona, tip: dict, hour: int) -> float: """0–1 score of how well this tip fits this persona right now.""" features = tip.get("features", {}) priority = features.get("priority", 1) is_overdue = features.get("is_overdue", False) p = 0.35 priority_norm = (priority - 1) / 3.0 p += (priority_norm - 0.5) * persona.prefers_high_priority * 0.4 if is_overdue: p += (persona.prefers_overdue - 0.5) * 0.3 is_morning = 5 <= hour < 10 is_evening = 18 <= hour < 22 if persona.morning_active and is_morning: p += 0.15 elif persona.evening_active and is_evening: p += 0.15 elif persona.morning_active and not is_morning and not is_evening: p -= 0.10 elif persona.evening_active and not is_evening and not is_morning: p -= 0.10 return max(0.05, min(0.90, p)) def _simulate_dwell_ms(engagement: float, rng: random.Random) -> int: """ Simulate how many milliseconds the user takes to act on a tip. High engagement → quick action (magic zone, 15s–2min). Medium engagement → slower (2–10min). Low engagement → very slow (>10min) — tip helped eventually but not 'magic'. For snooze/dismiss the dwell doesn't affect reward; return a short value. """ if engagement >= 0.70: # Strong match — magic zone: 15s–90s return rng.randint(15_000, 90_000) elif engagement >= 0.50: # Moderate match — good zone: 2–8min return rng.randint(120_000, 480_000) else: # Weak match but still done — eventually: 10–30min return rng.randint(600_000, 1_800_000) def _rule_judge(persona: Persona, tip: dict, hour: int, rng: random.Random) -> tuple[str, int]: """Return (action, dwell_ms) based on persona preferences and task features.""" engagement = _engagement_score(persona, tip, hour) r = rng.random() if r < engagement * 0.55: # done — dwell depends on engagement dwell = _simulate_dwell_ms(engagement, rng) return "done", dwell elif r < engagement: return "snooze", rng.randint(3_000, 20_000) else: return "dismiss", rng.randint(1_000, 5_000) # ── LLM judge ───────────────────────────────────────────────────────────── _anthropic_client = None def _get_client(): global _anthropic_client if _anthropic_client is None: try: import anthropic # type: ignore key = os.environ.get("ANTHROPIC_API_KEY", "") if key: _anthropic_client = anthropic.Anthropic(api_key=key) except ImportError: pass return _anthropic_client def _llm_judge( persona: Persona, tip: dict, hour: int, day_of_week: int, rng: random.Random, ) -> tuple[str, int]: client = _get_client() if client is None: return _rule_judge(persona, tip, hour, rng) features = tip.get("features", {}) priority = features.get("priority", 1) is_overdue = features.get("is_overdue", False) age_days = features.get("task_age_days", 0) priority_label = {1: "low", 2: "normal", 3: "high", 4: "urgent"}.get(priority, "normal") overdue_str = f", overdue by {age_days:.0f} day(s)" if is_overdue else "" days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] day_str = days[day_of_week % 7] prompt = ( f"You are simulating how a specific user reacts to a task recommendation app.\n\n" f"User persona: {persona.name}\n" f"Persona: {persona.description}\n\n" f'Recommended task: "{tip.get("content", "Unknown task")}"\n' f"Task: priority={priority_label}{overdue_str}\n" f"Current time: {_period(hour)} ({hour}:00, {day_str})\n\n" f"How does this user react? Reply with exactly one word: done | snooze | dismiss\n\n" f"- done: acts on this tip (marks task complete)\n" f"- snooze: acknowledges but not now\n" f"- dismiss: ignores or rejects it" ) try: message = client.messages.create( model="claude-haiku-4-5-20251001", max_tokens=10, messages=[{"role": "user", "content": prompt}], ) raw = message.content[0].text.strip().lower().split()[0] action = raw if raw in ACTIONS else _rule_judge(persona, tip, hour, rng)[0] except Exception: action, _ = _rule_judge(persona, tip, hour, rng) # Simulate dwell based on engagement level engagement = _engagement_score(persona, tip, hour) dwell = _simulate_dwell_ms(engagement, rng) if action == "done" else rng.randint(2_000, 15_000) return action, dwell # ── Public API ───────────────────────────────────────────────────────────── def judge( persona: Persona, tip: dict, hour: int, day_of_week: int, rng: random.Random, use_llm: bool = True, ) -> tuple[str, int, float]: """Return (action, dwell_ms, reward). action — 'done' | 'snooze' | 'dismiss' dwell_ms — simulated milliseconds between tip appearance and user action reward — inferred from action + dwell_ms via infer_reward() """ if use_llm and os.environ.get("ANTHROPIC_API_KEY"): action, dwell_ms = _llm_judge(persona, tip, hour, day_of_week, rng) else: action, dwell_ms = _rule_judge(persona, tip, hour, rng) return action, dwell_ms, infer_reward(action, dwell_ms)