"""Fixed contexts for the tip-generation benchmark. Every cell of the (model × prompt) grid is evaluated on the *same* set of scenarios so quality differences are attributable to the model/prompt, not to context variance. A scenario is one (persona, hour-of-day, candidate-task-pool) tuple. The hour and the task pool are seeded deterministically from the persona's name so the bench is reproducible across machines. """ from __future__ import annotations import sys from dataclasses import dataclass from pathlib import Path # Reuse personas from sim — same source of truth for user archetypes. sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "sim")) from personas import PERSONAS, Persona # type: ignore from task_generator import generate_task_pool # type: ignore @dataclass(frozen=True) class Scenario: id: str # stable id used as MLflow tag — keep ASCII safe persona: Persona hour_of_day: int # 0–23 day_of_week: int # 0=Mon tasks: list[dict] def to_prompt_context(self) -> dict: """Shape expected by ml/serving/prompts.PromptContext.""" return { "tasks": [ { "content": t["content"], "priority": t["features"]["priority"], "is_overdue": t["features"]["is_overdue"], "due_date": t.get("due_date", "no due date"), } for t in self.tasks ], "hour_of_day": self.hour_of_day, "day_of_week": self.day_of_week, "extra": { "persona": self.persona.name, "persona_hint": self.persona.description, }, } # Two time-slots probe whether the model adapts its tone to the hour. # Morning (09) and evening (21) are picked because most personas have # strong directional preferences there. _TIME_SLOTS = [(9, 1), (21, 3)] # (hour_of_day, day_of_week) def build_scenarios(tasks_per_scenario: int = 6) -> list[Scenario]: """Return a deterministic list of scenarios. With 4 personas × 2 time-slots = 8 scenarios. Task pools are seeded by ``hash(persona.name) + hour`` so runs are reproducible and each persona sees the same tasks at the same hour across cells. """ out: list[Scenario] = [] for persona in PERSONAS[:4]: for hour, dow in _TIME_SLOTS: seed = (abs(hash(persona.name)) % 9973) + hour tasks = generate_task_pool(n=tasks_per_scenario, seed=seed) out.append( Scenario( id=f"{persona.name}-h{hour:02d}", persona=persona, hour_of_day=hour, day_of_week=dow, tasks=tasks, ) ) return out