# oO/ml/experiments/sim/task_generator.py

"""Generate synthetic task pools for simulation."""

from __future__ import annotations

import random

# Task-content templates sampled by generate_task_pool.
# Templates may contain the ``str.format`` placeholders ``{n}`` (filled with a
# random three-digit number) and ``{name}`` (filled with an entry of _NAMES);
# templates without placeholders are used verbatim.
# NOTE: element order affects which template a given seed selects, so
# reordering this list changes seeded output.
_TEMPLATES = [
    "Send weekly report to team",
    "Review pull request #{n}",
    "Schedule meeting with {name}",
    "Update project documentation",
    "Fix bug in authentication module",
    "Prepare presentation for stakeholders",
    "Call back {name}",
    "Submit expense report",
    "Review quarterly goals",
    "Clean up inbox",
    "Follow up on proposal to {name}",
    "Complete onboarding checklist",
    "Write tests for feature #{n}",
    "Deploy hotfix to production",
    "Respond to support ticket #{n}",
    "Draft release notes",
    "Update dependencies",
    "Review design mockups",
    "Archive old tickets",
    "Check in with {name}",
]

# Person names substituted for the ``{name}`` placeholder in _TEMPLATES.
_NAMES = ["Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace"]


def generate_task_pool(n: int = 10, seed: int | None = None) -> list[dict]:
    """Return n synthetic tasks with randomly sampled features."""
    rng = random.Random(seed)

    tasks = []
    for i in range(n):
        priority = rng.choices([1, 2, 3, 4], weights=[0.3, 0.3, 0.25, 0.15])[0]
        # age_days: most tasks fresh, a few stale
        age_days = rng.choices(
            [0.0, 0.5, 1.0, 3.0, 7.0, 14.0],
            weights=[0.35, 0.20, 0.20, 0.12, 0.08, 0.05],
        )[0] + rng.random() * 0.5
        # is_overdue only meaningful when age > 0
        is_overdue = age_days > 0.5 and rng.random() < 0.65

        template = rng.choice(_TEMPLATES)
        content = template.format(n=rng.randint(100, 999), name=rng.choice(_NAMES))

        tasks.append({
            "id": f"sim:{i}",
            "content": content,
            "source": "sim",
            "features": {
                "is_overdue": is_overdue,
                "task_age_days": age_days if is_overdue else 0.0,
                "priority": priority,
            },
        })

    return tasks