# oO/ml/agents/overdue_task.py

from __future__ import annotations

import statistics
from typing import ClassVar

from .base import BaseAgent, AgentInput, AgentOutput
from .inference.history import UserHistory
from .manifest import AgentManifest, InferredParam


def _infer_lateness_tolerance(history: UserHistory) -> float:
    """Infer the lateness tolerance as the median (p50) lateness in days, clipped at 0.

    Only completions that carry a lateness value are sampled. A completion
    whose ``lateness_days`` is ``None`` (presumably a task that never had a due
    date — TODO confirm against ``UserHistory``) is skipped rather than handed
    to ``statistics.median``, which cannot order ``None`` against numbers and
    would raise ``TypeError``.

    Args:
        history: Object exposing ``task_completions``; each completion must
            expose ``lateness_days``.

    Returns:
        The median lateness, never below 0. Early finishes (negative lateness)
        pull the median down, so a punctual user gets 0 rather than a negative
        offset. Returns 0.0 when there is no usable sample.
    """
    samples = [
        c.lateness_days
        for c in history.task_completions
        if c.lateness_days is not None
    ]
    if not samples:
        return 0.0
    # Clip: a negative median means "usually early", which maps to zero tolerance.
    return max(0.0, statistics.median(samples))


def _infer_project_realness(history: UserHistory) -> dict[str, float]:
    """Score each project's due dates: 1 − (project median lateness / global median lateness).

    Only completions that have a truthy ``project_id`` and a non-``None``
    ``lateness_days`` are considered; the "global" median is taken over that
    same filtered set. A ``None`` lateness (presumably a task without a due
    date — TODO confirm against ``UserHistory``) is skipped because
    ``statistics.median`` cannot order ``None`` against numbers.

    Projects whose tasks are typically completed on time score close to 1.
    A project whose median lateness equals or exceeds the global median
    scores 0 (aspirational due dates).

    Args:
        history: Object exposing ``task_completions``; each completion must
            expose ``project_id`` and ``lateness_days``.

    Returns:
        Mapping of project id to a score in [0, 1], rounded to 3 decimals.
        Empty when no completion qualifies.
    """
    completions = [
        c
        for c in history.task_completions
        if c.project_id and c.lateness_days is not None
    ]
    if not completions:
        return {}

    # Group lateness samples per project; also serves as the set of known projects.
    by_project: dict[str, list[float]] = {}
    for c in completions:
        by_project.setdefault(c.project_id, []).append(c.lateness_days)  # type: ignore[index]

    global_median = statistics.median(c.lateness_days for c in completions)
    if global_median <= 0:
        # At least half of all completions were on time or early, so the ratio
        # below has no meaningful (or any) denominator: treat every project as real.
        return {pid: 1.0 for pid in by_project}

    result: dict[str, float] = {}
    for pid, days in by_project.items():
        realness = 1.0 - (statistics.median(days) / global_median)
        # Clamp: early projects would exceed 1, very late ones would go negative.
        result[pid] = round(max(0.0, min(1.0, realness)), 3)
    return result


# Static manifest for the overdue-task agent: identity, preference schema,
# declared inputs/consents, output contract, and the parameters that are
# inferred from user history. OverdueTaskAgent below copies id, ttl_sec and
# version from here.
MANIFEST = AgentManifest(
    id="overdue-task",
    version="1.2.0",  # #115: p50-lateness tolerance + per-project realness
    description="Reports the user's overdue tasks by count and age.",
    # JSON-Schema-style description of the per-user preferences this agent
    # reads; both keys are also listed under inferred_params below.
    pref_schema={
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "lateness_tolerance_days": {
                "type": "number",
                "minimum": 0,
                "default": 0,
                "description": "Days past due before a task is flagged. p50 of historical lateness.",
            },
            "project_realness": {
                "type": "object",
                # Values are per-project scores constrained to [0, 1].
                "additionalProperties": {"type": "number", "minimum": 0, "maximum": 1},
                "default": {},
                "description": "Per-project realness score [0,1]. Low = aspirational due dates.",
            },
        },
    },
    context_schema=["todoist.tasks"],
    required_consents=["data:core", "data:todoist", "agent:overdue-task"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=3600,  # 3600 s = 1 hour
    # NOTE(review): presumably the agent is suppressed while the user is in
    # one of these contexts — confirm against AgentManifest.
    silenced_in_contexts=["vacation"],
    # Each entry pairs a pref_schema key with the module-level function that
    # derives it from UserHistory.
    # NOTE(review): min_history looks like the minimum number of history
    # records required before `infer` is used instead of cold_start_default —
    # confirm against InferredParam.
    inferred_params=[
        InferredParam(
            key="lateness_tolerance_days",
            ttl_sec=7 * 86_400,      # recompute weekly — lateness habits shift slowly
            cold_start_default=0.0,
            min_history=10,
            infer=_infer_lateness_tolerance,
        ),
        InferredParam(
            key="project_realness",
            ttl_sec=7 * 86_400,  # same weekly cadence as the tolerance above
            cold_start_default={},
            min_history=10,
            infer=_infer_project_realness,
        ),
    ],
)


def _realness(project_id: str | None, project_realness: dict[str, float]) -> float:
    """Look up a project's realness score, falling back to 1.0 (fully real).

    The fallback applies when the task has no project, when no scores are
    available at all, or when this particular project has no score.
    """
    if project_id and project_realness:
        return project_realness.get(project_id, 1.0)
    return 1.0


def _format_task(task: dict, project_realness: dict[str, float]) -> str:
    """Render one task as ``"<content>" (<N> day(s) <wording>)``.

    ``N`` is the task's ``task_age_days`` (0 when absent) rounded to an
    integer. The wording is "past target date" when the task's project
    realness is below 0.4, otherwise "overdue".

    Raises:
        KeyError: if the task has no ``"content"`` key.
    """
    title = task["content"]
    days_late = round(task.get("task_age_days", 0))
    score = _realness(task.get("project_id"), project_realness)
    noun = "days" if days_late != 1 else "day"
    # Low-realness projects have aspirational dates, so use softer wording.
    wording = "past target date" if score < 0.4 else "overdue"
    return f'"{title}" ({days_late} {noun} {wording})'


class OverdueTaskAgent(BaseAgent):
    """Agent that summarises the user's overdue tasks by count and age.

    The identifying class attributes are copied from the module-level
    ``MANIFEST``.
    """

    agent_id: ClassVar[str] = MANIFEST.id
    ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
    version: ClassVar[str] = MANIFEST.version

    def compute(self, inp: AgentInput) -> AgentOutput:
        """Build the prompt and snapshot describing the user's overdue tasks.

        A task counts as overdue when its ``is_overdue`` flag is truthy and
        its ``task_age_days`` (0 when absent) is at least the
        ``lateness_tolerance_days`` preference (floored at 0). The three
        oldest such tasks are listed by name; wording switches from "overdue"
        to "past target date" when the relevant realness is below 0.4.

        Args:
            inp: Agent input; ``agent_prefs`` and ``tasks`` are read from it.

        Returns:
            Whatever ``self._make_output`` builds from the input, the prompt
            text, and the snapshot dict.
        """
        prefs = inp.agent_prefs
        tolerance = max(0.0, float(prefs.get("lateness_tolerance_days", 0)))
        project_realness: dict[str, float] = prefs.get("project_realness", {})

        def age_of(task: dict):
            return task.get("task_age_days", 0)

        def score_of(task: dict) -> float:
            return _realness(task.get("project_id"), project_realness)

        overdue = []
        for task in inp.tasks:
            if not task.get("is_overdue"):
                continue
            if age_of(task) >= tolerance:
                overdue.append(task)

        # Oldest first; negating the key keeps ties in their input order.
        top = sorted(overdue, key=lambda task: -age_of(task))[:3]
        count = len(overdue)

        titles_hint = "(Task titles may be in any language — always write the tip in English.)"

        if count == 0:
            prompt = "The user has no overdue tasks at this time. (Always write the tip in English.)"
        elif count == 1:
            only = top[0]
            score = score_of(only)
            item = _format_task(only, project_realness)
            if score < 0.4:
                prompt = f"The user has 1 task past its target date: {item}. {titles_hint}"
            else:
                prompt = f"The user has 1 overdue task: {item}. {titles_hint}"
        else:
            items = ", ".join(_format_task(task, project_realness) for task in top)
            # The headline label reflects all flagged tasks, not only the listed ones.
            mean_score = sum(score_of(task) for task in overdue) / count
            if mean_score < 0.4:
                label = "tasks past their target dates"
            else:
                label = "overdue tasks"
            prompt = f"The user has {count} {label}. Top {len(top)}: {items}. {titles_hint}"

        top_overdue = []
        for task in top:
            top_overdue.append(
                {
                    "content": task["content"],
                    "task_age_days": age_of(task),
                    "project_id": task.get("project_id"),
                    "realness": score_of(task),
                }
            )
        snapshot = {
            "overdue_count": count,
            "lateness_tolerance_days": tolerance,
            "top_overdue": top_overdue,
        }
        return self._make_output(inp, prompt, snapshot)