Patch #115: overdue-task agent, lateness tolerance inferred from task completions.

Adds a TaskCompletion history record, replaces the snooze-rate tolerance
heuristic with the median completion lateness, adds a per-project "realness"
score that softens prompt wording, and threads task_completions through
AgentInferRequest into UserHistory.

diff --git a/ml/agents/inference/__init__.py b/ml/agents/inference/__init__.py
index a02cf2a..c3aa56c 100644
--- a/ml/agents/inference/__init__.py
+++ b/ml/agents/inference/__init__.py
@@ -4,6 +4,6 @@ Each agent's manifest declares InferredParams; this package owns the scheduling
 contract, history data model, and write path to user_preferences.
 """
 from .framework import run_inference
-from .history import FeedbackEvent, UserHistory
+from .history import FeedbackEvent, TaskCompletion, UserHistory
 
-__all__ = ["run_inference", "FeedbackEvent", "UserHistory"]
+__all__ = ["run_inference", "FeedbackEvent", "TaskCompletion", "UserHistory"]
diff --git a/ml/agents/inference/history.py b/ml/agents/inference/history.py
index 69a3f87..57d71a4 100644
--- a/ml/agents/inference/history.py
+++ b/ml/agents/inference/history.py
@@ -23,7 +23,27 @@ class FeedbackEvent:
         return dt.hour
 
 
+@dataclass
+class TaskCompletion:
+    """A completed task that had a due date — used for lateness inference."""
+    project_id: str | None
+    completed_at: str  # ISO 8601
+    due_at: str        # ISO 8601
+
+    @property
+    def lateness_days(self) -> float:
+        """Days from due_at to completed_at; negative = early, 0.0 if a timestamp is unusable."""
+        try:
+            def _parse(s: str) -> datetime:
+                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
+                return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+            return (_parse(self.completed_at) - _parse(self.due_at)).total_seconds() / 86_400
+        except (AttributeError, TypeError, ValueError):
+            return 0.0
+
+
 @dataclass
 class UserHistory:
     user_id: str
     events: list[FeedbackEvent] = field(default_factory=list)
+    task_completions: list[TaskCompletion] = field(default_factory=list)
diff --git a/ml/agents/overdue_task.py b/ml/agents/overdue_task.py
index 2fd52aa..7ea8062 100644
--- a/ml/agents/overdue_task.py
+++ b/ml/agents/overdue_task.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import statistics
 from typing import ClassVar
 
 from .base import BaseAgent, AgentInput, AgentOutput
@@ -7,36 +8,64 @@ from .inference.history import UserHistory
 from .manifest import AgentManifest, InferredParam
 
 
-def _infer_lateness_tolerance(history: UserHistory) -> int:
-    """Estimate how many days past due a task needs to be before the user acts.
+def _infer_lateness_tolerance(history: UserHistory) -> float:
+    """p50 lateness (days) across completed tasks that had a due date, clipped at 0.
 
-    High snooze rate → user doesn't act immediately → raise tolerance so the
-    agent doesn't nag them about tasks they'll handle in their own time.
+    Negative lateness (finished early) pulls the percentile down; we clip at 0
+    so punctual users always get tolerance=0, never a negative offset.
     """
-    total = len(history.events)
-    if total == 0:
-        return 0
-    snooze_rate = sum(1 for e in history.events if e.action == "snooze") / total
-    if snooze_rate > 0.40:
-        return 2
-    if snooze_rate > 0.20:
-        return 1
-    return 0
+    lateness = [c.lateness_days for c in history.task_completions]
+    if not lateness:
+        return 0.0
+    return max(0.0, statistics.median(lateness))
+
+
+def _infer_project_realness(history: UserHistory) -> dict[str, float]:
+    """Per-project realness: 1 − (median project lateness / global median lateness).
+
+    Projects whose tasks are consistently completed on time get realness ≈ 1.
+    Aspirational projects (chronic lateness) get realness closer to 0.
+    """
+    completions = [c for c in history.task_completions if c.project_id]
+    if not completions:
+        return {}
+
+    global_median = statistics.median(c.lateness_days for c in completions)
+    if global_median <= 0:
+        # Median completion is on time or early — no project is less real than another.
+        return {pid: 1.0 for pid in {c.project_id for c in completions}}  # type: ignore[misc]
+
+    by_project: dict[str, list[float]] = {}
+    for c in completions:
+        by_project.setdefault(c.project_id, []).append(c.lateness_days)  # type: ignore[index]
+
+    result: dict[str, float] = {}
+    for pid, days in by_project.items():
+        project_median = statistics.median(days)
+        realness = 1.0 - (project_median / global_median)
+        result[pid] = round(max(0.0, min(1.0, realness)), 3)
+    return result
 
 
 MANIFEST = AgentManifest(
     id="overdue-task",
-    version="1.1.0",  # bumped: lateness_tolerance_days InferredParam added (#115)
+    version="1.2.0",  # #115: p50-lateness tolerance + per-project realness
     description="Reports the user's overdue tasks by count and age.",
     pref_schema={
         "type": "object",
         "additionalProperties": False,
         "properties": {
             "lateness_tolerance_days": {
-                "type": "integer",
+                "type": "number",
                 "minimum": 0,
                 "default": 0,
-                "description": "Days past due before a task is considered overdue. 0 = the moment it's late.",
+                "description": "Days past due before a task is flagged. p50 of historical lateness.",
+            },
+            "project_realness": {
+                "type": "object",
+                "additionalProperties": {"type": "number", "minimum": 0, "maximum": 1},
+                "default": {},
+                "description": "Per-project realness score [0,1]. Low = aspirational due dates.",
             },
         },
     },
@@ -48,15 +77,40 @@ MANIFEST = AgentManifest(
     inferred_params=[
         InferredParam(
             key="lateness_tolerance_days",
-            ttl_sec=86_400,  # recompute daily — snooze pattern shifts slowly
-            cold_start_default=0,
+            ttl_sec=7 * 86_400,  # recompute weekly — lateness habits shift slowly
+            cold_start_default=0.0,
             min_history=10,
             infer=_infer_lateness_tolerance,
         ),
+        InferredParam(
+            key="project_realness",
+            ttl_sec=7 * 86_400,
+            cold_start_default={},
+            min_history=10,
+            infer=_infer_project_realness,
+        ),
     ],
 )
 
 
+def _realness(project_id: str | None, project_realness: dict[str, float]) -> float:
+    """Return realness for a project, defaulting to 1.0 (treat as real)."""
+    if not project_id or not project_realness:
+        return 1.0
+    return project_realness.get(project_id, 1.0)
+
+
+def _format_task(task: dict, project_realness: dict[str, float]) -> str:
+    content = task["content"]
+    age = round(task.get("task_age_days", 0))
+    pid = task.get("project_id")
+    r = _realness(pid, project_realness)
+    unit = "day" if age == 1 else "days"
+    if r < 0.4:
+        return f'"{content}" ({age} {unit} past target date)'
+    return f'"{content}" ({age} {unit} overdue)'
+
+
 class OverdueTaskAgent(BaseAgent):
     """Reports the user's overdue tasks by count and age."""
     agent_id: ClassVar[str] = MANIFEST.id
@@ -64,7 +118,9 @@ class OverdueTaskAgent(BaseAgent):
     version: ClassVar[str] = MANIFEST.version
 
     def compute(self, inp: AgentInput) -> AgentOutput:
-        tolerance = max(0, int(inp.agent_prefs.get("lateness_tolerance_days", 0)))
+        tolerance = max(0.0, float(inp.agent_prefs.get("lateness_tolerance_days") or 0))
+        project_realness: dict[str, float] = inp.agent_prefs.get("project_realness") or {}
+
         overdue = [
             t for t in inp.tasks
             if t.get("is_overdue") and t.get("task_age_days", 0) >= tolerance
@@ -75,18 +131,21 @@ class OverdueTaskAgent(BaseAgent):
             prompt = "The user has no overdue tasks at this time."
         elif len(overdue) == 1:
             t = top[0]
-            age = round(t.get("task_age_days", 0))
-            prompt = (
-                f'The user has 1 overdue task: "{t["content"]}" '
-                f"({age} day{'s' if age != 1 else ''} overdue)."
-            )
+            r = _realness(t.get("project_id"), project_realness)
+            item = _format_task(t, project_realness)
+            if r < 0.4:
+                prompt = f"The user has 1 task past its target date: {item}."
+            else:
+                prompt = f"The user has 1 overdue task: {item}."
         else:
-            items = ", ".join(
-                f'"{t["content"]}" ({round(t.get("task_age_days", 0))}d)'
-                for t in top
+            items = ", ".join(_format_task(t, project_realness) for t in top)
+            avg_realness = (
+                sum(_realness(t.get("project_id"), project_realness) for t in overdue)
+                / len(overdue)
             )
+            label = "tasks past their target dates" if avg_realness < 0.4 else "overdue tasks"
             prompt = (
-                f"The user has {len(overdue)} overdue tasks. "
+                f"The user has {len(overdue)} {label}. "
                 f"Top {len(top)}: {items}."
             )
 
@@ -94,7 +153,12 @@ class OverdueTaskAgent(BaseAgent):
             "overdue_count": len(overdue),
             "lateness_tolerance_days": tolerance,
             "top_overdue": [
-                {"content": t["content"], "task_age_days": t.get("task_age_days", 0)}
+                {
+                    "content": t["content"],
+                    "task_age_days": t.get("task_age_days", 0),
+                    "project_id": t.get("project_id"),
+                    "realness": _realness(t.get("project_id"), project_realness),
+                }
                 for t in top
             ],
         }
diff --git a/ml/agents/tests/test_per_agent_inference.py b/ml/agents/tests/test_per_agent_inference.py
index 1ab356c..8552d53 100644
--- a/ml/agents/tests/test_per_agent_inference.py
+++ b/ml/agents/tests/test_per_agent_inference.py
@@ -8,7 +8,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
 from datetime import datetime, timezone
 
 import pytest
-from ml.agents.inference.history import FeedbackEvent, UserHistory
+from ml.agents.inference.history import FeedbackEvent, TaskCompletion, UserHistory
 from ml.agents.inference.framework import run_inference
 from ml.agents.momentum import MomentumAgent, MANIFEST as MOMENTUM_MANIFEST
 from ml.agents.overdue_task import OverdueTaskAgent, MANIFEST as OVERDUE_MANIFEST
@@ -32,8 +32,20 @@ def _event(action: str, days_ago: float = 1.0) -> FeedbackEvent:
     return FeedbackEvent(action=action, dwell_ms=dwell, created_at=ts)
 
 
-def _history(*events: FeedbackEvent) -> UserHistory:
-    return UserHistory(user_id="u1", events=list(events))
+def _history(*events: FeedbackEvent, completions: list[TaskCompletion] | None = None) -> UserHistory:
+    return UserHistory(user_id="u1", events=list(events), task_completions=completions or [])
+
+
+def _completion(project_id: str | None, lateness_days: float) -> TaskCompletion:
+    """Build a TaskCompletion where completed_at is lateness_days after due_at."""
+    from datetime import timedelta
+    due = _NOW - timedelta(days=30)
+    completed = due + timedelta(days=lateness_days)
+    return TaskCompletion(
+        project_id=project_id,
+        completed_at=completed.isoformat(),
+        due_at=due.isoformat(),
+    )
 
 
 # ── momentum: engagement_trend ───────────────────────────────────────────────
@@ -82,49 +94,94 @@ class TestMomentumInference:
         assert MOMENTUM_MANIFEST.version == "1.1.0"
 
 
-# ── overdue-task: lateness_tolerance_days ────────────────────────────────────
+# ── overdue-task: lateness_tolerance_days + project_realness (#115) ──────────
 
 class TestOverdueTaskInference:
-    def test_cold_start_returns_zero(self):
-        history = _history(*[_event("done") for _ in range(5)])
-        result = run_inference(OVERDUE_MANIFEST, history)
-        assert result["lateness_tolerance_days"] == 0
+    # -- lateness_tolerance_days inference --
 
-    def test_high_snooze_rate_returns_two(self):
-        events = [_event("snooze")] * 8 + [_event("done")] * 2
-        history = _history(*events)
+    def test_cold_start_returns_zero_when_few_completions(self):
+        # Below min_history=10 task completions → cold start
+        cs = [_completion("p1", 2.0) for _ in range(5)]
+        history = _history(*[_event("done")] * 5, completions=cs)
         result = run_inference(OVERDUE_MANIFEST, history)
-        assert result["lateness_tolerance_days"] == 2
+        assert result["lateness_tolerance_days"] == 0.0
 
-    def test_moderate_snooze_returns_one(self):
-        events = [_event("snooze")] * 3 + [_event("done")] * 7
-        history = _history(*events)
+    def test_punctual_user_zero_tolerance(self):
+        # User always finishes early or on time (negative lateness) → tolerance 0
+        cs = [_completion("p1", -1.0) for _ in range(12)]
+        history = _history(*[_event("done")] * 12, completions=cs)
         result = run_inference(OVERDUE_MANIFEST, history)
-        assert result["lateness_tolerance_days"] == 1
+        assert result["lateness_tolerance_days"] == 0.0
 
-    def test_low_snooze_returns_zero(self):
-        events = [_event("done")] * 9 + [_event("snooze")] * 1
-        history = _history(*events)
+    def test_chronic_late_user_positive_tolerance(self):
+        # User consistently finishes 5 days late → p50 = 5
+        cs = [_completion("p1", 5.0) for _ in range(12)]
+        history = _history(*[_event("done")] * 12, completions=cs)
         result = run_inference(OVERDUE_MANIFEST, history)
-        assert result["lateness_tolerance_days"] == 0
+        assert result["lateness_tolerance_days"] == pytest.approx(5.0)
+
+    def test_mixed_lateness_uses_median(self):
+        # 6 tasks at +1d, 6 tasks at +3d → median = 2
+        cs = [_completion("p1", 1.0)] * 6 + [_completion("p1", 3.0)] * 6
+        history = _history(*[_event("done")] * 12, completions=cs)
+        result = run_inference(OVERDUE_MANIFEST, history)
+        assert result["lateness_tolerance_days"] == pytest.approx(2.0)
+
+    # -- project_realness inference --
+
+    def test_project_realness_cold_start_empty(self):
+        cs = [_completion("p1", 1.0) for _ in range(5)]  # below min_history
+        history = _history(*[_event("done")] * 5, completions=cs)
+        result = run_inference(OVERDUE_MANIFEST, history)
+        assert result["project_realness"] == {}
+
+    def test_project_realness_punctual_project_scores_high(self):
+        # p1 always on time (0d late), p2 always 10d late → p1 should be realness ≈ 1
+        cs = [_completion("p1", 0.0)] * 6 + [_completion("p2", 10.0)] * 6
+        history = _history(*[_event("done")] * 12, completions=cs)
+        result = run_inference(OVERDUE_MANIFEST, history)
+        assert result["project_realness"]["p1"] > result["project_realness"]["p2"]
+
+    def test_project_realness_values_clipped_01(self):
+        cs = [_completion("p1", 0.0)] * 6 + [_completion("p2", 100.0)] * 6
+        history = _history(*[_event("done")] * 12, completions=cs)
+        result = run_inference(OVERDUE_MANIFEST, history)
+        for v in result["project_realness"].values():
+            assert 0.0 <= v <= 1.0
+
+    # -- compute() reads inferred prefs --
 
     def test_tolerance_filters_tasks(self):
         tasks = [
             {"content": "Fresh overdue", "is_overdue": True, "task_age_days": 0.5},
             {"content": "Old overdue", "is_overdue": True, "task_age_days": 3.0},
         ]
-        # tolerance=2 → only the 3-day task should count
         out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs={"lateness_tolerance_days": 2}))
         assert "1 overdue task" in out.prompt_text
         assert "Old overdue" in out.prompt_text
 
-    def test_snapshot_includes_tolerance(self):
-        tasks = [{"content": "T", "is_overdue": True, "task_age_days": 1.0}]
-        out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs={"lateness_tolerance_days": 0}))
-        assert "lateness_tolerance_days" in out.signals_snapshot
+    def test_low_realness_softens_language(self):
+        tasks = [{"content": "Wishlist", "is_overdue": True, "task_age_days": 3.0,
+                  "project_id": "aspirational"}]
+        prefs = {"lateness_tolerance_days": 0, "project_realness": {"aspirational": 0.2}}
+        out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs))
+        assert "target date" in out.prompt_text
+
+    def test_high_realness_uses_overdue_language(self):
+        tasks = [{"content": "Critical", "is_overdue": True, "task_age_days": 3.0,
+                  "project_id": "work"}]
+        prefs = {"lateness_tolerance_days": 0, "project_realness": {"work": 0.9}}
+        out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs))
+        assert "overdue" in out.prompt_text
+
+    def test_snapshot_includes_realness(self):
+        tasks = [{"content": "T", "is_overdue": True, "task_age_days": 1.0, "project_id": "p1"}]
+        prefs = {"lateness_tolerance_days": 0, "project_realness": {"p1": 0.8}}
+        out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs))
+        assert "realness" in out.signals_snapshot["top_overdue"][0]
 
     def test_version_bumped(self):
-        assert OVERDUE_MANIFEST.version == "1.1.0"
+        assert OVERDUE_MANIFEST.version == "1.2.0"
 
 
 # ── recent-patterns: window_days ─────────────────────────────────────────────
diff --git a/ml/serving/main.py b/ml/serving/main.py
index fb8a40d..a852319 100644
--- a/ml/serving/main.py
+++ b/ml/serving/main.py
@@ -40,7 +40,7 @@ if _repo_root not in sys.path:
 
 from ml.agents.base import AgentInput  # noqa: E402
 from ml.agents.registry import get_agent, all_agents, all_manifests, get_manifest  # noqa: E402
-from ml.agents.inference import run_inference, FeedbackEvent, UserHistory  # noqa: E402
+from ml.agents.inference import run_inference, FeedbackEvent, TaskCompletion, UserHistory  # noqa: E402
 
 logging_config.configure()
 
@@ -141,7 +141,8 @@ class AgentComputeResponse(BaseModel):
 
 class AgentInferRequest(BaseModel):
     user_id: str
-    feedback_history: list[dict] = []  # [{action, dwell_ms, created_at}, …]
+    feedback_history: list[dict] = []   # [{action, dwell_ms, created_at}, …]
+    task_completions: list[dict] = []   # [{project_id, completed_at, due_at}, …]
 
 
 class AgentInferResponse(BaseModel):
@@ -284,7 +285,15 @@ async def infer_agent(agent_id: str, req: AgentInferRequest) -> AgentInferRespon
         )
         for e in req.feedback_history
     ]
-    history = UserHistory(user_id=req.user_id, events=events)
+    completions = [
+        TaskCompletion(
+            project_id=c.get("project_id"),
+            completed_at=c.get("completed_at") or "",
+            due_at=c.get("due_at") or "",
+        )
+        for c in req.task_completions
+    ]
+    history = UserHistory(user_id=req.user_id, events=events, task_completions=completions)
 
     t0 = __import__("time").monotonic()
     inferred = run_inference(manifest, history)