"""Per-agent inference tests: momentum (#114), overdue-task (#115), recent-patterns (#116), and focus-area (#113) preferred_areas wiring.""" from __future__ import annotations import sys, os sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..")) from datetime import datetime, timezone import pytest from ml.agents.inference.history import FeedbackEvent, TaskCompletion, UserHistory from ml.agents.inference.framework import run_inference from ml.agents.momentum import MomentumAgent, MANIFEST as MOMENTUM_MANIFEST from ml.agents.overdue_task import OverdueTaskAgent, MANIFEST as OVERDUE_MANIFEST from ml.agents.recent_patterns import RecentPatternsAgent, MANIFEST as RECENT_MANIFEST from ml.agents.focus_area import FocusAreaAgent from ml.agents.base import AgentInput _NOW = datetime(2026, 5, 8, 14, 0, 0, tzinfo=timezone.utc) def _inp(**kwargs) -> AgentInput: defaults = dict(user_id="u1", tasks=[], profile={}, now=_NOW, agent_prefs={}) defaults.update(kwargs) return AgentInput(**defaults) def _event(action: str, days_ago: float = 1.0) -> FeedbackEvent: from datetime import timedelta ts = (_NOW - timedelta(days=days_ago)).isoformat() dwell = 60_000 if action == "done" else 500 return FeedbackEvent(action=action, dwell_ms=dwell, created_at=ts) def _history(*events: FeedbackEvent, completions: list[TaskCompletion] | None = None) -> UserHistory: return UserHistory(user_id="u1", events=list(events), task_completions=completions or []) def _completion(project_id: str | None, lateness_days: float) -> TaskCompletion: """Build a TaskCompletion where completed_at is lateness_days after due_at.""" from datetime import timedelta due = _NOW - timedelta(days=30) completed = due + timedelta(days=lateness_days) return TaskCompletion( project_id=project_id, completed_at=completed.isoformat(), due_at=due.isoformat(), ) # ── momentum: engagement_trend ─────────────────────────────────────────────── class TestMomentumInference: def test_cold_start_below_min_history(self): history = _history(*[_event("done", days_ago=i) for i in range(5)]) result = run_inference(MOMENTUM_MANIFEST, history) assert result["engagement_trend"] == "stable" # cold_start_default def test_trend_up_when_recent_done_rate_higher(self): # 8 done in last 7 days, 1 done in prior 7 days → trending up recent = [_event("done", days_ago=i) for i in range(1, 9)] older = [_event("dismiss", days_ago=i) for i in range(8, 15)] older[0] = _event("done", days_ago=8) # one done in older window history = _history(*recent, *older) result = run_inference(MOMENTUM_MANIFEST, history) assert result["engagement_trend"] == "up" def test_trend_down_when_recent_done_rate_lower(self): recent = [_event("dismiss", days_ago=i) for i in range(1, 8)] older = [_event("done", days_ago=i) for i in range(8, 15)] history = _history(*recent, *older) result = run_inference(MOMENTUM_MANIFEST, history) assert result["engagement_trend"] == "down" def test_trend_stable_when_similar(self): events = [_event("done" if i % 2 == 0 else "dismiss", days_ago=i) for i in range(1, 15)] history = _history(*events) result = run_inference(MOMENTUM_MANIFEST, history) assert result["engagement_trend"] == "stable" def test_agent_uses_trend_in_snippet(self): out = MomentumAgent().compute(_inp(agent_prefs={"engagement_trend": "up"})) assert "trending up" in out.prompt_text def test_agent_uses_down_trend_in_snippet(self): out = MomentumAgent().compute(_inp(agent_prefs={"engagement_trend": "down"})) assert "trending down" in out.prompt_text def test_snapshot_includes_trend(self): out = MomentumAgent().compute(_inp(agent_prefs={"engagement_trend": "stable"})) assert "engagement_trend" in out.signals_snapshot def test_version_bumped(self): assert MOMENTUM_MANIFEST.version == "1.1.0" # ── overdue-task: lateness_tolerance_days + project_realness (#115) ────────── class TestOverdueTaskInference: # -- lateness_tolerance_days inference -- def test_cold_start_returns_zero_when_few_completions(self): # Below min_history=10 task completions → cold start cs = [_completion("p1", 2.0) for _ in range(5)] history = _history(*[_event("done")] * 5, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["lateness_tolerance_days"] == 0.0 def test_punctual_user_zero_tolerance(self): # User always finishes early or on time (negative lateness) → tolerance 0 cs = [_completion("p1", -1.0) for _ in range(12)] history = _history(*[_event("done")] * 12, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["lateness_tolerance_days"] == 0.0 def test_chronic_late_user_positive_tolerance(self): # User consistently finishes 5 days late → p50 = 5 cs = [_completion("p1", 5.0) for _ in range(12)] history = _history(*[_event("done")] * 12, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["lateness_tolerance_days"] == pytest.approx(5.0) def test_mixed_lateness_uses_median(self): # 6 tasks at +1d, 6 tasks at +3d → median = 2 cs = [_completion("p1", 1.0)] * 6 + [_completion("p1", 3.0)] * 6 history = _history(*[_event("done")] * 12, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["lateness_tolerance_days"] == pytest.approx(2.0) # -- project_realness inference -- def test_project_realness_cold_start_empty(self): cs = [_completion("p1", 1.0) for _ in range(5)] # below min_history history = _history(*[_event("done")] * 5, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["project_realness"] == {} def test_project_realness_punctual_project_scores_high(self): # p1 always on time (0d late), p2 always 10d late → p1 should be realness ≈ 1 cs = [_completion("p1", 0.0)] * 6 + [_completion("p2", 10.0)] * 6 history = _history(*[_event("done")] * 12, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) assert result["project_realness"]["p1"] > result["project_realness"]["p2"] def test_project_realness_values_clipped_01(self): cs = [_completion("p1", 0.0)] * 6 + [_completion("p2", 100.0)] * 6 history = _history(*[_event("done")] * 12, completions=cs) result = run_inference(OVERDUE_MANIFEST, history) for v in result["project_realness"].values(): assert 0.0 <= v <= 1.0 # -- compute() reads inferred prefs -- def test_tolerance_filters_tasks(self): tasks = [ {"content": "Fresh overdue", "is_overdue": True, "task_age_days": 0.5}, {"content": "Old overdue", "is_overdue": True, "task_age_days": 3.0}, ] out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs={"lateness_tolerance_days": 2})) assert "1 overdue task" in out.prompt_text assert "Old overdue" in out.prompt_text def test_low_realness_softens_language(self): tasks = [{"content": "Wishlist", "is_overdue": True, "task_age_days": 3.0, "project_id": "aspirational"}] prefs = {"lateness_tolerance_days": 0, "project_realness": {"aspirational": 0.2}} out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs)) assert "target date" in out.prompt_text def test_high_realness_uses_overdue_language(self): tasks = [{"content": "Critical", "is_overdue": True, "task_age_days": 3.0, "project_id": "work"}] prefs = {"lateness_tolerance_days": 0, "project_realness": {"work": 0.9}} out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs)) assert "overdue" in out.prompt_text def test_snapshot_includes_realness(self): tasks = [{"content": "T", "is_overdue": True, "task_age_days": 1.0, "project_id": "p1"}] prefs = {"lateness_tolerance_days": 0, "project_realness": {"p1": 0.8}} out = OverdueTaskAgent().compute(_inp(tasks=tasks, agent_prefs=prefs)) assert "realness" in out.signals_snapshot["top_overdue"][0] def test_version_bumped(self): assert OVERDUE_MANIFEST.version == "1.2.0" # ── recent-patterns: window_days ───────────────────────────────────────────── class TestRecentPatternsInference: def test_cold_start_default_7(self): history = _history(*[_event("done") for _ in range(3)]) # below min_history=5 result = run_inference(RECENT_MANIFEST, history) assert result["window_days"] == 7 # cold_start_default def test_sparse_history_widens_window(self): history = _history(*[_event("done") for _ in range(5)]) # 5 events, n < 7 → 30 days result = run_inference(RECENT_MANIFEST, history) assert result["window_days"] == 30 def test_moderate_history_14_days(self): history = _history(*[_event("done") for _ in range(10)]) # 7 ≤ n < 14 → 14 days result = run_inference(RECENT_MANIFEST, history) assert result["window_days"] == 14 def test_dense_history_stays_7(self): history = _history(*[_event("done") for _ in range(20)]) # 20+ → 7 days result = run_inference(RECENT_MANIFEST, history) assert result["window_days"] == 7 def test_agent_uses_window_days_pref(self): from datetime import timedelta # 5 feedback events, all within 14 days but older than 7 days feedback = [ {"action": "done", "dwell_ms": 60000, "created_at": (_NOW - timedelta(days=10)).isoformat()} ] * 5 # With window_days=7 → 0 events seen; with window_days=14 → 5 events out_narrow = RecentPatternsAgent().compute( _inp(feedback_history=feedback, agent_prefs={"window_days": 7}) ) out_wide = RecentPatternsAgent().compute( _inp(feedback_history=feedback, agent_prefs={"window_days": 14}) ) assert "No tip reactions" in out_narrow.prompt_text assert "5 tip reactions" in out_wide.prompt_text def test_snapshot_includes_window_days(self): out = RecentPatternsAgent().compute(_inp(agent_prefs={"window_days": 14})) assert out.signals_snapshot["window_days"] == 14 def test_version_bumped(self): assert RECENT_MANIFEST.version == "1.1.0" # ── focus-area: preferred_areas wiring ─────────────────────────────────────── class TestFocusAreaPreferredAreas: agent = FocusAreaAgent() def _task(self, content: str, project_id: str, is_overdue: bool = False) -> dict: return {"id": "t1", "content": content, "is_overdue": is_overdue, "task_age_days": 2.0, "priority": 1, "project_id": project_id} def test_preferred_area_wins_tie(self): tasks = [ self._task("Work thing", "work"), self._task("Home thing", "home"), ] out = self.agent.compute(_inp(tasks=tasks, agent_prefs={"preferred_areas": ["work"]})) assert "work" in out.prompt_text assert "matches the user's stated focus preferences" in out.prompt_text def test_no_preferred_areas_uses_congestion_score(self): tasks = [ self._task("W1", "work"), self._task("H1", "home"), self._task("H2", "home"), ] out = self.agent.compute(_inp(tasks=tasks)) # home has more tasks → wins without any preference assert "home" in out.prompt_text def test_snapshot_includes_preferred_areas(self): tasks = [self._task("T", "work")] out = self.agent.compute(_inp(tasks=tasks, agent_prefs={"preferred_areas": ["work"]})) assert out.signals_snapshot["preferred_areas"] == ["work"] def test_version_bumped(self): from ml.agents.focus_area import MANIFEST as FA_MANIFEST assert FA_MANIFEST.version == "1.1.0"