refactor(focus-area): output all clusters as context; remove scoring and preferred_areas

The agent no longer picks a winner — it summarises every cluster so the orchestrator can decide what's relevant. Scoring by overdue count overlapped with the overdue-task agent, so it is removed. preferred_areas is removed entirely: the values were inferred as project IDs but matched by substring against cluster labels, so the label matching was broken. Output format: numbered list of areas with task titles included (at most 8 titles per area, followed by an "and N more" suffix). Snapshot: {cluster_count, clusters: [{label, task_count, tasks}]}. Version bumped to 3.0.0; inferred_params cleared.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 14:57:04 +00:00
parent 12c956b588
commit f6b89fc849
3 changed files with 69 additions and 168 deletions
--- a/ml/agents/focus_area.py
+++ b/ml/agents/focus_area.py
@@ -1,69 +1,37 @@
 from __future__ import annotations

-from collections import Counter
 from typing import ClassVar

 from .base import BaseAgent, AgentInput, AgentOutput
 from .clustering import cluster_tasks
-from .inference.history import UserHistory
-from .manifest import AgentManifest, InferredParam
-
-
-def _infer_preferred_areas(history: UserHistory) -> list[str]:
-    """Top-2 project IDs by completed task count (last 90 days worth of data)."""
-    counts: Counter[str] = Counter()
-    for tc in history.task_completions:
-        if tc.project_id:
-            counts[tc.project_id] += 1
-    return [pid for pid, _ in counts.most_common(2)]
+from .manifest import AgentManifest


 MANIFEST = AgentManifest(
    id="focus-area",
-    version="2.1.0",  # 1h TTL + task-change detection (#129)
-    description="Identifies the most congested semantic focus area in the user's task list.",
-    pref_schema={
-        "type": "object",
-        "additionalProperties": False,
-        "properties": {
-            "preferred_areas": {
-                "type": "array",
-                "items": {"type": "string"},
-                "default": [],
-                "description": "Project IDs or label names to prioritise when multiple areas tie.",
-            },
-        },
-    },
+    version="3.0.0",  # output all clusters as context; no scoring (#129)
+    description="Clusters the user's task list and summarises all areas for the orchestrator.",
+    pref_schema={"type": "object", "additionalProperties": False, "properties": {}},
    context_schema=["todoist.tasks"],
    required_consents=["data:core", "data:todoist"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=86_400,
-    inferred_params=[
-        InferredParam(
-            key="preferred_areas",
-            ttl_sec=86_400,
-            cold_start_default=[],
-            min_history=0,   # use task_completions, not feedback events; handle empty inside
-            infer=_infer_preferred_areas,
-        ),
-    ],
+    inferred_params=[],
 )


 class FocusAreaAgent(BaseAgent):
-    """Identifies the most congested semantic focus area in the user's task list."""
+    """Clusters tasks and outputs a full area summary for the orchestrator."""
    agent_id: ClassVar[str] = MANIFEST.id
    ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
-    version: ClassVar[str] = MANIFEST.version  # 2.1.0
+    version: ClassVar[str] = MANIFEST.version  # 3.0.0

    def compute(self, inp: AgentInput) -> AgentOutput:
-        preferred: list[str] = inp.agent_prefs.get("preferred_areas", [])
-
        if not inp.tasks:
            return self._make_output(
                inp,
-                "No tasks available to identify a focus area.",
-                {"cluster_count": 0, "strategy": "none"},
+                "No tasks available to identify focus areas.",
+                {"cluster_count": 0},
            )

        clusters, new_enrichments = cluster_tasks(inp.tasks, enrichment_cache=inp.enrichment_cache)
@@ -71,45 +39,27 @@ class FocusAreaAgent(BaseAgent):
        if not clusters:
            return self._make_output(
                inp,
-                "No tasks available to identify a focus area.",
-                {"cluster_count": 0, "strategy": "none"},
+                "No tasks available to identify focus areas.",
+                {"cluster_count": 0},
            )

-        strategy = "semantic" if len(clusters) > 1 or len(inp.tasks) > 1 else "fallback"
+        lines = [f"The user's tasks are grouped into {len(clusters)} area(s):"]
+        for i, cluster in enumerate(clusters, 1):
+            titles = [t.get("content", "").strip() for t in cluster.tasks if t.get("content")]
+            titles_str = "; ".join(f'"{t}"' for t in titles[:8])
+            if len(titles) > 8:
+                titles_str += f" (and {len(titles) - 8} more)"
+            lines.append(f"{i}. {cluster.label} — {cluster.task_count} task(s): {titles_str}")

-        def score(cluster) -> float:
-            base = sum(2.0 if t.get("is_overdue") else 1.0 for t in cluster.tasks)
-            boosted = any(p in cluster.label for p in preferred) if preferred else False
-            return base + (0.5 if boosted else 0.0)
-
-        top = max(clusters, key=score)
-        boosted = bool(preferred) and any(p in top.label for p in preferred)
-
-        parts = [
-            f'The user\'s most active focus area is "{top.label}" '
-            f"({top.task_count} task{'s' if top.task_count != 1 else ''}, "
-            f"{top.overdue_count} overdue). "
-            f"(Note: task titles may be in any language — always write the tip in English.)"
-        ]
-        if boosted:
-            parts.append("This area matches the user's stated focus preferences.")
-        if top.overdue_count >= 3:
-            parts.append("Consider surfacing an action from this area.")
-        if len(clusters) > 1:
-            other_total = sum(c.task_count for c in clusters if c is not top)
-            parts.append(
-                f"{len(clusters) - 1} other area{'s' if len(clusters) > 2 else ''} "
-                f"contain {other_total} task{'s' if other_total != 1 else ''}."
-            )
+        lines.append("(Task titles may be in any language — always write the tip in English.)")

        snapshot = {
-            "top_cluster_label": top.label,
-            "top_task_count": top.task_count,
-            "top_overdue_count": top.overdue_count,
            "cluster_count": len(clusters),
-            "strategy": strategy,
-            "preferred_areas": preferred,
-            # Consumed by compute_agent endpoint; stripped before storing the snapshot.
+            "clusters": [
+                {"label": c.label, "task_count": c.task_count,
+                 "tasks": [t.get("content", "") for t in c.tasks]}
+                for c in clusters
+            ],
            "_new_enrichments": new_enrichments,
        }
-        return self._make_output(inp, " ".join(parts), snapshot)
+        return self._make_output(inp, "\n".join(lines), snapshot)