feat(agents): semantic task clustering + focus-area inferred preferred_areas (#97, #113)

- New ml/agents/clustering.py: embed task content via nomic-embed-text (Ollama), greedy cosine clustering (threshold 0.72, max 6 clusters), graceful fallback to project-id grouping when Ollama is unreachable
- focus_area v2.0.0: compute() uses semantic clusters as focus areas; adds preferred_areas InferredParam inferred from top-2 projects by task_completion count
- 135 tests, all passing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 06:54:46 +00:00
parent 336644a90a
commit 26fc67776f
5 changed files with 404 additions and 41 deletions
--- a/ml/agents/tests/test_per_agent_inference.py
+++ b/ml/agents/tests/test_per_agent_inference.py
@@ -662,4 +662,51 @@ class TestFocusAreaPreferredAreas:

    def test_version_bumped(self):
        from ml.agents.focus_area import MANIFEST as FA_MANIFEST
-        assert FA_MANIFEST.version == "1.1.0"
+        assert FA_MANIFEST.version == "2.0.0"
+
+    def test_snapshot_uses_cluster_keys(self):
+        tasks = [self._task("T", "work")]
+        out = self.agent.compute(_inp(tasks=tasks))
+        assert "top_cluster_label" in out.signals_snapshot
+        assert "cluster_count" in out.signals_snapshot
+        assert "strategy" in out.signals_snapshot
+
+
+# ── focus-area: preferred_areas inference from task_completions (#113) ────────
+
+class TestFocusAreaPreferredAreasInference:
+    from ml.agents.focus_area import MANIFEST as _FA_MANIFEST
+
+    def _completion(self, project_id: str) -> TaskCompletion:
+        return _completion(project_id, lateness_days=0.0)
+
+    def test_cold_start_no_completions(self):
+        history = _history(completions=[])
+        from ml.agents.focus_area import MANIFEST as FA_MANIFEST
+        result = run_inference(FA_MANIFEST, history)
+        assert result["preferred_areas"] == []
+
+    def test_top_two_projects_returned(self):
+        completions = (
+            [_completion("p1", 0)] * 8
+            + [_completion("p2", 0)] * 5
+            + [_completion("p3", 0)] * 2
+        )
+        history = _history(completions=completions)
+        from ml.agents.focus_area import MANIFEST as FA_MANIFEST
+        result = run_inference(FA_MANIFEST, history)
+        assert result["preferred_areas"] == ["p1", "p2"]
+
+    def test_single_project_returns_one(self):
+        completions = [_completion("work", 0)] * 6
+        history = _history(completions=completions)
+        from ml.agents.focus_area import MANIFEST as FA_MANIFEST
+        result = run_inference(FA_MANIFEST, history)
+        assert result["preferred_areas"] == ["work"]
+
+    def test_none_project_id_ignored(self):
+        completions = [_completion(None, 0)] * 5 + [_completion("real", 0)] * 3
+        history = _history(completions=completions)
+        from ml.agents.focus_area import MANIFEST as FA_MANIFEST
+        result = run_inference(FA_MANIFEST, history)
+        assert result["preferred_areas"] == ["real"]