- New ml/agents/clustering.py: embeds task content via nomic-embed-text (Ollama), applies greedy cosine clustering (threshold 0.72, max 6 clusters), and falls back gracefully to project-id grouping when Ollama is unreachable
- focus_area v2.0.0: compute() uses semantic clusters as focus areas; adds a preferred_areas InferredParam inferred from the top-2 projects by task_completion count
- 135 tests, all passing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from collections import Counter
|
||||
from typing import ClassVar
|
||||
|
||||
from .base import BaseAgent, AgentInput, AgentOutput
|
||||
from .manifest import AgentManifest
|
||||
from .clustering import cluster_tasks
|
||||
from .inference.history import UserHistory
|
||||
from .manifest import AgentManifest, InferredParam
|
||||
|
||||
|
||||
def _infer_preferred_areas(history: UserHistory) -> list[str]:
|
||||
"""Top-2 project IDs by completed task count (last 90 days worth of data)."""
|
||||
counts: Counter[str] = Counter()
|
||||
for tc in history.task_completions:
|
||||
if tc.project_id:
|
||||
counts[tc.project_id] += 1
|
||||
return [pid for pid, _ in counts.most_common(2)]
|
||||
|
||||
|
||||
MANIFEST = AgentManifest(
|
||||
id="focus-area",
|
||||
version="1.1.0", # bumped: preferred_areas pref is now honoured in compute (#113)
|
||||
description="Identifies the most congested project/area in the user's task list.",
|
||||
version="2.0.0", # semantic clustering via nomic-embed-text (#97, #113)
|
||||
description="Identifies the most congested semantic focus area in the user's task list.",
|
||||
pref_schema={
|
||||
"type": "object",
|
||||
"additionalProperties": False,
|
||||
@@ -19,7 +30,7 @@ MANIFEST = AgentManifest(
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"default": [],
|
||||
"description": "Project / label names to prioritise when multiple areas tie.",
|
||||
"description": "Project IDs or label names to prioritise when multiple areas tie.",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -27,59 +38,75 @@ MANIFEST = AgentManifest(
|
||||
required_consents=["data:core", "data:todoist", "agent:focus-area"],
|
||||
output_contract={"type": "snippet", "format": "free_text"},
|
||||
ttl_sec=43_200,
|
||||
# No inferred_params: preferred_areas requires project-level feedback linkage
|
||||
# that isn't available in feedback_history alone. Revisit with #78 (signal
|
||||
# abstraction) once per-task reactions can be traced back to a project.
|
||||
inferred_params=[
|
||||
InferredParam(
|
||||
key="preferred_areas",
|
||||
ttl_sec=86_400,
|
||||
cold_start_default=[],
|
||||
min_history=0, # use task_completions, not feedback events; handle empty inside
|
||||
infer=_infer_preferred_areas,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
class FocusAreaAgent(BaseAgent):
|
||||
"""Identifies the most congested project/area in the user's task list."""
|
||||
"""Identifies the most congested semantic focus area in the user's task list."""
|
||||
agent_id: ClassVar[str] = MANIFEST.id
|
||||
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
|
||||
version: ClassVar[str] = MANIFEST.version
|
||||
|
||||
def compute(self, inp: AgentInput) -> AgentOutput:
|
||||
preferred: list[str] = inp.agent_prefs.get("preferred_areas", [])
|
||||
by_project: dict[str, list[dict]] = defaultdict(list)
|
||||
for task in inp.tasks:
|
||||
project = task.get("project_id") or task.get("project") or "default"
|
||||
by_project[project].append(task)
|
||||
|
||||
if not by_project:
|
||||
prompt = "No tasks available to identify a focus area."
|
||||
return self._make_output(inp, prompt, {"project_count": 0})
|
||||
if not inp.tasks:
|
||||
return self._make_output(
|
||||
inp,
|
||||
"No tasks available to identify a focus area.",
|
||||
{"cluster_count": 0, "strategy": "none"},
|
||||
)
|
||||
|
||||
def score(project: str, tasks: list[dict]) -> tuple[float, bool]:
|
||||
base = sum(2.0 if t.get("is_overdue") else 1.0 for t in tasks)
|
||||
# Boost preferred areas to break ties in their favour
|
||||
boosted = project in preferred or any(p in project for p in preferred)
|
||||
return (base + (0.5 if boosted else 0.0), boosted)
|
||||
clusters = cluster_tasks(inp.tasks)
|
||||
|
||||
top_project, top_tasks = max(
|
||||
by_project.items(),
|
||||
key=lambda kv: score(kv[0], kv[1]),
|
||||
)
|
||||
overdue_in_top = sum(1 for t in top_tasks if t.get("is_overdue"))
|
||||
label = "the default project" if top_project == "default" else f'"{top_project}"'
|
||||
n = len(top_tasks)
|
||||
boosted = top_project in preferred or any(p in top_project for p in preferred)
|
||||
if not clusters:
|
||||
return self._make_output(
|
||||
inp,
|
||||
"No tasks available to identify a focus area.",
|
||||
{"cluster_count": 0, "strategy": "none"},
|
||||
)
|
||||
|
||||
strategy = "semantic" if len(clusters) > 1 or len(inp.tasks) > 1 else "fallback"
|
||||
|
||||
def score(cluster) -> float:
|
||||
base = sum(2.0 if t.get("is_overdue") else 1.0 for t in cluster.tasks)
|
||||
boosted = any(p in cluster.label for p in preferred) if preferred else False
|
||||
return base + (0.5 if boosted else 0.0)
|
||||
|
||||
top = max(clusters, key=score)
|
||||
boosted = bool(preferred) and any(p in top.label for p in preferred)
|
||||
|
||||
parts = [
|
||||
f"The user's most congested area is {label} "
|
||||
f"({n} task{'s' if n != 1 else ''}, {overdue_in_top} overdue)."
|
||||
f'The user\'s most active focus area is "{top.label}" '
|
||||
f"({top.task_count} task{'s' if top.task_count != 1 else ''}, "
|
||||
f"{top.overdue_count} overdue)."
|
||||
]
|
||||
if boosted:
|
||||
parts.append("This area matches the user's stated focus preferences.")
|
||||
if overdue_in_top >= 3:
|
||||
if top.overdue_count >= 3:
|
||||
parts.append("Consider surfacing an action from this area.")
|
||||
if len(clusters) > 1:
|
||||
other_total = sum(c.task_count for c in clusters if c is not top)
|
||||
parts.append(
|
||||
f"{len(clusters) - 1} other area{'s' if len(clusters) > 2 else ''} "
|
||||
f"contain {other_total} task{'s' if other_total != 1 else ''}."
|
||||
)
|
||||
|
||||
prompt = " ".join(parts)
|
||||
snapshot = {
|
||||
"top_project": top_project,
|
||||
"top_task_count": n,
|
||||
"top_overdue_count": overdue_in_top,
|
||||
"project_count": len(by_project),
|
||||
"top_cluster_label": top.label,
|
||||
"top_task_count": top.task_count,
|
||||
"top_overdue_count": top.overdue_count,
|
||||
"cluster_count": len(clusters),
|
||||
"strategy": strategy,
|
||||
"preferred_areas": preferred,
|
||||
}
|
||||
return self._make_output(inp, prompt, snapshot)
|
||||
return self._make_output(inp, " ".join(parts), snapshot)
|
||||
|
||||
Reference in New Issue
Block a user