From f66f337779d74d79880dfbeca65ba282e6b56cae Mon Sep 17 00:00:00 2001 From: alvis Date: Tue, 12 May 2026 14:58:31 +0000 Subject: [PATCH] feat(focus-area): use enriched descriptions in cluster output cluster_tasks now attaches enriched_description to each task dict. focus-area reads enriched_description (falling back to raw content) when building the area summary, so the orchestrator sees the expanded 3-sentence descriptions instead of terse raw titles. Co-Authored-By: Claude Sonnet 4.6 --- ml/agents/clustering.py | 5 +++++ ml/agents/focus_area.py | 15 ++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ml/agents/clustering.py b/ml/agents/clustering.py index 4eb189b..1d3ec93 100644 --- a/ml/agents/clustering.py +++ b/ml/agents/clustering.py @@ -268,6 +268,11 @@ def cluster_tasks( # Step 1: LLM-enrich titles → richer semantic signal before embedding. descriptions, new_enrichments = _enrich_batch(raw_titles, persistent_cache=enrichment_cache) + # Attach enriched description to each task dict so consumers (e.g. focus-area) + # can show the expanded text instead of the terse raw title. + for task, desc in zip(task_objs, descriptions): + task["enriched_description"] = desc + # Step 2: Prefix with nomic-embed-text task prefix, then batch-embed. prefixed = [f"clustering: {d}" for d in descriptions] vecs = _embed_batch(prefixed) diff --git a/ml/agents/focus_area.py b/ml/agents/focus_area.py index bc887f3..9eca856 100644 --- a/ml/agents/focus_area.py +++ b/ml/agents/focus_area.py @@ -45,11 +45,16 @@ class FocusAreaAgent(BaseAgent): lines = [f"The user's tasks are grouped into {len(clusters)} area(s):"] for i, cluster in enumerate(clusters, 1): - titles = [t.get("content", "").strip() for t in cluster.tasks if t.get("content")] - titles_str = "; ".join(f'"{t}"' for t in titles[:8]) - if len(titles) > 8: - titles_str += f" (and {len(titles) - 8} more)" - lines.append(f"{i}. {cluster.label} — {cluster.task_count} task(s): {titles_str}") + descs = [ + t.get("enriched_description") or t.get("content", "") + for t in cluster.tasks + if t.get("content") + ] + descs = [d.strip() for d in descs if d.strip()] + descs_str = "; ".join(f'"{d}"' for d in descs[:8]) + if len(descs) > 8: + descs_str += f" (and {len(descs) - 8} more)" + lines.append(f"{i}. {cluster.label} — {cluster.task_count} task(s): {descs_str}") lines.append("(Task titles may be in any language — always write the tip in English.)")