feat(agents): manifest plumbing + GET /agents/registry (ADR-0014 step 3)

Each agent now exports a module-level MANIFEST declaring id, version,
pref_schema, required_consents, ttl_sec, and silenced_in_contexts. The
registry surfaces both the agent and its manifest, and rejects on
mismatch so the two cannot drift.

ml/serving exposes GET /agents/registry; services/api proxies it as
GET /api/agents/registry with a 60s in-process cache so admin pageviews
don't hammer upstream. Failures aren't cached.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 10:55:54 +00:00
parent 5d43339616
commit 305eeae38b
13 changed files with 511 additions and 33 deletions

View File

@@ -2,13 +2,37 @@ from __future__ import annotations
from collections import defaultdict
from typing import ClassVar
from .base import BaseAgent, AgentInput, AgentOutput
from .manifest import AgentManifest
MANIFEST = AgentManifest(
id="focus-area",
version="1.0.0",
description="Identifies the most congested project/area in the user's task list.",
pref_schema={
"type": "object",
"additionalProperties": False,
"properties": {
"preferred_areas": {
"type": "array",
"items": {"type": "string"},
"default": [],
"description": "Project / label names to prioritise when multiple areas tie.",
},
},
},
context_schema=["todoist.tasks"],
required_consents=["data:core", "data:todoist", "agent:focus-area"],
output_contract={"type": "snippet", "format": "free_text"},
ttl_sec=43_200,
)
class FocusAreaAgent(BaseAgent):
"""Identifies the most congested project/area in the user's task list."""
agent_id: ClassVar[str] = "focus-area"
ttl_seconds: ClassVar[int] = 43_200 # 12h
version: ClassVar[str] = "1.0.0"
agent_id: ClassVar[str] = MANIFEST.id
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
version: ClassVar[str] = MANIFEST.version
def compute(self, inp: AgentInput) -> AgentOutput:
by_project: dict[str, list[dict]] = defaultdict(list)

70
ml/agents/manifest.py Normal file
View File

@@ -0,0 +1,70 @@
"""Agent manifest dataclass (ADR-0014).
A manifest is the single point of registration for an agent. The orchestrator,
admin UI, registry endpoint, and inference framework all read from it. Adding
an agent is adding a manifest + agent class — never editing a list elsewhere.
The manifest lives next to the agent code (each agent module in ml/agents/
exposes a module-level `MANIFEST` constant). The registry surfaces both the
agent instance and its manifest.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Callable
@dataclass(frozen=True)
class InferredParam:
"""One auto-inferred preference key (#111-#116).
The inference framework owns scheduling, history reads, persistence, and
cold-start. Each agent's `inferred_params` list declares what to infer and
how, leaving each agent to implement just `infer()`.
"""
key: str # e.g. 'quietStart'
ttl_sec: int # how often to recompute
cold_start_default: Any # value used until min_history is met
min_history: int # event count threshold
# Pure function: given a UserHistory snapshot, return the inferred value.
# Typed as a generic callable here; concrete signature lives in the framework.
infer: Callable[[Any], Any] | None = None
@dataclass(frozen=True)
class AgentManifest:
"""Declarative description of an agent — see ADR-0014 §1."""
id: str # 'time-of-day'
version: str # bump invalidates cached outputs + inferences
description: str # one-line human summary for admin UI
pref_schema: dict # JSON Schema for user-tunable knobs
context_schema: list[str] # signals it reads, e.g. ['todoist.tasks']
required_consents: list[str] # ['data:todoist', 'agent:time-of-day']
output_contract: dict # snippet shape (free text + optional tags)
ttl_sec: int # snippet freshness for agent_outputs
silenced_in_contexts: list[str] = field(default_factory=list) # active context names that suppress this agent
inferred_params: list[InferredParam] = field(default_factory=list)
def to_dict(self) -> dict:
"""Serialise for the registry endpoint. `inferred_params` drops `infer`
(callable) since the wire format only carries metadata."""
return {
"id": self.id,
"version": self.version,
"description": self.description,
"pref_schema": self.pref_schema,
"context_schema": self.context_schema,
"required_consents": self.required_consents,
"output_contract": self.output_contract,
"ttl_sec": self.ttl_sec,
"silenced_in_contexts": list(self.silenced_in_contexts),
"inferred_params": [
{
"key": p.key,
"ttl_sec": p.ttl_sec,
"cold_start_default": p.cold_start_default,
"min_history": p.min_history,
}
for p in self.inferred_params
],
}

View File

@@ -1,13 +1,38 @@
from __future__ import annotations
from typing import ClassVar
from .base import BaseAgent, AgentInput, AgentOutput
from .manifest import AgentManifest
MANIFEST = AgentManifest(
id="momentum",
version="1.0.0",
description="Characterises the user's recent engagement trend from profile features.",
pref_schema={
"type": "object",
"additionalProperties": False,
"properties": {
"low_engagement_threshold_pct": {
"type": "integer",
"minimum": 0,
"maximum": 100,
"default": 25,
"description": "Completion rate below which momentum hints at low engagement.",
},
},
},
context_schema=["profile.features"],
required_consents=["data:core", "agent:momentum"],
output_contract={"type": "snippet", "format": "free_text"},
ttl_sec=21_600,
)
class MomentumAgent(BaseAgent):
"""Characterises the user's recent engagement trend from profile features."""
agent_id: ClassVar[str] = "momentum"
ttl_seconds: ClassVar[int] = 21600 # 6h
version: ClassVar[str] = "1.0.0"
agent_id: ClassVar[str] = MANIFEST.id
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
version: ClassVar[str] = MANIFEST.version
def compute(self, inp: AgentInput) -> AgentOutput:
completion = inp.profile.get("completion_rate_30d")

View File

@@ -1,13 +1,38 @@
from __future__ import annotations
from typing import ClassVar
from .base import BaseAgent, AgentInput, AgentOutput
from .manifest import AgentManifest
MANIFEST = AgentManifest(
id="overdue-task",
version="1.0.0",
description="Reports the user's overdue tasks by count and age.",
pref_schema={
"type": "object",
"additionalProperties": False,
"properties": {
"lateness_tolerance_days": {
"type": "integer",
"minimum": 0,
"default": 0,
"description": "Days past due before a task is considered overdue. 0 = the moment it's late.",
},
},
},
context_schema=["todoist.tasks"],
required_consents=["data:core", "data:todoist", "agent:overdue-task"],
output_contract={"type": "snippet", "format": "free_text"},
ttl_sec=3600,
silenced_in_contexts=["vacation"],
)
class OverdueTaskAgent(BaseAgent):
"""Reports the user's overdue tasks by count and age."""
agent_id: ClassVar[str] = "overdue-task"
ttl_seconds: ClassVar[int] = 3600 # 1h — overdue status changes infrequently
version: ClassVar[str] = "1.0.0"
agent_id: ClassVar[str] = MANIFEST.id
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
version: ClassVar[str] = MANIFEST.version
def compute(self, inp: AgentInput) -> AgentOutput:
overdue = [t for t in inp.tasks if t.get("is_overdue")]

View File

@@ -3,15 +3,40 @@ from collections import Counter
from datetime import datetime, timezone
from typing import ClassVar
from .base import BaseAgent, AgentInput, AgentOutput
from .manifest import AgentManifest
_SEVEN_DAYS_S = 7 * 86_400
MANIFEST = AgentManifest(
id="recent-patterns",
version="1.0.0",
description="Surfaces the user's reaction pattern from the last 7 days of feedback.",
pref_schema={
"type": "object",
"additionalProperties": False,
"properties": {
"window_days": {
"type": "integer",
"minimum": 1,
"maximum": 30,
"default": 7,
"description": "Lookback window for pattern analysis.",
},
},
},
context_schema=["tip_feedback", "profile.features"],
required_consents=["data:core", "agent:recent-patterns"],
output_contract={"type": "snippet", "format": "free_text"},
ttl_sec=86_400,
)
class RecentPatternsAgent(BaseAgent):
"""Surfaces the user's reaction pattern from the last 7 days of feedback."""
agent_id: ClassVar[str] = "recent-patterns"
ttl_seconds: ClassVar[int] = 86_400 # 24h
version: ClassVar[str] = "1.0.0"
agent_id: ClassVar[str] = MANIFEST.id
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
version: ClassVar[str] = MANIFEST.version
def compute(self, inp: AgentInput) -> AgentOutput:
now_ts = inp.now.timestamp()

View File

@@ -1,21 +1,41 @@
from __future__ import annotations
from .base import BaseAgent
from .overdue_task import OverdueTaskAgent
from .momentum import MomentumAgent
from .time_of_day import TimeOfDayAgent
from .recent_patterns import RecentPatternsAgent
from .focus_area import FocusAreaAgent
"""Agent registry — single point of registration for sub-agents (ADR-0014).
_AGENTS: dict[str, BaseAgent] = {
a.agent_id: a
for a in [
OverdueTaskAgent(),
MomentumAgent(),
TimeOfDayAgent(),
RecentPatternsAgent(),
FocusAreaAgent(),
]
}
Each agent module contributes:
- a `BaseAgent` subclass instance
- a module-level `MANIFEST: AgentManifest`
The orchestrator, registry endpoint, and inference framework all read from
here. Adding an agent is: add a module, register it once below.
"""
from __future__ import annotations
from .base import BaseAgent
from .manifest import AgentManifest
from .overdue_task import OverdueTaskAgent, MANIFEST as OVERDUE_TASK_MANIFEST
from .momentum import MomentumAgent, MANIFEST as MOMENTUM_MANIFEST
from .time_of_day import TimeOfDayAgent, MANIFEST as TIME_OF_DAY_MANIFEST
from .recent_patterns import RecentPatternsAgent, MANIFEST as RECENT_PATTERNS_MANIFEST
from .focus_area import FocusAreaAgent, MANIFEST as FOCUS_AREA_MANIFEST
_REGISTERED: list[tuple[BaseAgent, AgentManifest]] = [
(OverdueTaskAgent(), OVERDUE_TASK_MANIFEST),
(MomentumAgent(), MOMENTUM_MANIFEST),
(TimeOfDayAgent(), TIME_OF_DAY_MANIFEST),
(RecentPatternsAgent(), RECENT_PATTERNS_MANIFEST),
(FocusAreaAgent(), FOCUS_AREA_MANIFEST),
]
# Sanity check — agent_id and manifest.id must agree, otherwise the registry
# becomes inconsistent across endpoints.
for _agent, _manifest in _REGISTERED:
if _agent.agent_id != _manifest.id:
raise RuntimeError(
f"Manifest mismatch: {_agent.__class__.__name__}.agent_id={_agent.agent_id!r} "
f"≠ MANIFEST.id={_manifest.id!r}"
)
_AGENTS: dict[str, BaseAgent] = {a.agent_id: a for a, _ in _REGISTERED}
_MANIFESTS: dict[str, AgentManifest] = {m.id: m for _, m in _REGISTERED}
def get_agent(agent_id: str) -> BaseAgent:
@@ -26,3 +46,13 @@ def get_agent(agent_id: str) -> BaseAgent:
def all_agents() -> list[BaseAgent]:
return list(_AGENTS.values())
def get_manifest(agent_id: str) -> AgentManifest:
if agent_id not in _MANIFESTS:
raise KeyError(f"Unknown agent: {agent_id!r}. Known: {sorted(_MANIFESTS)}")
return _MANIFESTS[agent_id]
def all_manifests() -> list[AgentManifest]:
return list(_MANIFESTS.values())

View File

@@ -0,0 +1,67 @@
"""Manifest registry tests (ADR-0014).
Each agent module exports a `MANIFEST: AgentManifest` whose id and version
must agree with the agent class. The registry exposes both, and `to_dict()`
must drop the `infer` callable so the wire payload is JSON-serialisable.
"""
from __future__ import annotations
import json
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
import pytest # noqa: E402
from ml.agents.manifest import AgentManifest, InferredParam # noqa: E402
from ml.agents.registry import ( # noqa: E402
all_agents,
all_manifests,
get_agent,
get_manifest,
)
def test_every_agent_has_a_matching_manifest():
agents = {a.agent_id: a for a in all_agents()}
manifests = {m.id: m for m in all_manifests()}
assert agents.keys() == manifests.keys(), "agent / manifest registries diverged"
for aid in agents:
assert agents[aid].version == manifests[aid].version, (
f"version mismatch for {aid}: agent={agents[aid].version!r} "
f"manifest={manifests[aid].version!r}"
)
@pytest.mark.parametrize("agent_id", [
"overdue-task", "momentum", "time-of-day", "recent-patterns", "focus-area",
])
def test_manifest_required_fields(agent_id: str):
m = get_manifest(agent_id)
assert m.id == agent_id
assert m.version
assert m.description
assert isinstance(m.pref_schema, dict) and m.pref_schema.get("type") == "object"
assert isinstance(m.required_consents, list) and m.required_consents
assert "data:core" in m.required_consents, "every agent should require data:core"
assert m.ttl_sec == get_agent(agent_id).ttl_seconds, "ttl divergence"
def test_to_dict_is_json_serialisable_and_drops_infer_callable():
m = AgentManifest(
id="x", version="1.0.0", description="d",
pref_schema={"type": "object"}, context_schema=[], required_consents=["data:core"],
output_contract={"type": "snippet"}, ttl_sec=60,
inferred_params=[InferredParam(key="k", ttl_sec=60, cold_start_default=0, min_history=10, infer=lambda h: 0)],
)
payload = m.to_dict()
# Round-trip through json to confirm no callables / non-JSON types leaked.
data = json.loads(json.dumps(payload))
assert data["inferred_params"][0]["key"] == "k"
assert "infer" not in data["inferred_params"][0]
def test_get_manifest_unknown_raises():
with pytest.raises(KeyError):
get_manifest("not-an-agent")

View File

@@ -1,15 +1,43 @@
from __future__ import annotations
from typing import ClassVar
from .base import BaseAgent, AgentInput, AgentOutput
from .manifest import AgentManifest
_DOW_NAMES = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
MANIFEST = AgentManifest(
id="time-of-day",
version="1.0.0",
description="Frames the current moment relative to the user's productive peak and quiet hours.",
pref_schema={
"type": "object",
"additionalProperties": False,
"properties": {
"quiet_start": {
"type": "string",
"pattern": "^([01][0-9]|2[0-3]):[0-5][0-9]$",
"description": "HH:MM start of quiet hours (24h, user's local TZ).",
},
"quiet_end": {
"type": "string",
"pattern": "^([01][0-9]|2[0-3]):[0-5][0-9]$",
"description": "HH:MM end of quiet hours.",
},
},
},
context_schema=["profile.features"],
required_consents=["data:core", "agent:time-of-day"],
output_contract={"type": "snippet", "format": "free_text"},
ttl_sec=900,
)
class TimeOfDayAgent(BaseAgent):
"""Frames the current moment relative to the user's productive peak."""
agent_id: ClassVar[str] = "time-of-day"
ttl_seconds: ClassVar[int] = 900 # 15m — must stay current-hour accurate
version: ClassVar[str] = "1.0.0"
agent_id: ClassVar[str] = MANIFEST.id
ttl_seconds: ClassVar[int] = MANIFEST.ttl_sec
version: ClassVar[str] = MANIFEST.version
def compute(self, inp: AgentInput) -> AgentOutput:
hour = inp.now.hour

View File

@@ -38,7 +38,7 @@ if _repo_root not in sys.path:
sys.path.insert(0, _repo_root)
from ml.agents.base import AgentInput # noqa: E402
from ml.agents.registry import get_agent, all_agents # noqa: E402
from ml.agents.registry import get_agent, all_agents, all_manifests # noqa: E402
logging_config.configure()
@@ -177,6 +177,16 @@ def health():
}
@app.get("/agents/registry")
def agents_registry():
"""Manifest list for every registered agent (ADR-0014).
Consumers: TS recommender (eligibility filter), admin UI (auto-rendered
pref forms), inference framework (#111). Static at process boot.
"""
return {"agents": [m.to_dict() for m in all_manifests()]}
_RETRY_SUFFIX = (
"\n\nYour previous response was not valid JSON. "
"Reply ONLY with the JSON array — no prose, no markdown fences."

View File

@@ -0,0 +1,21 @@
"""GET /agents/registry — manifests are exposed in JSON-serialisable form."""
import pytest
from httpx import AsyncClient, ASGITransport
from main import app
@pytest.mark.anyio
async def test_registry_returns_all_agents():
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
resp = await client.get("/agents/registry")
assert resp.status_code == 200
payload = resp.json()
ids = {a["id"] for a in payload["agents"]}
assert ids == {"overdue-task", "momentum", "time-of-day", "recent-patterns", "focus-area"}
sample = payload["agents"][0]
for key in ("id", "version", "description", "pref_schema", "required_consents", "ttl_sec"):
assert key in sample