feat(profile): /api/profile + eligibility filter + inference framework (ADR-0014 steps 4-6)
Step 4 — /api/profile read-through API:
GET /api/profile → { user, prefs, consents, contexts }
PATCH /api/profile/prefs/:scope upsert user_preferences (source='user')
PATCH /api/profile/consents grant / revoke consent keys
PATCH /api/profile/contexts create / activate / deactivate contexts
Legacy consentGiven bit folded in as data:core fallback.
Step 5 — registry-driven eligibility filter:
fetchRegistry() exported from agent-registry.ts.
profile/eligibility.ts: getEligibleAgentIds(userId) — filters by required
consents, silenced_in_contexts, and user_preferences[enabled=false].
fetchOrchestratorTip filters agent_outputs to eligible set before calling
ml/serving /recommend. Fail-closed: registry unavailable → empty set.
Step 6 — shared context-inference framework (#111) + time-of-day proof (#112):
ml/agents/inference/: UserHistory, FeedbackEvent, run_inference().
Framework: cold-start, min_history gating, error fallback, structured logs.
TimeOfDayAgent v1.1.0: inferred_params=[preferred_hour]; also reads
quiet_start/quiet_end from agent_prefs. agent_prefs injected by TS caller.
AgentInput gains agent_prefs field.
ml/serving: POST /agents/{agent_id}/infer endpoint.
agent-outputs.ts computeAndStore: loads prefs before compute, calls /infer
after, persists results (source='inferred'); user overrides never touched.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ oO ML Serving — multi-agent orchestrator (ADR-0013).
|
||||
|
||||
Contract:
|
||||
POST /agents/{agent_id}/compute run a sub-agent, return prompt snippet
|
||||
POST /agents/{agent_id}/infer run inference framework for a user, return inferred prefs
|
||||
POST /recommend orchestrate agent snippets → one tip via LiteLLM
|
||||
POST /generate LLM tip candidates (legacy; kept for bench/eval)
|
||||
GET /health { ok, agents: [...] }
|
||||
@@ -38,7 +39,8 @@ if _repo_root not in sys.path:
|
||||
sys.path.insert(0, _repo_root)
|
||||
|
||||
from ml.agents.base import AgentInput # noqa: E402
|
||||
from ml.agents.registry import get_agent, all_agents, all_manifests # noqa: E402
|
||||
from ml.agents.registry import get_agent, all_agents, all_manifests, get_manifest # noqa: E402
|
||||
from ml.agents.inference import run_inference, FeedbackEvent, UserHistory # noqa: E402
|
||||
|
||||
logging_config.configure()
|
||||
|
||||
@@ -123,6 +125,8 @@ class AgentComputeRequest(BaseModel):
|
||||
profile: dict[str, Optional[float]] = {}
|
||||
feedback_history: list[dict] = []
|
||||
now_iso: Optional[str] = None # ISO 8601; defaults to utcnow
|
||||
# Per-agent prefs from user_preferences (merged: user source overrides inferred).
|
||||
agent_prefs: dict = {}
|
||||
|
||||
|
||||
class AgentComputeResponse(BaseModel):
|
||||
@@ -135,6 +139,18 @@ class AgentComputeResponse(BaseModel):
|
||||
agent_version: str
|
||||
|
||||
|
||||
class AgentInferRequest(BaseModel):
    """Request body for POST /agents/{agent_id}/infer."""

    user_id: str
    # Raw feedback events, one dict per event: [{action, dwell_ms, created_at}, …]
    feedback_history: list[dict] = []
|
||||
|
||||
|
||||
class AgentInferResponse(BaseModel):
    """Response body for POST /agents/{agent_id}/infer."""

    user_id: str
    agent_id: str
    # Mapping of {key: inferred_value}; the caller persists it to
    # user_preferences with source='inferred'.
    inferred_prefs: dict
|
||||
|
||||
|
||||
class AgentOutputSnippet(BaseModel):
|
||||
agent_id: str
|
||||
prompt_text: str
|
||||
@@ -225,6 +241,7 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
|
||||
profile=req.profile,
|
||||
feedback_history=req.feedback_history,
|
||||
now=now,
|
||||
agent_prefs=req.agent_prefs,
|
||||
)
|
||||
try:
|
||||
output = agent.compute(inp)
|
||||
@@ -244,6 +261,46 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
|
||||
)
|
||||
|
||||
|
||||
@app.post("/agents/{agent_id}/infer", response_model=AgentInferResponse)
async def infer_agent(agent_id: str, req: AgentInferRequest) -> AgentInferResponse:
    """Run the inference framework for one agent and return inferred preference values.

    The caller (TS agent-outputs.ts) persists results to user_preferences
    with source='inferred', skipping keys where source='user' already exists.

    Args:
        agent_id: Agent identifier taken from the URL path.
        req: Request body carrying the user id and the raw feedback events.

    Returns:
        The inferred preferences for the user. ``inferred_prefs`` is an empty
        dict when the agent's manifest declares no ``inferred_params``.

    Raises:
        HTTPException: 404 when ``get_manifest`` raises ``KeyError`` for
            ``agent_id``.
    """
    # Function-scope import replaces the inline __import__("time") calls
    # without depending on a module-level import being present.
    import time

    try:
        manifest = get_manifest(agent_id)
    except KeyError:
        # The KeyError is a lookup detail; do not chain it onto the HTTP error.
        raise HTTPException(status_code=404, detail=f"Unknown agent: {agent_id!r}") from None

    # Nothing to infer for this agent: answer without running the framework.
    if not manifest.inferred_params:
        return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs={})

    # Convert the loosely-typed request dicts into framework events; missing
    # keys fall back to "" (action, created_at) or None (dwell_ms).
    events = [
        FeedbackEvent(
            action=e.get("action", ""),
            dwell_ms=e.get("dwell_ms"),
            created_at=e.get("created_at", ""),
        )
        for e in req.feedback_history
    ]
    history = UserHistory(user_id=req.user_id, events=events)

    t0 = time.monotonic()
    inferred = run_inference(manifest, history)
    latency_ms = round((time.monotonic() - t0) * 1000, 1)

    log.info(
        "inference_run",
        agent_id=agent_id,
        user_id=req.user_id,
        n_params=len(inferred),
        history_len=len(events),
        latency_ms=latency_ms,
    )
    return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs=inferred)
|
||||
|
||||
|
||||
@app.post("/recommend", response_model=RecommendResponse)
|
||||
async def recommend(req: RecommendRequest) -> RecommendResponse:
|
||||
"""Orchestrator: combine pre-computed agent outputs into one tip via LLM.
|
||||
|
||||
52
ml/serving/tests/test_infer_endpoint.py
Normal file
52
ml/serving/tests/test_infer_endpoint.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""POST /agents/{agent_id}/infer — inference framework endpoint."""
|
||||
import pytest
|
||||
from httpx import AsyncClient, ASGITransport
|
||||
|
||||
from main import app
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_infer_time_of_day_cold_start():
    """A history shorter than min_history leaves preferred_hour at its cold-start value (None)."""
    done_event = {"action": "done", "dwell_ms": 60000, "created_at": "2026-05-01T09:00:00+00:00"}
    payload = {
        "user_id": "u1",
        "feedback_history": [done_event for _ in range(5)],  # 5 < min_history=10
    }

    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as http:
        response = await http.post("/agents/time-of-day/infer", json=payload)

    assert response.status_code == 200
    data = response.json()
    assert data["agent_id"] == "time-of-day"
    assert data["inferred_prefs"]["preferred_hour"] is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_infer_time_of_day_enough_history():
    """With 10 'done' events all at 09:00, preferred_hour is inferred as 9."""
    done_event = {"action": "done", "dwell_ms": 60000, "created_at": "2026-05-01T09:00:00+00:00"}
    payload = {"user_id": "u1", "feedback_history": [done_event for _ in range(10)]}

    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as http:
        response = await http.post("/agents/time-of-day/infer", json=payload)

    assert response.status_code == 200
    data = response.json()
    assert data["inferred_prefs"]["preferred_hour"] == 9
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_infer_agent_with_no_inferred_params():
    """An agent that declares no inferred_params answers 200 with empty inferred_prefs."""
    payload = {"user_id": "u1", "feedback_history": []}

    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as http:
        response = await http.post("/agents/overdue-task/infer", json=payload)

    assert response.status_code == 200
    assert response.json()["inferred_prefs"] == {}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_infer_unknown_agent_404():
    """Posting to an agent id that is not registered yields HTTP 404."""
    payload = {"user_id": "u1", "feedback_history": []}

    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as http:
        response = await http.post("/agents/ghost/infer", json=payload)

    assert response.status_code == 404
|
||||
Reference in New Issue
Block a user