feat(profile): /api/profile + eligibility filter + inference framework (ADR-0014 steps 4-6)
Step 4 — /api/profile read-through API:
GET /api/profile → { user, prefs, consents, contexts }
PATCH /api/profile/prefs/:scope — upsert user_preferences (source='user')
PATCH /api/profile/consents — grant / revoke consent keys
PATCH /api/profile/contexts — create / activate / deactivate contexts
Legacy consentGiven bit folded in as data:core fallback.
Step 5 — registry-driven eligibility filter:
fetchRegistry() exported from agent-registry.ts.
profile/eligibility.ts: getEligibleAgentIds(userId) — filters by required
consents, silenced_in_contexts, and user_preferences[enabled=false].
fetchOrchestratorTip filters agent_outputs to eligible set before calling
ml/serving /recommend. Fail-closed: registry unavailable → empty set.
Step 6 — shared context-inference framework (#111) + time-of-day proof (#112):
ml/agents/inference/: UserHistory, FeedbackEvent, run_inference().
Framework: cold-start, min_history gating, error fallback, structured logs.
TimeOfDayAgent v1.1.0: inferred_params=[preferred_hour]; also reads
quiet_start/quiet_end from agent_prefs. agent_prefs injected by TS caller.
AgentInput gains agent_prefs field.
ml/serving: POST /agents/{agent_id}/infer endpoint.
agent-outputs.ts computeAndStore: loads prefs before compute, calls /infer
after, persists results (source='inferred'); user overrides never touched.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ oO ML Serving — multi-agent orchestrator (ADR-0013).
|
||||
|
||||
Contract:
|
||||
POST /agents/{agent_id}/compute run a sub-agent, return prompt snippet
|
||||
POST /agents/{agent_id}/infer run inference framework for a user, return inferred prefs
|
||||
POST /recommend orchestrate agent snippets → one tip via LiteLLM
|
||||
POST /generate LLM tip candidates (legacy; kept for bench/eval)
|
||||
GET /health { ok, agents: [...] }
|
||||
@@ -38,7 +39,8 @@ if _repo_root not in sys.path:
|
||||
sys.path.insert(0, _repo_root)
|
||||
|
||||
from ml.agents.base import AgentInput # noqa: E402
|
||||
from ml.agents.registry import get_agent, all_agents, all_manifests # noqa: E402
|
||||
from ml.agents.registry import get_agent, all_agents, all_manifests, get_manifest # noqa: E402
|
||||
from ml.agents.inference import run_inference, FeedbackEvent, UserHistory # noqa: E402
|
||||
|
||||
logging_config.configure()
|
||||
|
||||
@@ -123,6 +125,8 @@ class AgentComputeRequest(BaseModel):
|
||||
profile: dict[str, Optional[float]] = {}
|
||||
feedback_history: list[dict] = []
|
||||
now_iso: Optional[str] = None # ISO 8601; defaults to utcnow
|
||||
# Per-agent prefs from user_preferences (merged: user source overrides inferred).
|
||||
agent_prefs: dict = {}
|
||||
|
||||
|
||||
class AgentComputeResponse(BaseModel):
|
||||
@@ -135,6 +139,18 @@ class AgentComputeResponse(BaseModel):
|
||||
agent_version: str
|
||||
|
||||
|
||||
class AgentInferRequest(BaseModel):
    """Request body for POST /agents/{agent_id}/infer."""

    # User whose feedback history the inference framework runs over.
    user_id: str
    # Raw feedback events, each shaped like {action, dwell_ms, created_at}.
    feedback_history: list[dict] = []
|
||||
|
||||
|
||||
class AgentInferResponse(BaseModel):
    """Response body for POST /agents/{agent_id}/infer."""

    user_id: str
    agent_id: str
    # Mapping of {key: inferred_value}; the caller persists these to
    # user_preferences with source='inferred'.
    inferred_prefs: dict
|
||||
|
||||
|
||||
class AgentOutputSnippet(BaseModel):
|
||||
agent_id: str
|
||||
prompt_text: str
|
||||
@@ -225,6 +241,7 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
|
||||
profile=req.profile,
|
||||
feedback_history=req.feedback_history,
|
||||
now=now,
|
||||
agent_prefs=req.agent_prefs,
|
||||
)
|
||||
try:
|
||||
output = agent.compute(inp)
|
||||
@@ -244,6 +261,46 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
|
||||
)
|
||||
|
||||
|
||||
@app.post("/agents/{agent_id}/infer", response_model=AgentInferResponse)
async def infer_agent(agent_id: str, req: AgentInferRequest) -> AgentInferResponse:
    """Run the inference framework for one agent and return inferred preference values.

    The caller (TS agent-outputs.ts) persists results to user_preferences
    with source='inferred', skipping keys where source='user' already exists.

    Args:
        agent_id: Agent identifier taken from the URL path; looked up via
            ``get_manifest``.
        req: The user id plus the raw feedback history to infer from.

    Returns:
        The inferred preferences for the user. ``inferred_prefs`` is empty
        when the agent's manifest declares no ``inferred_params``.

    Raises:
        HTTPException: 404 when ``get_manifest`` raises ``KeyError`` for
            ``agent_id``.
    """
    # Function-scope import keeps this block self-contained; it replaces the
    # repeated inline ``__import__("time")`` calls.
    import time

    try:
        manifest = get_manifest(agent_id)
    except KeyError:
        # ``from None``: the internal KeyError is an implementation detail and
        # should not be chained onto the HTTP error.
        raise HTTPException(
            status_code=404, detail=f"Unknown agent: {agent_id!r}"
        ) from None

    # Agents that declare no inferred params have nothing to infer; skip the
    # framework call entirely.
    if not manifest.inferred_params:
        return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs={})

    # Convert the raw request dicts into framework events; missing keys fall
    # back to the same defaults as before ("" / None / "").
    events = [
        FeedbackEvent(
            action=e.get("action", ""),
            dwell_ms=e.get("dwell_ms"),
            created_at=e.get("created_at", ""),
        )
        for e in req.feedback_history
    ]
    history = UserHistory(user_id=req.user_id, events=events)

    # Monotonic clock so the measured latency is unaffected by wall-clock jumps.
    t0 = time.monotonic()
    inferred = run_inference(manifest, history)
    latency_ms = round((time.monotonic() - t0) * 1000, 1)

    log.info(
        "inference_run",
        agent_id=agent_id,
        user_id=req.user_id,
        n_params=len(inferred),
        history_len=len(events),
        latency_ms=latency_ms,
    )
    return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs=inferred)
|
||||
|
||||
|
||||
@app.post("/recommend", response_model=RecommendResponse)
|
||||
async def recommend(req: RecommendRequest) -> RecommendResponse:
|
||||
"""Orchestrator: combine pre-computed agent outputs into one tip via LLM.
|
||||
|
||||
Reference in New Issue
Block a user