feat(profile): /api/profile + eligibility filter + inference framework (ADR-0014 steps 4-6)

Step 4 — /api/profile read-through API:
  GET   /api/profile → { user, prefs, consents, contexts }
  PATCH /api/profile/prefs/:scope — upsert user_preferences (source='user')
  PATCH /api/profile/consents — grant / revoke consent keys
  PATCH /api/profile/contexts — create / activate / deactivate contexts
  Legacy consentGiven bit folded in as data:core fallback.

Step 5 — registry-driven eligibility filter:
  fetchRegistry() exported from agent-registry.ts.
  profile/eligibility.ts: getEligibleAgentIds(userId) — filters by required consents, silenced_in_contexts, and user_preferences[enabled=false].
  fetchOrchestratorTip filters agent_outputs to eligible set before calling ml/serving /recommend.
  Fail-closed: registry unavailable → empty set.

Step 6 — shared context-inference framework (#111) + time-of-day proof (#112):
  ml/agents/inference/: UserHistory, FeedbackEvent, run_inference().
  Framework: cold-start, min_history gating, error fallback, structured logs.
  TimeOfDayAgent v1.1.0: inferred_params=[preferred_hour]; also reads quiet_start/quiet_end from agent_prefs.
  agent_prefs injected by TS caller. AgentInput gains agent_prefs field.
  ml/serving: POST /agents/{agent_id}/infer endpoint.
  agent-outputs.ts computeAndStore: loads prefs before compute, calls /infer after, persists results (source='inferred'); user overrides never touched.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-05 11:14:25 +00:00
parent 305eeae38b
commit ad6747c242
19 changed files with 1196 additions and 24 deletions
--- a/ml/serving/main.py
+++ b/ml/serving/main.py
@@ -3,6 +3,7 @@ oO ML Serving — multi-agent orchestrator (ADR-0013).

 Contract:
  POST /agents/{agent_id}/compute  run a sub-agent, return prompt snippet
+  POST /agents/{agent_id}/infer    run inference framework for a user, return inferred prefs
  POST /recommend                  orchestrate agent snippets → one tip via LiteLLM
  POST /generate                   LLM tip candidates (legacy; kept for bench/eval)
  GET  /health                     { ok, agents: [...] }
@@ -38,7 +39,8 @@ if _repo_root not in sys.path:
    sys.path.insert(0, _repo_root)

 from ml.agents.base import AgentInput  # noqa: E402
-from ml.agents.registry import get_agent, all_agents, all_manifests  # noqa: E402
+from ml.agents.registry import get_agent, all_agents, all_manifests, get_manifest  # noqa: E402
+from ml.agents.inference import run_inference, FeedbackEvent, UserHistory  # noqa: E402

 logging_config.configure()

@@ -123,6 +125,8 @@ class AgentComputeRequest(BaseModel):
    profile: dict[str, Optional[float]] = {}
    feedback_history: list[dict] = []
    now_iso: Optional[str] = None  # ISO 8601; defaults to utcnow
+    # Per-agent prefs from user_preferences (merged: user source overrides inferred).
+    agent_prefs: dict = {}


 class AgentComputeResponse(BaseModel):
@@ -135,6 +139,18 @@ class AgentComputeResponse(BaseModel):
    agent_version: str


+class AgentInferRequest(BaseModel):
+    # Request body for POST /agents/{agent_id}/infer.
+    # user_id is echoed back in the response and attached to the UserHistory
+    # that infer_agent hands to run_inference().
+    user_id: str
+    # Raw feedback events. infer_agent reads the "action", "dwell_ms" and
+    # "created_at" keys of each entry via .get(), so absent keys are tolerated
+    # (they fall back to "", None and "" respectively).
+    feedback_history: list[dict] = []   # [{action, dwell_ms, created_at}, …]
+
+
+class AgentInferResponse(BaseModel):
+    # Response body for POST /agents/{agent_id}/infer.
+    # user_id is copied from the request; agent_id is the path parameter.
+    user_id: str
+    agent_id: str
+    # {key: inferred_value} — caller persists to user_preferences with source='inferred'
+    # Empty when the agent's manifest declares no inferred_params.
+    inferred_prefs: dict
+
+
 class AgentOutputSnippet(BaseModel):
    agent_id: str
    prompt_text: str
@@ -225,6 +241,7 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
        profile=req.profile,
        feedback_history=req.feedback_history,
        now=now,
+        agent_prefs=req.agent_prefs,
    )
    try:
        output = agent.compute(inp)
@@ -244,6 +261,46 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
    )


+@app.post("/agents/{agent_id}/infer", response_model=AgentInferResponse)
+async def infer_agent(agent_id: str, req: AgentInferRequest) -> AgentInferResponse:
+    """Run the inference framework for one agent and return inferred preference values.
+
+    The caller (TS agent-outputs.ts) persists results to user_preferences
+    with source='inferred', skipping keys where source='user' already exists.
+
+    Responds with 404 when ``agent_id`` has no manifest, and with an empty
+    ``inferred_prefs`` when the manifest declares no ``inferred_params``.
+    """
+    # Function-scope import replaces the inline __import__("time") calls
+    # without relying on what the module's import block already provides.
+    import time
+
+    try:
+        manifest = get_manifest(agent_id)
+    except KeyError:
+        # `from None`: the registry's KeyError adds nothing to the 404 response,
+        # so suppress implicit exception chaining.
+        raise HTTPException(
+            status_code=404, detail=f"Unknown agent: {agent_id!r}"
+        ) from None
+
+    if not manifest.inferred_params:
+        # Nothing to infer for this agent: skip history construction entirely.
+        return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs={})
+
+    # Entries are untyped dicts; missing keys fall back to ""/None so that
+    # partially-filled events are still handed to the framework.
+    events = [
+        FeedbackEvent(
+            action=e.get("action", ""),
+            dwell_ms=e.get("dwell_ms"),
+            created_at=e.get("created_at", ""),
+        )
+        for e in req.feedback_history
+    ]
+    history = UserHistory(user_id=req.user_id, events=events)
+
+    # Monotonic clock: the latency measurement is unaffected by wall-clock changes.
+    t0 = time.monotonic()
+    inferred = run_inference(manifest, history)
+    latency_ms = round((time.monotonic() - t0) * 1000, 1)
+
+    log.info(
+        "inference_run",
+        agent_id=agent_id,
+        user_id=req.user_id,
+        n_params=len(inferred),
+        history_len=len(events),
+        latency_ms=latency_ms,
+    )
+    return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs=inferred)
+
+
@app.post("/recommend", response_model=RecommendResponse)
 async def recommend(req: RecommendRequest) -> RecommendResponse:
    """Orchestrator: combine pre-computed agent outputs into one tip via LLM.