feat(serving): replace MLflow run logging with native trace spans

Convert ml-serving from isolated MLflow runs to nested traces using
mlflow.start_span_no_context(). The recommend endpoint now emits a full
span tree: recommend (CHAIN) → build_context (TOOL), agent:* (AGENT) ×N,
llm_orchestrator (LLM). Compute and infer endpoints each emit a single span.

Supporting changes:
- mlflow-skinny>=3.1.0 added to requirements
- MLflow configured with --serve-artifacts + mlflow-artifacts:/ default root
  for cross-container artifact proxy (spans now persist from ml-serving)
- --allowed-hosts extended to include mlflow:5000 (SDK includes port in Host)
- science_destiny slider wired through prompts.py and recommend endpoint
- Config page exposes science/destiny slider (0=data-driven, 100=intuitive)
- Tip page shows rationale inline on tap

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 08:26:05 +00:00
parent afacc34969
commit 161e654027
14 changed files with 419 additions and 141 deletions

View File

@@ -67,6 +67,11 @@ docs/ architecture notes, ADRs, API specs
- No secrets in repo. Local dev via `.env.local` (gitignored), prod via the server's secret store (Vaultwarden now; k8s secrets later).
- Compose profiles: `core` (api + web + admin), `full` (adds ml-serving + nats), `mlops` (adds MLflow), `ai` (adds Ollama + LiteLLM). Mix as needed. Always pass `--profile <name>` to `build`/`up` — without a profile, no services are selected and builds silently do nothing.
- Docker rebuild: use `--force-recreate` on `up` when only env vars changed (no image rebuild needed); new env vars in `.env.local` are not picked up by a running container until it is recreated.
- Docker rebuild gotchas:
- **Never run two `docker compose up --build` at once** — both grab the same `--mount=type=cache,id=pnpm` and deadlock on the API's `pnpm --prod deploy` step. Symptom: build sits silent for hours on `[api builder 8/8]`. Before starting any build, check `ps aux | grep "docker compose"` and kill any prior `up --build` (`kill -9 <pid>` — the wrapper bash and the docker compose binary are separate PIDs; kill the docker compose one).
- **Don't add `--offline` to `pnpm --prod deploy`** — pnpm's metadata cache (`/root/.cache/pnpm/`) is not in the `/pnpm/store` cache mount, so `--offline` fails with `ERR_PNPM_NO_OFFLINE_META` for transitive devDeps (e.g. vite via vitest). Leave the deploy step network-on; it works.
- **All TS Dockerfiles need `python3 make g++`** in the base stage — `better-sqlite3` rebuilds natively on install. Missing from `Dockerfile.admin` historically caused `gyp ERR! find Python` failures.
- **A clean build of `--profile core` takes ~3 min total** when the buildx cache is warm. If it's been silent for >10 min, check for the parallel-build deadlock above before assuming "still going".
- Run Python agent tests: `python3 -m pytest ml/agents/tests/ -x -q` (tests add repo root to `sys.path` themselves).
- Run Python feature tests: `python3 -m pytest ml/features/ -x -q`
- `ml/features/` files are Python mirrors of TS registries — TS is source of truth. Tests parse `registry.ts` with regex to detect drift; follow the same pattern whenever a new field is added to `ProfileFeature`.
@@ -98,6 +103,16 @@ All `httpx` calls in `ml/` must use `trust_env=False` to bypass the system proxy
MLflow container-to-container calls: always pass `host_header="localhost"` to `MLflowClient` — MLflow's `--allowed-hosts` rejects `Host: mlflow` (the container DNS name) with 403. Auth credential is `MLFLOW_ADMIN_PASSWORD`. MLflow REST API lives at the origin root (`/api/2.0/mlflow`), not under the `/mlflow` UI prefix.
MLflow from the host shell — query with curl, no script needed:
```bash
env -u HTTPS_PROXY -u HTTP_PROXY -u ALL_PROXY -u https_proxy -u http_proxy -u all_proxy \
curl -s -H "Host: localhost" -u "admin:${MLFLOW_ADMIN_PASSWORD}" \
-X POST http://localhost:5000/api/2.0/mlflow/runs/search \
-H "Content-Type: application/json" \
-d '{"experiment_ids":["3"],"max_results":1,"order_by":["start_time DESC"]}'
```
`Host: localhost` required (no port) — `localhost:5000` fails the DNS-rebinding check. Experiment IDs: `3`=oO/serving. Artifacts stored as run tags prefixed `artifact:<path>`.
**Multi-agent tip generation pipeline (ADR-0013):**
1. Pre-compute agents (`ml/agents/<id>/`) run on a schedule, each emitting a snippet into `agent_outputs` with a per-agent TTL
2. On request, `recommender` (TS) loads the eligible agent set (registry-driven, ADR-0014) and pulls the freshest non-expired snippets

View File

@@ -1,12 +1,27 @@
'use client';
import { useEffect, useState, useCallback } from 'react';
import { getVapidPublicKey, subscribePush } from '@/lib/api';
import { getVapidPublicKey, subscribePush, getOrchestatorPrefs, updateOrchestratorPref } from '@/lib/api';
type PushState = 'idle' | 'subscribed' | 'denied';
export default function ConfigPage() {
const [pushState, setPushState] = useState<PushState>('idle');
const [scienceDestiny, setScienceDestiny] = useState(50);
const [prefSaving, setPrefSaving] = useState(false);
useEffect(() => {
getOrchestatorPrefs().then((prefs) => {
if (typeof prefs.science_destiny === 'number') setScienceDestiny(prefs.science_destiny);
}).catch(() => {});
}, []);
const handleScienceDestinyChange = useCallback(async (value: number) => {
setScienceDestiny(value);
setPrefSaving(true);
try { await updateOrchestratorPref('science_destiny', value); }
finally { setPrefSaving(false); }
}, []);
useEffect(() => {
if (typeof Notification !== 'undefined') {
@@ -87,6 +102,41 @@ export default function ConfigPage() {
</div>
</section>
{/* Tip style */}
<section style={{ marginBottom: '2.5rem' }}>
<h3 style={{ fontSize: '0.75rem', letterSpacing: '0.12em', textTransform: 'uppercase', color: 'rgba(255,255,255,0.35)', marginBottom: '1rem', fontWeight: 400 }}>
Tip style
</h3>
<div style={{
border: '1px solid rgba(255,255,255,0.1)',
borderRadius: '0.75rem',
padding: '1.25rem 1.5rem',
}}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'baseline', marginBottom: '0.875rem' }}>
<span style={{ fontSize: '0.85rem', fontWeight: 500 }}>Science</span>
<span style={{ fontSize: '0.7rem', color: 'rgba(255,255,255,0.25)' }}>
{prefSaving ? 'saving…' : scienceDestiny === 50 ? 'balanced' : scienceDestiny < 50 ? 'data-driven' : 'intuitive'}
</span>
<span style={{ fontSize: '0.85rem', fontWeight: 500 }}>Destiny</span>
</div>
<input
type="range"
min={0}
max={100}
value={scienceDestiny}
onChange={(e) => handleScienceDestinyChange(Number(e.target.value))}
style={{ width: '100%', accentColor: 'var(--white)', cursor: 'pointer' }}
/>
<div style={{ color: 'rgba(255,255,255,0.3)', fontSize: '0.7rem', marginTop: '0.75rem' }}>
{scienceDestiny < 30
? 'Tips lean on patterns and data'
: scienceDestiny > 70
? 'Tips lean on intuition and meaning'
: 'Tips balance logic and intuition'}
</div>
</div>
</section>
{/* Integrations */}
<section>
<h3 style={{ fontSize: '0.75rem', letterSpacing: '0.12em', textTransform: 'uppercase', color: 'rgba(255,255,255,0.35)', marginBottom: '1rem', fontWeight: 400 }}>

View File

@@ -29,6 +29,7 @@ export default function TipPage() {
const [visible, setVisible] = useState(false);
const holdTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
const [pressed, setPressed] = useState(false);
const [showReasoning, setShowReasoning] = useState(false);
useEffect(() => {
if (state === 'loading' || state === 'done') {
@@ -49,6 +50,7 @@ export default function TipPage() {
return;
}
setTip(rec.tip);
setShowReasoning(false);
setState('tip');
} catch (err: any) {
console.error('[tip] loadTip error', err?.status, err?.message);
@@ -235,6 +237,81 @@ export default function TipPage() {
</>
)}
{/* Reasoning overlay */}
{showReasoning && tip?.rationale && (
<div
onClick={(e) => { e.stopPropagation(); setShowReasoning(false); }}
style={{
position: 'fixed',
inset: 0,
display: 'flex',
alignItems: 'flex-end',
justifyContent: 'center',
zIndex: 20,
padding: '0 0 5rem',
}}
>
<div
onClick={(e) => e.stopPropagation()}
style={{
background: 'rgba(20,20,20,0.96)',
border: '1px solid rgba(255,255,255,0.08)',
borderRadius: '0.875rem',
padding: '1.25rem 1.5rem',
maxWidth: '360px',
width: 'calc(100% - 3rem)',
}}
>
<p style={{
margin: 0,
fontSize: '0.7rem',
letterSpacing: '0.1em',
textTransform: 'uppercase',
color: 'rgba(255,255,255,0.3)',
marginBottom: '0.625rem',
}}>
Why this tip
</p>
<p style={{
margin: 0,
fontSize: '0.9rem',
fontWeight: 300,
lineHeight: 1.5,
color: 'rgba(255,255,255,0.75)',
}}>
{tip.rationale}
</p>
</div>
</div>
)}
{/* ? button — bottom left, shows reasoning */}
{(state === 'tip' || state === 'actions') && tip?.rationale && (
<button
onClick={(e) => { e.stopPropagation(); setShowReasoning((v) => !v); }}
aria-label="Why this tip"
style={{
position: 'fixed',
bottom: '1.5rem',
left: '1.5rem',
background: 'transparent',
border: 'none',
color: showReasoning ? 'rgba(255,255,255,0.5)' : 'rgba(255,255,255,0.15)',
fontSize: '0.85rem',
fontWeight: 400,
lineHeight: 1,
padding: '0.5rem',
cursor: 'pointer',
pointerEvents: 'auto',
zIndex: 10,
transition: 'color 0.2s ease',
fontFamily: 'inherit',
}}
>
?
</button>
)}
{/* Settings gear — bottom right */}
<a
href="/config"

View File

@@ -81,3 +81,15 @@ export async function unsubscribePush(endpoint: string) {
body: JSON.stringify({ endpoint }),
});
}
export async function getOrchestatorPrefs(): Promise<Record<string, unknown>> {
const data = await apiFetch<{ prefs: Record<string, Record<string, unknown>> }>('/profile');
return data.prefs?.orchestrator ?? {};
}
export async function updateOrchestratorPref(key: string, value: unknown) {
return apiFetch<{ ok: boolean }>('/profile/prefs/orchestrator', {
method: 'PATCH',
body: JSON.stringify({ [key]: value }),
});
}

View File

@@ -1,7 +1,8 @@
# syntax=docker/dockerfile:1.7
FROM node:22-slim AS base
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates \
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 make g++ ca-certificates \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g pnpm
ENV CI=true \

View File

@@ -112,11 +112,13 @@ services:
command: >
mlflow server
--backend-store-uri sqlite:////mlflow/mlflow.db
--default-artifact-root /mlflow/artifacts
--artifacts-destination /mlflow/artifacts
--serve-artifacts
--default-artifact-root mlflow-artifacts:/
--host 0.0.0.0
--port 5000
--static-prefix /mlflow
--allowed-hosts o.alogins.net,localhost
--allowed-hosts o.alogins.net,localhost,localhost:5000,mlflow,mlflow:5000
--cors-allowed-origins https://o.alogins.net
volumes:
- /mnt/ssd/dbs/oo/mlflow:/mlflow

View File

@@ -28,9 +28,11 @@ from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from starlette.middleware.base import BaseHTTPMiddleware
import mlflow
from mlflow.entities import SpanType
import logging_config
import nats_consumer
from mlflow_client import MLflowClient
from prompts import get_prompt, build_orchestrator_messages
# Make ml.agents importable regardless of working directory.
@@ -83,36 +85,69 @@ LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev")
STATE_DIR = Path(os.getenv("STATE_DIR", "/tmp/oo-serving-state"))
# ── MLflow tracing (optional) ───────────────────────────────────────────────
# Set MLFLOW_TRACKING_URI to enable. All calls are fire-and-forget; any error
# is logged at WARNING and never propagates to the caller.
# Set MLFLOW_TRACKING_URI to enable. Spans are fire-and-forget; errors are
# logged at WARNING and never propagate to the caller.
# MLflow --allowed-hosts must include "mlflow" (the container DNS name) so the
# SDK can reach the server from inside other containers.
_MLFLOW_URI = os.getenv("MLFLOW_TRACKING_URI", "")
_mlflow: MLflowClient | None = (
MLflowClient(
tracking_uri=_MLFLOW_URI,
username=os.getenv("MLFLOW_TRACKING_USERNAME", "admin"),
password=os.getenv("MLFLOW_TRACKING_PASSWORD") or os.getenv("MLFLOW_ADMIN_PASSWORD", "password"),
host_header="localhost",
)
if _MLFLOW_URI else None
)
_MLFLOW_EXP = "oO/serving"
_mlflow_exp_id: str | None = None
if _MLFLOW_URI:
try:
mlflow.set_tracking_uri(_MLFLOW_URI)
_mlflow_exp_id = mlflow.set_experiment(_MLFLOW_EXP).experiment_id
except Exception as _exc:
log.warning("mlflow_init_failed", error=str(_exc))
def _mlflow_run(run_name: str, params: dict, metrics: dict, tags: dict) -> None:
"""Create a finished MLflow run. Silently no-ops if MLflow is not configured."""
if _mlflow is None:
class _NoOpSpan:
"""Returned when MLflow is disabled or span creation fails."""
def set_inputs(self, *a, **k): pass
def set_outputs(self, *a, **k): pass
def set_attribute(self, *a, **k): pass
def set_attributes(self, *a, **k): pass
def end(self, *a, **k): pass
_NOOP = _NoOpSpan()
def _start_span(name: str, span_type: str, *, parent=_NOOP, inputs=None):
"""Start an MLflow span. Returns _NOOP on failure or when tracing is off.
experiment_id is only passed for root spans (no parent) — passing it to
child spans causes the SDK to fail with '_Span has no attribute _span'.
"""
if _mlflow_exp_id is None:
return _NOOP
try:
kw: dict = {"span_type": span_type}
if isinstance(parent, _NoOpSpan):
kw["experiment_id"] = _mlflow_exp_id # root span only
else:
kw["parent_span"] = parent
if inputs is not None:
kw["inputs"] = inputs
return mlflow.start_span_no_context(name, **kw)
except Exception as exc: # noqa: BLE001
log.warning("mlflow_span_start_failed", name=name, error=str(exc))
return _NOOP
def _end_span(span, *, status: str = "OK", outputs=None, attributes: dict | None = None) -> None:
"""End a span safely, ignoring _NoOpSpan and swallowing exceptions."""
if isinstance(span, _NoOpSpan):
return
try:
exp_id = _mlflow.get_or_create_experiment(_MLFLOW_EXP)
run_id = _mlflow.create_run(exp_id, run_name, tags={"source": "ml-serving"})
_mlflow.log_params(run_id, {k: str(v)[:250] for k, v in params.items()})
_mlflow.log_metrics(run_id, metrics)
for k, v in tags.items():
_mlflow.log_text(run_id, str(v), k)
_mlflow.end_run(run_id)
if attributes:
span.set_attributes(attributes)
span.end(status=status, outputs=outputs)
except Exception as exc: # noqa: BLE001
log.warning("mlflow_log_failed", error=str(exc))
log.warning("mlflow_span_end_failed", error=str(exc))
STATE_DIR.mkdir(parents=True, exist_ok=True)
@@ -197,6 +232,7 @@ class RecommendRequest(BaseModel):
tasks: list[dict] = []
hour_of_day: int = 12
day_of_week: int = 0
science_destiny: int = 50 # 0=science (data-driven), 100=destiny (intuitive)
class TipResult(BaseModel):
@@ -285,12 +321,15 @@ async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentCompute
raise HTTPException(status_code=500, detail=f"Agent compute failed: {exc}")
log.info("agent_computed", agent_id=agent_id, user_id=req.user_id, expires_at=output.expires_at)
_mlflow_run(
run_name=f"compute/{agent_id}",
params={"agent_id": agent_id, "user_id": req.user_id, "agent_version": output.agent_version},
metrics={"task_count": len(req.tasks), "feedback_count": len(req.feedback_history)},
tags={"prompt_text": output.prompt_text, "signals_snapshot": json.dumps(output.signals_snapshot)},
span = _start_span(
f"compute:{agent_id}",
SpanType.AGENT,
inputs={"user_id": req.user_id, "agent_id": agent_id,
"task_count": len(req.tasks), "feedback_count": len(req.feedback_history)},
)
_end_span(span,
outputs={"prompt_text": output.prompt_text, "signals_snapshot": output.signals_snapshot},
attributes={"agent_version": output.agent_version, "expires_at": output.expires_at})
return AgentComputeResponse(
user_id=output.user_id,
agent_id=output.agent_id,
@@ -347,12 +386,15 @@ async def infer_agent(agent_id: str, req: AgentInferRequest) -> AgentInferRespon
history_len=len(events),
latency_ms=latency_ms,
)
_mlflow_run(
run_name=f"infer/{agent_id}",
params={"agent_id": agent_id, "user_id": req.user_id},
metrics={"latency_ms": latency_ms, "history_len": len(events), "n_params": len(inferred)},
tags={"inferred_prefs": json.dumps(inferred)},
span = _start_span(
f"infer:{agent_id}",
SpanType.CHAIN,
inputs={"user_id": req.user_id, "agent_id": agent_id,
"history_len": len(events), "completion_count": len(completions)},
)
_end_span(span,
outputs={"inferred_prefs": inferred},
attributes={"latency_ms": str(latency_ms), "n_params": str(len(inferred))})
return AgentInferResponse(user_id=req.user_id, agent_id=agent_id, inferred_prefs=inferred)
@@ -364,99 +406,132 @@ async def recommend(req: RecommendRequest) -> RecommendResponse:
the fresh rows from agent_outputs table (fetched by the TypeScript recommender
before calling this endpoint). Falls back to raw task context if empty.
"""
t0_recommend = time.monotonic()
messages = build_orchestrator_messages(
agent_outputs=[s.model_dump() for s in req.agent_outputs],
tasks=req.tasks,
hour_of_day=req.hour_of_day,
day_of_week=req.day_of_week,
)
headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
last_raw = ""
last_parse_error = ""
total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
model_used = "tip-generator"
t0 = time.monotonic()
async with httpx.AsyncClient(timeout=30.0) as client:
for _attempt in range(1 + _MAX_GENERATE_RETRIES):
payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
try:
resp = await client.post(
f"{LITELLM_URL}/chat/completions", json=payload, headers=headers
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
raise HTTPException(status_code=502, detail=f"LiteLLM error: {e.response.text}")
except httpx.RequestError as e:
raise HTTPException(status_code=503, detail=f"LiteLLM unreachable: {e}")
# ── root span ──────────────────────────────────────────────────────────
root = _start_span("recommend", SpanType.CHAIN, inputs={
"user_id": req.user_id,
"agent_ids": [s.agent_id for s in req.agent_outputs],
"hour_of_day": req.hour_of_day,
"day_of_week": req.day_of_week,
"science_destiny": req.science_destiny,
})
data = resp.json()
usage = data.get("usage", {})
total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
model_used = data.get("model", "tip-generator")
last_raw = data["choices"][0]["message"]["content"]
try:
text = last_raw.strip()
if text.startswith("```"):
parts = text.split("```")
text = parts[1] if len(parts) > 1 else text
if text.startswith("json"):
text = text[4:]
parsed = json.loads(text)
item: dict = parsed[0] if isinstance(parsed, list) else parsed
break
except (json.JSONDecodeError, ValueError, IndexError) as exc:
last_parse_error = str(exc)
messages.append({"role": "assistant", "content": last_raw})
messages.append({"role": "user", "content": _RETRY_SUFFIX_OBJ})
else:
raise HTTPException(
status_code=502,
detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
f"{last_parse_error}\n{last_raw[:200]}",
)
tip = TipResult(
id=item.get("id", f"tip-{req.user_id[:8]}"),
content=item.get("content", ""),
rationale=item.get("rationale"),
)
latency_ms_recommend = round((time.monotonic() - t0_recommend) * 1000, 1)
log.info(
"recommend_served",
user_id=req.user_id,
agent_count=len(req.agent_outputs),
tip_id=tip.id,
)
_mlflow_run(
run_name="recommend",
params={
"user_id": req.user_id,
"agent_ids": ",".join(s.agent_id for s in req.agent_outputs),
"model": model_used,
"hour_of_day": req.hour_of_day,
"day_of_week": req.day_of_week,
},
metrics={
"prompt_tokens": total_usage["prompt_tokens"],
"completion_tokens": total_usage["completion_tokens"],
try:
# ── build_context span ─────────────────────────────────────────────
ctx_span = _start_span("build_context", SpanType.TOOL, parent=root, inputs={
"agent_count": len(req.agent_outputs),
"latency_ms": latency_ms_recommend,
},
tags={
"prompt_messages": json.dumps(messages),
"tip_content": tip.content,
"tip_rationale": tip.rationale or "",
},
)
return RecommendResponse(
tip=tip,
model=model_used,
prompt_tokens=total_usage["prompt_tokens"],
completion_tokens=total_usage["completion_tokens"],
)
"task_count": len(req.tasks),
"science_destiny": req.science_destiny,
})
messages = build_orchestrator_messages(
agent_outputs=[s.model_dump() for s in req.agent_outputs],
tasks=req.tasks,
hour_of_day=req.hour_of_day,
day_of_week=req.day_of_week,
science_destiny=req.science_destiny,
)
_end_span(ctx_span, outputs={"message_count": len(messages)})
# ── one span per pre-computed agent snippet ────────────────────────
for snippet in req.agent_outputs:
a_span = _start_span(
f"agent:{snippet.agent_id}", SpanType.AGENT, parent=root,
inputs={"agent_id": snippet.agent_id},
)
_end_span(a_span, outputs={"prompt_text": snippet.prompt_text})
# ── LLM orchestrator span (wraps retry loop) ───────────────────────
llm_span = _start_span("llm_orchestrator", SpanType.LLM, parent=root, inputs={
"messages": messages,
"model": "tip-generator",
"temperature": 0.7,
})
headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
last_raw = ""
last_parse_error = ""
total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
model_used = "tip-generator"
_attempt = 0
async with httpx.AsyncClient(timeout=30.0) as client:
for _attempt in range(1 + _MAX_GENERATE_RETRIES):
payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
try:
resp = await client.post(
f"{LITELLM_URL}/chat/completions", json=payload, headers=headers
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
_end_span(llm_span, status="ERROR")
_end_span(root, status="ERROR")
raise HTTPException(status_code=502, detail=f"LiteLLM error: {e.response.text}")
except httpx.RequestError as e:
_end_span(llm_span, status="ERROR")
_end_span(root, status="ERROR")
raise HTTPException(status_code=503, detail=f"LiteLLM unreachable: {e}")
data = resp.json()
usage = data.get("usage", {})
total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
model_used = data.get("model", "tip-generator")
last_raw = data["choices"][0]["message"]["content"]
try:
text = last_raw.strip()
if text.startswith("```"):
parts = text.split("```")
text = parts[1] if len(parts) > 1 else text
if text.startswith("json"):
text = text[4:]
parsed = json.loads(text)
item: dict = parsed[0] if isinstance(parsed, list) else parsed
break
except (json.JSONDecodeError, ValueError, IndexError) as exc:
last_parse_error = str(exc)
messages.append({"role": "assistant", "content": last_raw})
messages.append({"role": "user", "content": _RETRY_SUFFIX_OBJ})
else:
_end_span(llm_span, status="ERROR")
_end_span(root, status="ERROR")
raise HTTPException(
status_code=502,
detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
f"{last_parse_error}\n{last_raw[:200]}",
)
tip = TipResult(
id=item.get("id", f"tip-{req.user_id[:8]}"),
content=item.get("content", ""),
rationale=item.get("rationale"),
)
_end_span(llm_span, outputs={"content": tip.content, "rationale": tip.rationale or ""},
attributes={
"prompt_tokens": str(total_usage["prompt_tokens"]),
"completion_tokens": str(total_usage["completion_tokens"]),
"model": model_used,
"attempts": str(_attempt + 1),
})
latency_ms = round((time.monotonic() - t0) * 1000, 1)
log.info("recommend_served", user_id=req.user_id, agent_count=len(req.agent_outputs), tip_id=tip.id)
_end_span(root, outputs={"tip_id": tip.id, "content": tip.content, "rationale": tip.rationale or ""},
attributes={"latency_ms": str(latency_ms), "agent_count": str(len(req.agent_outputs))})
return RecommendResponse(
tip=tip,
model=model_used,
prompt_tokens=total_usage["prompt_tokens"],
completion_tokens=total_usage["completion_tokens"],
)
except HTTPException:
raise
except Exception:
_end_span(root, status="ERROR")
raise
_MAX_GENERATE_RETRIES = 2

View File

@@ -124,17 +124,52 @@ _SYS_V4_ORCHESTRATOR = (
)
def _science_destiny_instruction(science_destiny: int) -> str:
"""Translate 0-100 slider into a prompt instruction.
0 = pure science: prioritise patterns, data, measurable progress.
100 = pure destiny: prioritise meaning, intuition, deeper purpose.
50 = balanced (no extra instruction injected).
"""
if science_destiny <= 20:
return (
"The user strongly prefers data-driven advice. "
"Ground every tip in observable patterns, streaks, or measurable progress. "
"Avoid abstract or motivational language."
)
if science_destiny <= 40:
return (
"The user leans toward evidence-based guidance. "
"Anchor tips in patterns and metrics where possible."
)
if science_destiny >= 80:
return (
"The user strongly believes in intuition and meaning. "
"Frame tips around purpose, values, and deeper intention rather than metrics."
)
if science_destiny >= 60:
return (
"The user leans toward intuitive, meaning-driven advice. "
"Weave in purpose and intention alongside practicality."
)
return "" # balanced — no extra instruction
def build_orchestrator_messages(
agent_outputs: list[dict],
tasks: list[dict],
hour_of_day: int,
day_of_week: int,
science_destiny: int = 50,
) -> list[dict]:
"""Build the [system, user] message list for the orchestrator LLM call.
agent_outputs: list of {agent_id, prompt_text} dicts.
Falls back to raw task summary when agent_outputs is empty.
"""
style_hint = _science_destiny_instruction(science_destiny)
system = _SYS_V4_ORCHESTRATOR + (f"\n\n{style_hint}" if style_hint else "")
lines = [f"Current time: {hour_of_day:02d}:00, day_of_week={day_of_week}", ""]
if agent_outputs:
lines.append("Context from analysis agents:")
@@ -150,7 +185,7 @@ def build_orchestrator_messages(
lines.append(f" - {t.get('content', '?')}")
lines.append("\nGenerate one tip as a JSON object. Write the tip content in English only.")
return [
{"role": "system", "content": _SYS_V4_ORCHESTRATOR},
{"role": "system", "content": system},
{"role": "user", "content": "\n".join(lines)},
]

View File

@@ -7,3 +7,4 @@ anthropic>=0.40.0
nats-py>=2.9.0
structlog>=24.1.0
sentry-sdk>=2.0.0
mlflow-skinny>=3.1.0

View File

@@ -35,7 +35,7 @@ const AGENT_C = { ...MANIFEST_DEFAULTS, id: 'agent-c', required_consents: ['data
beforeAll(async () => {
await testDb.insert(users).values({
id: 'u1', email: 'u@test.com', name: null, image: null, role: 'user',
consentGiven: false, createdAt: NOW,
createdAt: NOW,
});
});

View File

@@ -213,7 +213,7 @@ describe('POST /recommend integration', () => {
});
// Intercept the /recommend body to inspect what agent_outputs were sent
const origFetch = globalThis.fetch as ReturnType<typeof vi.fn>;
const origFetch = globalThis.fetch as unknown as (url: string, init?: RequestInit) => Promise<Response>;
const wrappedFetch = vi.fn().mockImplementation(async (url: string, init?: RequestInit) => {
if (String(url).includes('/recommend') && init?.body) {
const body = JSON.parse(init.body as string);

View File

@@ -166,7 +166,7 @@ export async function computeAndStore(userId: string, agentId: string): Promise<
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ user_id: userId, tasks, profile, feedback_history: feedbackHistory, agent_prefs: agentPrefs }),
signal: AbortSignal.timeout(15_000),
signal: AbortSignal.timeout(60_000),
});
if (!mlResp.ok) {

View File

@@ -2,7 +2,7 @@ import { type Router as ExpressRouter, Router, Response } from 'express';
import { nanoid } from 'nanoid';
import { logger } from '../logger.js';
import { db } from '../db/index.js';
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
import { integrationTokens, tipFeedback, tipViews, tipScores, userPreferences } from '../db/schema.js';
import { eq, and, desc } from 'drizzle-orm';
import { requireAuth, AuthenticatedRequest } from '../middleware/session.js';
import { config } from '../config.js';
@@ -52,6 +52,16 @@ interface OrchestratorResult {
agentIds: string[];
}
async function loadOrchestratorPref<T>(userId: string, key: string): Promise<T | undefined> {
const rows = await db
.select({ valueJson: userPreferences.valueJson })
.from(userPreferences)
.where(and(eq(userPreferences.userId, userId), eq(userPreferences.scope, 'orchestrator'), eq(userPreferences.key, key)))
.limit(1);
if (!rows.length) return undefined;
try { return JSON.parse(rows[0].valueJson) as T; } catch { return undefined; }
}
async function fetchOrchestratorTip(
userId: string,
signals: Signal[],
@@ -59,9 +69,10 @@ async function fetchOrchestratorTip(
dayOfWeek: number,
traceparent?: string,
): Promise<OrchestratorResult | null> {
const [allAgentRows, eligibleIds] = await Promise.all([
const [allAgentRows, eligibleIds, scienceDestiny] = await Promise.all([
getActiveAgentOutputs(userId),
getEligibleAgentIds(userId),
loadOrchestratorPref<number>(userId, 'science_destiny'),
]);
const agentOutputs = allAgentRows
.filter((r) => eligibleIds.has(r.agentId))
@@ -78,7 +89,7 @@ async function fetchOrchestratorTip(
const res = await fetch(`${config.ML_SERVING_URL}/recommend`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body: JSON.stringify({ user_id: userId, agent_outputs: agentOutputs, tasks, hour_of_day: hour, day_of_week: dayOfWeek }),
body: JSON.stringify({ user_id: userId, agent_outputs: agentOutputs, tasks, hour_of_day: hour, day_of_week: dayOfWeek, science_destiny: scienceDestiny ?? 50 }),
signal: AbortSignal.timeout(15_000),
});
if (!res.ok) return null;

View File

@@ -68,14 +68,13 @@ async function runCycle(agentIds: string[]): Promise<void> {
let failed = 0;
for (const userId of userIds) {
const results = await Promise.allSettled(
agentIds.map((agentId) => computeAndStore(userId, agentId)),
);
for (const r of results) {
if (r.status === 'fulfilled') ok++;
else {
for (const agentId of agentIds) {
try {
await computeAndStore(userId, agentId);
ok++;
} catch (err: any) {
failed++;
logger.error({ err: r.reason, userId }, 'agent-scheduler: compute error');
logger.error({ err, userId, agentId }, 'agent-scheduler: compute error');
}
}
}