chore: remove Airflow completely from the stack

Drop all four Airflow containers (db, init, webserver, scheduler) from the
mlops compose profile, leaving MLflow as the sole mlops service. Remove
AIRFLOW_* env vars, config fields, health-check entries, DAG trigger code
in admin/bench routes, the airflow_dag_run_id schema column, Airflow nav
links and DAG-run links in the admin UI, the two Airflow DAG files
(bench_dag.py, sim_dag.py), and all related docs/ADR references.
Simulations now run exclusively via the subprocess path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 16:38:46 +00:00
parent ce1c8bde57
commit f8d66aa01f
27 changed files with 663 additions and 719 deletions

View File

@@ -26,9 +26,11 @@ from __future__ import annotations
import json
import math
import os
import sys
import time
from collections import deque
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Deque
@@ -43,7 +45,17 @@ from starlette.middleware.base import BaseHTTPMiddleware
import logging_config
import nats_consumer
from prompts import get_prompt
from prompts import get_prompt, build_orchestrator_messages
# Make ml.agents importable regardless of working directory.
# In Docker (WORKDIR=/app/ml/serving, PYTHONPATH=/app): /app already on path.
# In local dev (run from ml/serving/): repo root is two levels up.
# The directory two levels above this file is prepended to sys.path only when
# it is not already present, so repeated imports do not add duplicate entries.
_repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _repo_root not in sys.path:
    sys.path.insert(0, _repo_root)
# These imports must stay below the sys.path adjustment above (hence E402).
from ml.agents.base import AgentInput  # noqa: E402
from ml.agents.registry import get_agent, all_agents  # noqa: E402
logging_config.configure()
@@ -350,12 +362,61 @@ class GenerateResponse(BaseModel):
completion_tokens: int = 0
# ── Multi-agent models ─────────────────────────────────────────────────────
class AgentComputeRequest(BaseModel):
    """Request body for ``POST /agents/{agent_id}/compute``.

    Every field except ``now_iso`` is forwarded unchanged into the
    ``AgentInput`` handed to the selected agent.
    """

    user_id: str
    # Item schema is not defined here; entries are passed through as-is.
    tasks: list[dict] = []
    profile: dict[str, Optional[float]] = {}
    feedback_history: list[dict] = []
    # A trailing "Z" is accepted; a value without a UTC offset is treated as UTC.
    now_iso: Optional[str] = None  # ISO 8601; defaults to utcnow
class AgentComputeResponse(BaseModel):
    """Response body for ``POST /agents/{agent_id}/compute``.

    Each field is copied from the attribute of the same name on the object
    returned by the agent's ``compute`` call.
    """

    user_id: str
    agent_id: str
    prompt_text: str
    signals_snapshot: dict
    # Timestamps are carried as strings; format presumably ISO 8601 — TODO confirm
    # against the agent implementation.
    computed_at: str
    expires_at: str
    agent_version: str
class AgentOutputSnippet(BaseModel):
    """One pre-computed agent output supplied to ``POST /recommend``.

    Instances are dumped to plain dicts and passed to
    ``build_orchestrator_messages`` when the orchestrator prompt is built.
    """

    agent_id: str
    prompt_text: str
class RecommendRequest(BaseModel):
    """Request body for ``POST /recommend``.

    ``agent_outputs``, ``tasks``, ``hour_of_day`` and ``day_of_week`` are
    forwarded to ``build_orchestrator_messages``; ``user_id`` is used for
    logging and for the fallback tip id.
    """

    user_id: str
    agent_outputs: list[AgentOutputSnippet] = []
    # Item schema is not defined here; entries are passed through as-is.
    tasks: list[dict] = []
    # NOTE(review): no range validation is applied; presumably 0-23 — confirm.
    hour_of_day: int = 12
    # NOTE(review): weekday numbering convention is not visible here — confirm
    # against build_orchestrator_messages.
    day_of_week: int = 0
class TipResult(BaseModel):
    """A single tip returned to the caller inside ``RecommendResponse``."""

    id: str
    content: str
    # The /recommend handler never overrides ``source`` or ``kind``, so tips it
    # produces always carry these defaults.
    source: str = "llm"
    kind: str = "advice"
    rationale: Optional[str] = None
class RecommendResponse(BaseModel):
    """Response body for ``POST /recommend``."""

    tip: TipResult
    # Model name reported by the LLM gateway for the last attempt.
    model: str
    # Token counts are summed over every attempt made for the request,
    # including retries after an unparseable reply.
    prompt_tokens: int = 0
    completion_tokens: int = 0
# ── Endpoints ──────────────────────────────────────────────────────────────
@app.get("/health")
def health():
return {
"ok": True,
"agents": [a.agent_id for a in all_agents()],
"nats": {
"enabled": bool(nats_consumer.NATS_URL),
"consumers": nats_consumer.consumer_health,
@@ -368,6 +429,137 @@ _RETRY_SUFFIX = (
"Reply ONLY with the JSON array — no prose, no markdown fences."
)
_RETRY_SUFFIX_OBJ = (
"\n\nYour previous response was not valid JSON. "
"Reply ONLY with the JSON object — no prose, no markdown fences."
)
@app.post("/agents/{agent_id}/compute", response_model=AgentComputeResponse)
async def compute_agent(agent_id: str, req: AgentComputeRequest) -> AgentComputeResponse:
    """Run a single sub-agent for a user and return its prompt snippet.

    Called by the precompute pipeline for each (user_id, agent_id) pair.
    The caller is responsible for persisting the result to agent_outputs via the
    TypeScript API callback.

    Args:
        agent_id: Registry key of the agent to run (path parameter).
        req: Agent inputs; ``now_iso`` optionally pins the reference time.

    Returns:
        The agent's output copied field-by-field into the response model.

    Raises:
        HTTPException: 404 when ``agent_id`` is not registered, 422 when
            ``now_iso`` is not a parseable ISO 8601 timestamp, 500 when the
            agent's ``compute`` call raises.
    """
    try:
        agent = get_agent(agent_id)
    except KeyError:
        raise HTTPException(
            status_code=404, detail=f"Unknown agent: {agent_id!r}"
        ) from None

    if req.now_iso:
        try:
            # fromisoformat() on older Pythons rejects a trailing "Z", so map it
            # to an explicit UTC offset first.
            now = datetime.fromisoformat(req.now_iso.replace("Z", "+00:00"))
        except ValueError as exc:
            # A malformed timestamp is a client error, not a server fault.
            raise HTTPException(
                status_code=422,
                detail=f"Invalid now_iso (expected ISO 8601): {req.now_iso!r}",
            ) from exc
    else:
        now = datetime.now(timezone.utc)
    if now.tzinfo is None:
        # Naive timestamps are interpreted as UTC.
        now = now.replace(tzinfo=timezone.utc)

    inp = AgentInput(
        user_id=req.user_id,
        tasks=req.tasks,
        profile=req.profile,
        feedback_history=req.feedback_history,
        now=now,
    )

    try:
        output = agent.compute(inp)
    except Exception as exc:
        # Endpoint boundary: log, then translate any agent failure into a 500.
        log.error("agent_compute_failed", agent_id=agent_id, user_id=req.user_id, error=str(exc))
        raise HTTPException(status_code=500, detail=f"Agent compute failed: {exc}") from exc

    log.info("agent_computed", agent_id=agent_id, user_id=req.user_id, expires_at=output.expires_at)
    return AgentComputeResponse(
        user_id=output.user_id,
        agent_id=output.agent_id,
        prompt_text=output.prompt_text,
        signals_snapshot=output.signals_snapshot,
        computed_at=output.computed_at,
        expires_at=output.expires_at,
        agent_version=output.agent_version,
    )
def _parse_tip_object(raw: str) -> dict:
    """Extract the tip JSON object from a raw LLM reply.

    Strips an optional Markdown code fence (with or without a ``json`` tag),
    decodes the JSON and, when the model returned an array, uses its first
    element.

    Raises:
        ValueError: if the text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``), the array is empty, or the decoded value is not
            a JSON object.
    """
    text = raw.strip()
    if text.startswith("```"):
        parts = text.split("```")
        text = parts[1] if len(parts) > 1 else text
        if text.startswith("json"):
            text = text[4:]
    parsed = json.loads(text)
    if isinstance(parsed, list):
        if not parsed:
            raise ValueError("LLM returned an empty JSON array")
        parsed = parsed[0]
    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object, got {type(parsed).__name__}"
        )
    return parsed


@app.post("/recommend", response_model=RecommendResponse)
async def recommend(req: RecommendRequest) -> RecommendResponse:
    """Orchestrator: combine pre-computed agent outputs into one tip via LLM.

    Called in real time when a user requests a tip. agent_outputs should be
    the fresh rows from agent_outputs table (fetched by the TypeScript recommender
    before calling this endpoint). Falls back to raw task context if empty.

    The LLM is asked up to ``1 + _MAX_GENERATE_RETRIES`` times; after each
    unparseable reply the reply and a corrective instruction are appended to
    the conversation before retrying.

    Returns:
        The generated tip plus the model name and token usage summed over all
        attempts.

    Raises:
        HTTPException: 502 when LiteLLM answers with an error status, a
            non-JSON or unexpectedly shaped body, or when no attempt yields a
            JSON object; 503 when LiteLLM cannot be reached.
    """
    messages = build_orchestrator_messages(
        agent_outputs=[s.model_dump() for s in req.agent_outputs],
        tasks=req.tasks,
        hour_of_day=req.hour_of_day,
        day_of_week=req.day_of_week,
    )

    headers = {"Authorization": f"Bearer {LITELLM_MASTER_KEY}"}
    last_raw = ""
    last_parse_error = ""
    total_usage: dict = {"prompt_tokens": 0, "completion_tokens": 0}
    model_used = "tip-generator"

    async with httpx.AsyncClient(timeout=30.0) as client:
        for _attempt in range(1 + _MAX_GENERATE_RETRIES):
            payload = {"model": "tip-generator", "messages": messages, "temperature": 0.7}
            try:
                resp = await client.post(
                    f"{LITELLM_URL}/chat/completions", json=payload, headers=headers
                )
                resp.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise HTTPException(
                    status_code=502, detail=f"LiteLLM error: {e.response.text}"
                ) from e
            except httpx.RequestError as e:
                raise HTTPException(
                    status_code=503, detail=f"LiteLLM unreachable: {e}"
                ) from e

            # A 2xx body that is not JSON, or lacks the expected structure, is
            # an upstream fault: report it as 502 instead of crashing with 500.
            try:
                data = resp.json()
                content = data["choices"][0]["message"]["content"]
            except (ValueError, KeyError, IndexError, TypeError) as exc:
                raise HTTPException(
                    status_code=502,
                    detail=f"LiteLLM returned an unexpected response: {exc}",
                ) from exc

            # "usage" and its counters may be absent or null.
            usage = data.get("usage") or {}
            total_usage["prompt_tokens"] += usage.get("prompt_tokens") or 0
            total_usage["completion_tokens"] += usage.get("completion_tokens") or 0
            model_used = data.get("model") or "tip-generator"

            # A null/non-string content is handled like unparseable text so it
            # goes through the retry path instead of raising AttributeError.
            last_raw = content if isinstance(content, str) else ""

            try:
                item = _parse_tip_object(last_raw)
                break
            except ValueError as exc:
                last_parse_error = str(exc)
                messages.append({"role": "assistant", "content": last_raw})
                messages.append({"role": "user", "content": _RETRY_SUFFIX_OBJ})
        else:
            # Loop finished without a successful parse.
            raise HTTPException(
                status_code=502,
                detail=f"LLM returned invalid JSON after {_MAX_GENERATE_RETRIES} retries: "
                f"{last_parse_error}\n{last_raw[:200]}",
            )

    # The LLM controls these values; coerce to str so a numeric id or a null
    # field cannot fail response-model validation.
    raw_id = item.get("id")
    raw_content = item.get("content")
    raw_rationale = item.get("rationale")
    tip = TipResult(
        id=str(raw_id) if raw_id is not None else f"tip-{req.user_id[:8]}",
        content=str(raw_content) if raw_content is not None else "",
        rationale=str(raw_rationale) if raw_rationale is not None else None,
    )

    log.info(
        "recommend_served",
        user_id=req.user_id,
        agent_count=len(req.agent_outputs),
        tip_id=tip.id,
    )

    return RecommendResponse(
        tip=tip,
        model=model_used,
        prompt_tokens=total_usage["prompt_tokens"],
        completion_tokens=total_usage["completion_tokens"],
    )
# Number of additional LLM attempts made after the first one when a reply
# cannot be parsed as JSON (handlers loop over 1 + this many attempts).
_MAX_GENERATE_RETRIES = 2