feat(observability): structured logs, W3C trace IDs, Sentry hooks (#18)
- TS: pino + pino-http; every HTTP request log includes traceId from W3C traceparent header (generated if absent); forwarded to ml/serving on all /score, /generate, /reward, and /api/ml proxy calls
- Python: structlog JSON; FastAPI middleware binds trace_id via contextvars so every log line within a request carries it
- Sentry: optional SENTRY_DSN init in both runtimes (no-op if unset)
- Replace all console.* calls across services/api with pino logger
- Update tests to spy on logger instead of console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,15 +17,15 @@ Config (env vars):
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import structlog
|
||||
from schemas import TaskSyncedPayload, TipFeedbackPayload
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
NATS_URL = os.getenv("NATS_URL", "")
|
||||
NATS_DURABLE_PREFIX = os.getenv("NATS_DURABLE_PREFIX", "feature-pipeline")
|
||||
@@ -56,15 +56,12 @@ async def _handle(subject: str, payload: dict, state_dir: Path) -> None:
|
||||
"last_sync_ts": msg.syncedAt,
|
||||
"task_count": msg.count,
|
||||
}))
|
||||
logger.info("[nats] task_synced user=%s count=%s", msg.userId, msg.count)
|
||||
log.info("nats: task_synced", user_id=msg.userId, count=msg.count)
|
||||
elif subject == "signals.tip.feedback":
|
||||
msg = TipFeedbackPayload.model_validate(payload)
|
||||
logger.info(
|
||||
"[nats] tip_feedback user=%s tip=%s action=%s reward=%s",
|
||||
msg.userId, msg.tipId, msg.action, msg.reward,
|
||||
)
|
||||
log.info("nats: tip_feedback", user_id=msg.userId, tip_id=msg.tipId, action=msg.action, reward=msg.reward)
|
||||
else:
|
||||
logger.debug("[nats] unhandled subject=%s", subject)
|
||||
log.debug("nats: unhandled subject", subject=subject)
|
||||
|
||||
|
||||
# ── Consumer factory ───────────────────────────────────────────────────────
|
||||
@@ -80,7 +77,7 @@ def _make_handler(key: str, state_dir: Path):
|
||||
consumer_health[key]["processed"] += 1
|
||||
except Exception as exc:
|
||||
consumer_health[key]["errors"] += 1
|
||||
logger.warning("[nats] processing error key=%s subject=%s: %s", key, msg.subject, exc)
|
||||
log.warning("nats: processing error", key=key, subject=msg.subject, exc=str(exc))
|
||||
await msg.nak()
|
||||
return handler
|
||||
|
||||
@@ -91,7 +88,7 @@ async def start(state_dir: Path) -> None:
|
||||
"""Connect to NATS and register durable push consumers. No-op if NATS_URL is unset."""
|
||||
global _nc
|
||||
if not NATS_URL:
|
||||
logger.info("[nats] NATS_URL unset — JetStream consumers disabled")
|
||||
log.info("nats: NATS_URL unset — JetStream consumers disabled")
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -105,9 +102,9 @@ async def start(state_dir: Path) -> None:
|
||||
max_reconnect_attempts=-1,
|
||||
)
|
||||
js = _nc.jetstream()
|
||||
logger.info("[nats] connected to %s", NATS_URL)
|
||||
log.info("nats: connected", url=NATS_URL)
|
||||
except Exception as exc:
|
||||
logger.warning("[nats] connection failed: %s — consumers disabled", exc)
|
||||
log.warning("nats: connection failed — consumers disabled", exc=str(exc))
|
||||
_nc = None
|
||||
return
|
||||
|
||||
@@ -126,9 +123,9 @@ async def start(state_dir: Path) -> None:
|
||||
config=config,
|
||||
)
|
||||
_subs.append(sub)
|
||||
logger.info("[nats] subscribed subject=%s durable=%s", subject, durable)
|
||||
log.info("nats: subscribed", subject=subject, durable=durable)
|
||||
except Exception as exc:
|
||||
logger.warning("[nats] subscribe failed key=%s: %s", key, exc)
|
||||
log.warning("nats: subscribe failed", key=key, exc=str(exc))
|
||||
|
||||
|
||||
async def stop() -> None:
|
||||
@@ -146,4 +143,4 @@ async def stop() -> None:
|
||||
except Exception:
|
||||
pass
|
||||
_nc = None
|
||||
logger.info("[nats] disconnected")
|
||||
log.info("nats: disconnected")
|
||||
|
||||
Reference in New Issue
Block a user