feat(observability): structured logs, W3C trace IDs, Sentry hooks (#18)

- TS: pino + pino-http; every HTTP request log includes a traceId taken
  from the W3C traceparent header (generated if absent); the traceparent
  header is forwarded to ml/serving on all /score, /generate, /reward,
  and /api/ml proxy calls
- Python: structlog JSON; FastAPI middleware binds trace_id via
  contextvars so every log line within a request carries it
- Sentry: optional SENTRY_DSN init in both runtimes (no-op if unset)
- Replace all console.* calls across services/api with pino logger
- Update tests to spy on logger instead of console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 03:37:28 +00:00
parent 7281af83a4
commit c4960d0601
18 changed files with 1041 additions and 64 deletions

View File

@@ -1,5 +1,6 @@
import { type Router as ExpressRouter, Router, Response } from 'express';
import { nanoid } from 'nanoid';
import { logger } from '../logger.js';
import { db } from '../db/index.js';
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
import { eq, and, desc } from 'drizzle-orm';
@@ -85,6 +86,7 @@ async function remotePolicy(
userId: string,
tasks: TipCandidate[],
profile: Profile,
traceparent?: string,
): Promise<{ tipId: string; score: number; policy: string } | null> {
const hour = new Date().getHours();
const dayOfWeek = new Date().getDay();
@@ -102,11 +104,10 @@ async function remotePolicy(
profile_features: profile,
};
// Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012)
try {
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body: JSON.stringify(body),
signal: AbortSignal.timeout(3000),
});
@@ -146,6 +147,7 @@ async function fetchLlmCandidates(
dayOfWeek: number,
promptVersion: string | null,
profile: Profile,
traceparent?: string,
): Promise<LlmGenerateResult> {
try {
const tasks = signals.slice(0, 10).map((s) => ({
@@ -156,7 +158,7 @@ async function fetchLlmCandidates(
}));
const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body: JSON.stringify({
user_id: userId,
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
@@ -226,6 +228,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
dayOfWeek,
requestedPromptVersion,
profile,
req.traceparent,
);
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
@@ -240,7 +243,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
const t0 = Date.now();
// Stage 2: score — egreedy bandit with random fallback
const scored = await remotePolicy(req.userId!, allCandidates, profile);
const scored = await remotePolicy(req.userId!, allCandidates, profile, req.traceparent);
const latencyMs = Date.now() - t0;
const tip = scored
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
@@ -373,6 +376,7 @@ async function sendRewardWithRetry(
reward: number,
features: TipCandidate['features'],
profile: Profile,
traceparent?: string,
): Promise<void> {
const body = JSON.stringify({
user_id: userId,
@@ -387,7 +391,7 @@ async function sendRewardWithRetry(
try {
const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body,
signal: AbortSignal.timeout(3000),
});
@@ -395,7 +399,7 @@ async function sendRewardWithRetry(
throw new Error(`HTTP ${res.status}`);
} catch (err: any) {
if (attempt === 3) {
console.error(`[reward] failed after 3 attempts for tip ${tipId}: ${err.message}`);
logger.error({ tipId, err }, 'reward: failed after 3 attempts');
bus.publish('signals.tip.reward_failed', {
userId,
tipId,
@@ -468,7 +472,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
if (candidate) {
// Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
const profile = await getProfile(req.userId!);
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile);
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile, req.traceparent);
}
// Delegate action to the owning signal source (e.g. mark done in Todoist)