feat(observability): structured logs, W3C trace IDs, Sentry hooks (#18)
- TS: pino + pino-http; every HTTP request log includes traceId from W3C traceparent header (generated if absent); forwarded to ml/serving on all /score, /generate, /reward, and /api/ml proxy calls
- Python: structlog JSON; FastAPI middleware binds trace_id via contextvars so every log line within a request carries it
- Sentry: optional SENTRY_DSN init in both runtimes (no-op if unset)
- Replace all console.* calls across services/api with pino logger
- Update tests to spy on logger instead of console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { type Router as ExpressRouter, Router, Response } from 'express';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { logger } from '../logger.js';
|
||||
import { db } from '../db/index.js';
|
||||
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
|
||||
import { eq, and, desc } from 'drizzle-orm';
|
||||
@@ -85,6 +86,7 @@ async function remotePolicy(
|
||||
userId: string,
|
||||
tasks: TipCandidate[],
|
||||
profile: Profile,
|
||||
traceparent?: string,
|
||||
): Promise<{ tipId: string; score: number; policy: string } | null> {
|
||||
const hour = new Date().getHours();
|
||||
const dayOfWeek = new Date().getDay();
|
||||
@@ -102,11 +104,10 @@ async function remotePolicy(
|
||||
profile_features: profile,
|
||||
};
|
||||
|
||||
// Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012)
|
||||
try {
|
||||
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||
body: JSON.stringify(body),
|
||||
signal: AbortSignal.timeout(3000),
|
||||
});
|
||||
@@ -146,6 +147,7 @@ async function fetchLlmCandidates(
|
||||
dayOfWeek: number,
|
||||
promptVersion: string | null,
|
||||
profile: Profile,
|
||||
traceparent?: string,
|
||||
): Promise<LlmGenerateResult> {
|
||||
try {
|
||||
const tasks = signals.slice(0, 10).map((s) => ({
|
||||
@@ -156,7 +158,7 @@ async function fetchLlmCandidates(
|
||||
}));
|
||||
const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||
body: JSON.stringify({
|
||||
user_id: userId,
|
||||
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
|
||||
@@ -226,6 +228,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
dayOfWeek,
|
||||
requestedPromptVersion,
|
||||
profile,
|
||||
req.traceparent,
|
||||
);
|
||||
|
||||
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
|
||||
@@ -240,7 +243,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
||||
const t0 = Date.now();
|
||||
|
||||
// Stage 2: score — egreedy bandit with random fallback
|
||||
const scored = await remotePolicy(req.userId!, allCandidates, profile);
|
||||
const scored = await remotePolicy(req.userId!, allCandidates, profile, req.traceparent);
|
||||
const latencyMs = Date.now() - t0;
|
||||
const tip = scored
|
||||
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
|
||||
@@ -373,6 +376,7 @@ async function sendRewardWithRetry(
|
||||
reward: number,
|
||||
features: TipCandidate['features'],
|
||||
profile: Profile,
|
||||
traceparent?: string,
|
||||
): Promise<void> {
|
||||
const body = JSON.stringify({
|
||||
user_id: userId,
|
||||
@@ -387,7 +391,7 @@ async function sendRewardWithRetry(
|
||||
try {
|
||||
const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||
body,
|
||||
signal: AbortSignal.timeout(3000),
|
||||
});
|
||||
@@ -395,7 +399,7 @@ async function sendRewardWithRetry(
|
||||
throw new Error(`HTTP ${res.status}`);
|
||||
} catch (err: any) {
|
||||
if (attempt === 3) {
|
||||
console.error(`[reward] failed after 3 attempts for tip ${tipId}: ${err.message}`);
|
||||
logger.error({ tipId, err }, 'reward: failed after 3 attempts');
|
||||
bus.publish('signals.tip.reward_failed', {
|
||||
userId,
|
||||
tipId,
|
||||
@@ -468,7 +472,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
|
||||
if (candidate) {
|
||||
// Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
|
||||
const profile = await getProfile(req.userId!);
|
||||
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile);
|
||||
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile, req.traceparent);
|
||||
}
|
||||
|
||||
// Delegate action to the owning signal source (e.g. mark done in Todoist)
|
||||
|
||||
Reference in New Issue
Block a user