feat(observability): structured logs, W3C trace IDs, Sentry hooks (#18)

- TS: pino + pino-http; every HTTP request log includes traceId from
  W3C traceparent header (generated if absent); forwarded to ml/serving
  on all /score, /generate, /reward, and /api/ml proxy calls
- Python: structlog JSON; FastAPI middleware binds trace_id via
  contextvars so every log line within a request carries it
- Sentry: optional SENTRY_DSN init in both runtimes (no-op if unset)
- Replace all console.* calls across services/api with pino logger
- Update tests to spy on logger instead of console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 03:37:28 +00:00
parent 7281af83a4
commit c4960d0601
18 changed files with 1041 additions and 64 deletions

View File

@@ -0,0 +1,20 @@
"""Structlog JSON configuration — import once at process start."""
import logging
import structlog
def configure() -> None:
    """Configure structlog to emit one JSON object per log event.

    Intended to be called once at process start, before any logger is used.

    Events below INFO are dropped by the filtering bound logger. Surviving
    events pass through the processor chain and are written by a
    ``PrintLogger``. The stdlib root logger is configured separately at
    WARNING for code that logs through ``logging`` directly.
    """
    structlog.configure(
        processors=[
            # Merge request-scoped values (e.g. trace_id) bound through
            # structlog.contextvars into every event.
            structlog.contextvars.merge_contextvars,
            structlog.stdlib.add_log_level,
            # NOTE: structlog.stdlib.add_logger_name is deliberately absent.
            # It reads ``logger.name``, which only stdlib loggers provide;
            # the PrintLogger produced by the factory below has no such
            # attribute, so that processor raises AttributeError on every
            # log call.
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            # Must stay last: renders the event dict to the final JSON string.
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
    )
    logging.basicConfig(level=logging.WARNING)

View File

@@ -34,12 +34,25 @@ from typing import Optional, Deque
import httpx import httpx
import numpy as np import numpy as np
from fastapi import FastAPI, HTTPException import sentry_sdk
import structlog
import structlog.contextvars
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel from pydantic import BaseModel
from starlette.middleware.base import BaseHTTPMiddleware
import logging_config
import nats_consumer import nats_consumer
from prompts import get_prompt from prompts import get_prompt
logging_config.configure()
_SENTRY_DSN = os.getenv("SENTRY_DSN")
if _SENTRY_DSN:
sentry_sdk.init(dsn=_SENTRY_DSN, environment=os.getenv("ENV", "development"))
log = structlog.get_logger()
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
@@ -50,6 +63,21 @@ async def lifespan(app: FastAPI):
app = FastAPI(title="oO ML Serving", version="1.0.0", lifespan=lifespan) app = FastAPI(title="oO ML Serving", version="1.0.0", lifespan=lifespan)
class _TracingMiddleware(BaseHTTPMiddleware):
    """Bind a W3C trace ID to structlog's contextvars for each request.

    The trace ID is taken from the incoming ``traceparent`` header when that
    header is well-formed; otherwise a fresh random one is generated so that
    every log line emitted while handling the request carries a ``trace_id``.
    """

    @staticmethod
    def _parse_trace_id(traceparent: str) -> Optional[str]:
        """Return the trace-id field of *traceparent*, or None if invalid.

        Accepts only the four-field ``version-traceid-parentid-flags`` form
        whose trace-id is 32 lowercase hex digits and not all zeros.
        """
        parts = traceparent.split("-")
        if len(parts) != 4:
            return None
        candidate = parts[1]
        if len(candidate) != 32 or candidate == "0" * 32:
            return None
        if any(ch not in "0123456789abcdef" for ch in candidate):
            return None
        return candidate

    async def dispatch(self, request: Request, call_next):
        """Reset the logging context, bind ``trace_id``, then run the request."""
        # Drop anything bound by a previous request before binding anew.
        structlog.contextvars.clear_contextvars()
        trace_id = self._parse_trace_id(request.headers.get("traceparent", ""))
        if trace_id is None:
            # Header absent or malformed: generate 16 random bytes (32 hex
            # chars) so logs for this request can still be correlated.
            trace_id = os.urandom(16).hex()
        structlog.contextvars.bind_contextvars(trace_id=trace_id)
        return await call_next(request)


app.add_middleware(_TracingMiddleware)
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000") LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000")
LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev") LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev")

View File

@@ -17,15 +17,15 @@ Config (env vars):
from __future__ import annotations from __future__ import annotations
import json import json
import logging
import os import os
import time import time
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import structlog
from schemas import TaskSyncedPayload, TipFeedbackPayload from schemas import TaskSyncedPayload, TipFeedbackPayload
logger = logging.getLogger(__name__) log = structlog.get_logger(__name__)
NATS_URL = os.getenv("NATS_URL", "") NATS_URL = os.getenv("NATS_URL", "")
NATS_DURABLE_PREFIX = os.getenv("NATS_DURABLE_PREFIX", "feature-pipeline") NATS_DURABLE_PREFIX = os.getenv("NATS_DURABLE_PREFIX", "feature-pipeline")
@@ -56,15 +56,12 @@ async def _handle(subject: str, payload: dict, state_dir: Path) -> None:
"last_sync_ts": msg.syncedAt, "last_sync_ts": msg.syncedAt,
"task_count": msg.count, "task_count": msg.count,
})) }))
logger.info("[nats] task_synced user=%s count=%s", msg.userId, msg.count) log.info("nats: task_synced", user_id=msg.userId, count=msg.count)
elif subject == "signals.tip.feedback": elif subject == "signals.tip.feedback":
msg = TipFeedbackPayload.model_validate(payload) msg = TipFeedbackPayload.model_validate(payload)
logger.info( log.info("nats: tip_feedback", user_id=msg.userId, tip_id=msg.tipId, action=msg.action, reward=msg.reward)
"[nats] tip_feedback user=%s tip=%s action=%s reward=%s",
msg.userId, msg.tipId, msg.action, msg.reward,
)
else: else:
logger.debug("[nats] unhandled subject=%s", subject) log.debug("nats: unhandled subject", subject=subject)
# ── Consumer factory ─────────────────────────────────────────────────────── # ── Consumer factory ───────────────────────────────────────────────────────
@@ -80,7 +77,7 @@ def _make_handler(key: str, state_dir: Path):
consumer_health[key]["processed"] += 1 consumer_health[key]["processed"] += 1
except Exception as exc: except Exception as exc:
consumer_health[key]["errors"] += 1 consumer_health[key]["errors"] += 1
logger.warning("[nats] processing error key=%s subject=%s: %s", key, msg.subject, exc) log.warning("nats: processing error", key=key, subject=msg.subject, exc=str(exc))
await msg.nak() await msg.nak()
return handler return handler
@@ -91,7 +88,7 @@ async def start(state_dir: Path) -> None:
"""Connect to NATS and register durable push consumers. No-op if NATS_URL is unset.""" """Connect to NATS and register durable push consumers. No-op if NATS_URL is unset."""
global _nc global _nc
if not NATS_URL: if not NATS_URL:
logger.info("[nats] NATS_URL unset — JetStream consumers disabled") log.info("nats: NATS_URL unset — JetStream consumers disabled")
return return
try: try:
@@ -105,9 +102,9 @@ async def start(state_dir: Path) -> None:
max_reconnect_attempts=-1, max_reconnect_attempts=-1,
) )
js = _nc.jetstream() js = _nc.jetstream()
logger.info("[nats] connected to %s", NATS_URL) log.info("nats: connected", url=NATS_URL)
except Exception as exc: except Exception as exc:
logger.warning("[nats] connection failed: %s — consumers disabled", exc) log.warning("nats: connection failed — consumers disabled", exc=str(exc))
_nc = None _nc = None
return return
@@ -126,9 +123,9 @@ async def start(state_dir: Path) -> None:
config=config, config=config,
) )
_subs.append(sub) _subs.append(sub)
logger.info("[nats] subscribed subject=%s durable=%s", subject, durable) log.info("nats: subscribed", subject=subject, durable=durable)
except Exception as exc: except Exception as exc:
logger.warning("[nats] subscribe failed key=%s: %s", key, exc) log.warning("nats: subscribe failed", key=key, exc=str(exc))
async def stop() -> None: async def stop() -> None:
@@ -146,4 +143,4 @@ async def stop() -> None:
except Exception: except Exception:
pass pass
_nc = None _nc = None
logger.info("[nats] disconnected") log.info("nats: disconnected")

View File

@@ -5,3 +5,5 @@ numpy>=1.26.0
httpx>=0.27.0 httpx>=0.27.0
anthropic>=0.40.0 anthropic>=0.40.0
nats-py>=2.9.0 nats-py>=2.9.0
structlog>=24.1.0
sentry-sdk>=2.0.0

877
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,7 @@
}, },
"dependencies": { "dependencies": {
"@oo/shared-types": "workspace:*", "@oo/shared-types": "workspace:*",
"@sentry/node": "^10.50.0",
"better-sqlite3": "^11.8.1", "better-sqlite3": "^11.8.1",
"cookie-parser": "^1.4.7", "cookie-parser": "^1.4.7",
"cors": "^2.8.5", "cors": "^2.8.5",
@@ -27,6 +28,8 @@
"nats": "^2.29.3", "nats": "^2.29.3",
"node-fetch": "^3.3.2", "node-fetch": "^3.3.2",
"openid-client": "^6.3.4", "openid-client": "^6.3.4",
"pino": "^10.3.1",
"pino-http": "^11.0.0",
"web-push": "^3.6.7", "web-push": "^3.6.7",
"zod": "^3.24.1" "zod": "^3.24.1"
}, },

View File

@@ -121,13 +121,14 @@ describe('connectNats — bridge bus → JetStream', () => {
it('swallows JetStream publish errors so the in-process bus keeps working', async () => { it('swallows JetStream publish errors so the in-process bus keeps working', async () => {
const { connectNats } = await import('../nats.js'); const { connectNats } = await import('../nats.js');
const { logger } = await import('../../logger.js');
const { bus } = await import('../bus.js'); const { bus } = await import('../bus.js');
await connectNats('nats://test:4222'); await connectNats('nats://test:4222');
// Force the next js.publish to reject. // Force the next js.publish to reject.
lastJsPublish.mockRejectedValueOnce(new Error('jetstream down')); lastJsPublish.mockRejectedValueOnce(new Error('jetstream down'));
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); const errSpy = vi.spyOn(logger, 'error');
expect(() => expect(() =>
bus.publish('signals.task.synced', { userId: 'u', source: 'todoist', count: 0, syncedAt: '' }), bus.publish('signals.task.synced', { userId: 'u', source: 'todoist', count: 0, syncedAt: '' }),
@@ -142,12 +143,16 @@ describe('connectNats — bridge bus → JetStream', () => {
describe('connectNats — failure mode', () => { describe('connectNats — failure mode', () => {
it('logs a warning and stays silent when connect rejects', async () => { it('logs a warning and stays silent when connect rejects', async () => {
const { connectNats } = await import('../nats.js'); const { connectNats } = await import('../nats.js');
const { logger } = await import('../../logger.js');
lastConnect.mockRejectedValueOnce(new Error('ECONNREFUSED')); lastConnect.mockRejectedValueOnce(new Error('ECONNREFUSED'));
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); const warnSpy = vi.spyOn(logger, 'warn');
await expect(connectNats('nats://nope:4222')).resolves.toBeUndefined(); await expect(connectNats('nats://nope:4222')).resolves.toBeUndefined();
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('connection failed')); expect(warnSpy).toHaveBeenCalledWith(
expect.objectContaining({ err: expect.anything() }),
expect.stringContaining('connection failed'),
);
}); });
}); });

View File

@@ -12,6 +12,7 @@
import type { NatsConnection, JetStreamClient, StreamConfig } from 'nats'; import type { NatsConnection, JetStreamClient, StreamConfig } from 'nats';
import { bus } from './bus.js'; import { bus } from './bus.js';
import { logger } from '../logger.js';
let nc: NatsConnection | null = null; let nc: NatsConnection | null = null;
let js: JetStreamClient | null = null; let js: JetStreamClient | null = null;
@@ -67,13 +68,13 @@ export async function connectNats(natsUrl: string): Promise<void> {
if (!js) return; if (!js) return;
const data = new TextEncoder().encode(JSON.stringify(payload)); const data = new TextEncoder().encode(JSON.stringify(payload));
js.publish(subject, data).catch((err: Error) => js.publish(subject, data).catch((err: Error) =>
console.error(`[nats] publish failed for ${subject}: ${err.message}`), logger.error({ err, subject }, 'nats publish failed'),
); );
}); });
console.log(`[nats] connected to ${natsUrl}, streams: ${STREAMS.map((s) => s.name).join(', ')}`); logger.info({ url: natsUrl, streams: STREAMS.map((s) => s.name) }, 'nats connected');
} catch (err: any) { } catch (err: any) {
console.warn(`[nats] connection failed — running without JetStream: ${err.message}`); logger.warn({ err }, 'nats connection failed — running without JetStream');
} }
} }

View File

@@ -1,7 +1,10 @@
import 'dotenv/config'; import 'dotenv/config';
import { logger } from './logger.js';
import express from 'express'; import express from 'express';
import { pinoHttp } from 'pino-http';
import cookieParser from 'cookie-parser'; import cookieParser from 'cookie-parser';
import cors from 'cors'; import cors from 'cors';
import { tracingMiddleware } from './middleware/tracing.js';
import { config } from './config.js'; import { config } from './config.js';
import { db, runMigrations } from './db/index.js'; import { db, runMigrations } from './db/index.js';
import { tipScores, tipFeedback } from './db/schema.js'; import { tipScores, tipFeedback } from './db/schema.js';
@@ -26,13 +29,11 @@ import { registerProfileSubscriptions } from './profile/subscriber.js';
await mkdir(dirname(config.DATABASE_PATH), { recursive: true }); await mkdir(dirname(config.DATABASE_PATH), { recursive: true });
runMigrations(); runMigrations();
// Keep the API alive on stray async faults (e.g. a single bad admin route)
// rather than dropping the whole process.
process.on('unhandledRejection', (reason) => { process.on('unhandledRejection', (reason) => {
console.error('[api] unhandledRejection', reason); logger.error({ err: reason }, 'unhandledRejection');
}); });
process.on('uncaughtException', (err) => { process.on('uncaughtException', (err) => {
console.error('[api] uncaughtException', err); logger.fatal({ err }, 'uncaughtException');
}); });
const app = express(); const app = express();
@@ -43,6 +44,15 @@ app.use(
credentials: true, credentials: true,
}), }),
); );
app.use(tracingMiddleware);
app.use(
pinoHttp({
logger,
genReqId: (req) => req.traceId,
customProps: (req) => ({ traceId: req.traceId }),
autoLogging: { ignore: (req) => req.url === '/health' },
}),
);
app.use(express.json()); app.use(express.json());
app.use(cookieParser()); app.use(cookieParser());
app.use(sessionMiddleware); app.use(sessionMiddleware);
@@ -56,16 +66,13 @@ app.use('/api/user', userRouter);
app.use('/api/push', pushRouter); app.use('/api/push', pushRouter);
app.use('/api/admin', adminRouter); app.use('/api/admin', adminRouter);
// Proxy ml/serving endpoints through the API (admin-only).
// Allows admin UI to call /api/ml/stats/:userId, /api/ml/features/:userId
// without needing direct access to the ml/serving port.
app.use('/api/ml', requireAuth as any, requireAdmin as any, async (req: Request, res: Response) => { app.use('/api/ml', requireAuth as any, requireAdmin as any, async (req: Request, res: Response) => {
const mlUrl = config.ML_SERVING_URL; const mlUrl = config.ML_SERVING_URL;
const target = `${mlUrl}${req.path}`; const target = `${mlUrl}${req.path}`;
try { try {
const upstream = await fetch(target, { const upstream = await fetch(target, {
method: req.method, method: req.method,
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', traceparent: req.traceparent },
body: req.method !== 'GET' ? JSON.stringify(req.body) : undefined, body: req.method !== 'GET' ? JSON.stringify(req.body) : undefined,
signal: AbortSignal.timeout(5000), signal: AbortSignal.timeout(5000),
}); });
@@ -82,7 +89,7 @@ async function purgeExpiredData() {
await db.delete(tipScores).where(lt(tipScores.servedAt, cutoff)); await db.delete(tipScores).where(lt(tipScores.servedAt, cutoff));
await db.delete(tipFeedback).where(lt(tipFeedback.createdAt, cutoff)); await db.delete(tipFeedback).where(lt(tipFeedback.createdAt, cutoff));
} catch (err: any) { } catch (err: any) {
console.error(`[purge] retention cleanup failed: ${err.message}`); logger.error({ err }, 'retention cleanup failed');
} }
} }
@@ -90,7 +97,7 @@ purgeExpiredData();
setInterval(purgeExpiredData, 24 * 60 * 60 * 1000); setInterval(purgeExpiredData, 24 * 60 * 60 * 1000);
app.listen(config.PORT, () => { app.listen(config.PORT, () => {
console.log(`oO API listening on http://localhost:${config.PORT}`); logger.info({ port: config.PORT }, 'oO API listening');
}); });
if (config.NATS_URL) { if (config.NATS_URL) {

View File

@@ -0,0 +1,12 @@
import pino from 'pino';
import * as Sentry from '@sentry/node';
// Sentry is opt-in: it is initialised only when SENTRY_DSN is set.
const sentryDsn = process.env['SENTRY_DSN'];
if (sentryDsn) {
  const environment = process.env['NODE_ENV'] ?? 'development';
  Sentry.init({ dsn: sentryDsn, environment });
}

// Minimum level comes from LOG_LEVEL and falls back to 'info'.
const level = process.env['LOG_LEVEL'] ?? 'info';

/** Process-wide pino logger shared by every module in the API service. */
export const logger = pino({ level });

// Re-exported so callers get Sentry from the same module that initialises it.
export { Sentry };

View File

@@ -0,0 +1,26 @@
import { randomBytes } from 'crypto';
import type { Request, Response, NextFunction } from 'express';
// Augments Express's Request type with the tracing fields that
// tracingMiddleware assigns on every request.
declare global {
namespace Express {
interface Request {
// 32-character hex trace ID: taken from the incoming `traceparent` header
// when it parses, otherwise randomly generated by tracingMiddleware.
traceId: string;
// Outgoing header value built by tracingMiddleware as
// `00-<traceId>-<parentId>-01`, where parentId is a fresh random 8-byte hex ID.
traceparent: string;
}
}
}
/**
 * Express middleware that attaches W3C trace context to the request.
 *
 * Reads the incoming `traceparent` header and reuses its trace-id when the
 * header has exactly four `-`-separated fields and the trace-id is 32
 * lowercase hex characters that are not all zeros (the all-zero ID is
 * invalid per the W3C Trace Context spec). Otherwise a random trace-id is
 * generated. A fresh random parent-id is always generated for this hop.
 *
 * Sets `req.traceId` and `req.traceparent` (`00-<traceId>-<parentId>-01`),
 * then calls `next()`.
 */
export function tracingMiddleware(req: Request, _res: Response, next: NextFunction): void {
  const incoming = req.headers['traceparent'] as string | undefined;
  const parts = incoming ? incoming.split('-') : [];
  const candidate = parts.length === 4 ? parts[1] : undefined;

  // A length check alone would let non-hex or all-zero IDs through and
  // propagate them to logs and downstream services.
  const traceId =
    candidate !== undefined && /^[0-9a-f]{32}$/.test(candidate) && candidate !== '0'.repeat(32)
      ? candidate
      : randomBytes(16).toString('hex');

  // This service is a new span in the trace, so it gets its own parent-id.
  const parentId = randomBytes(8).toString('hex');
  req.traceId = traceId;
  req.traceparent = `00-${traceId}-${parentId}-01`;
  next();
}

View File

@@ -1,4 +1,5 @@
import { type Router as ExpressRouter, Router, Response } from 'express'; import { type Router as ExpressRouter, Router, Response } from 'express';
import { logger } from '../logger.js';
import { db, rawSqlite } from '../db/index.js'; import { db, rawSqlite } from '../db/index.js';
import { import {
users, users,
@@ -766,7 +767,7 @@ router.post('/simulate/start', async (req: AuthenticatedRequest, res: Response)
// — e.g. in the alpine api container) would emit an unhandled 'error' event // — e.g. in the alpine api container) would emit an unhandled 'error' event
// and crash the whole API process. // and crash the whole API process.
child.on('error', async (err) => { child.on('error', async (err) => {
console.error('[sim] spawn error', err); logger.error({ err }, 'sim: spawn error');
_simProcesses.delete(id); _simProcesses.delete(id);
await db await db
.update(simRuns) .update(simRuns)

View File

@@ -5,6 +5,7 @@ import { db } from '../db/index.js';
import { users, sessions } from '../db/schema.js'; import { users, sessions } from '../db/schema.js';
import { eq } from 'drizzle-orm'; import { eq } from 'drizzle-orm';
import { config } from '../config.js'; import { config } from '../config.js';
import { logger } from '../logger.js';
const router: ExpressRouter = Router(); const router: ExpressRouter = Router();
@@ -36,7 +37,7 @@ router.get('/login', async (req: Request, res: Response) => {
setTimeout(() => pendingStates.delete(state), 10 * 60 * 1000); setTimeout(() => pendingStates.delete(state), 10 * 60 * 1000);
const redirectUri = `${config.API_BASE_URL}/api/auth/callback`; const redirectUri = `${config.API_BASE_URL}/api/auth/callback`;
console.log('[auth] redirect_uri sent to Google:', redirectUri); logger.info({ redirectUri }, 'auth: redirect_uri');
const authUrl = client.buildAuthorizationUrl(cfg, { const authUrl = client.buildAuthorizationUrl(cfg, {
redirect_uri: redirectUri, redirect_uri: redirectUri,
scope: 'openid email profile', scope: 'openid email profile',
@@ -72,7 +73,7 @@ router.get('/callback', async (req: Request, res: Response) => {
expectedState: state, expectedState: state,
}); });
} catch (err) { } catch (err) {
console.error('OAuth callback error', err); logger.error({ err }, 'auth: OAuth callback error');
res.status(400).json({ error: 'OAuth error' }); res.status(400).json({ error: 'OAuth error' });
return; return;
} }

View File

@@ -1,5 +1,6 @@
import { type Router as ExpressRouter, Router, Response } from 'express'; import { type Router as ExpressRouter, Router, Response } from 'express';
import { nanoid } from 'nanoid'; import { nanoid } from 'nanoid';
import { logger } from '../logger.js';
import { db } from '../db/index.js'; import { db } from '../db/index.js';
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js'; import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
import { eq, and, desc } from 'drizzle-orm'; import { eq, and, desc } from 'drizzle-orm';
@@ -85,6 +86,7 @@ async function remotePolicy(
userId: string, userId: string,
tasks: TipCandidate[], tasks: TipCandidate[],
profile: Profile, profile: Profile,
traceparent?: string,
): Promise<{ tipId: string; score: number; policy: string } | null> { ): Promise<{ tipId: string; score: number; policy: string } | null> {
const hour = new Date().getHours(); const hour = new Date().getHours();
const dayOfWeek = new Date().getDay(); const dayOfWeek = new Date().getDay();
@@ -102,11 +104,10 @@ async function remotePolicy(
profile_features: profile, profile_features: profile,
}; };
// Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012)
try { try {
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, { const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body: JSON.stringify(body), body: JSON.stringify(body),
signal: AbortSignal.timeout(3000), signal: AbortSignal.timeout(3000),
}); });
@@ -146,6 +147,7 @@ async function fetchLlmCandidates(
dayOfWeek: number, dayOfWeek: number,
promptVersion: string | null, promptVersion: string | null,
profile: Profile, profile: Profile,
traceparent?: string,
): Promise<LlmGenerateResult> { ): Promise<LlmGenerateResult> {
try { try {
const tasks = signals.slice(0, 10).map((s) => ({ const tasks = signals.slice(0, 10).map((s) => ({
@@ -156,7 +158,7 @@ async function fetchLlmCandidates(
})); }));
const res = await fetch(`${config.ML_SERVING_URL}/generate`, { const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body: JSON.stringify({ body: JSON.stringify({
user_id: userId, user_id: userId,
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek }, context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
@@ -226,6 +228,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
dayOfWeek, dayOfWeek,
requestedPromptVersion, requestedPromptVersion,
profile, profile,
req.traceparent,
); );
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates]; const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
@@ -240,7 +243,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
const t0 = Date.now(); const t0 = Date.now();
// Stage 2: score — egreedy bandit with random fallback // Stage 2: score — egreedy bandit with random fallback
const scored = await remotePolicy(req.userId!, allCandidates, profile); const scored = await remotePolicy(req.userId!, allCandidates, profile, req.traceparent);
const latencyMs = Date.now() - t0; const latencyMs = Date.now() - t0;
const tip = scored const tip = scored
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates)) ? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
@@ -373,6 +376,7 @@ async function sendRewardWithRetry(
reward: number, reward: number,
features: TipCandidate['features'], features: TipCandidate['features'],
profile: Profile, profile: Profile,
traceparent?: string,
): Promise<void> { ): Promise<void> {
const body = JSON.stringify({ const body = JSON.stringify({
user_id: userId, user_id: userId,
@@ -387,7 +391,7 @@ async function sendRewardWithRetry(
try { try {
const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, { const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
body, body,
signal: AbortSignal.timeout(3000), signal: AbortSignal.timeout(3000),
}); });
@@ -395,7 +399,7 @@ async function sendRewardWithRetry(
throw new Error(`HTTP ${res.status}`); throw new Error(`HTTP ${res.status}`);
} catch (err: any) { } catch (err: any) {
if (attempt === 3) { if (attempt === 3) {
console.error(`[reward] failed after 3 attempts for tip ${tipId}: ${err.message}`); logger.error({ tipId, err }, 'reward: failed after 3 attempts');
bus.publish('signals.tip.reward_failed', { bus.publish('signals.tip.reward_failed', {
userId, userId,
tipId, tipId,
@@ -468,7 +472,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
if (candidate) { if (candidate) {
// Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant. // Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
const profile = await getProfile(req.userId!); const profile = await getProfile(req.userId!);
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile); sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile, req.traceparent);
} }
// Delegate action to the owning signal source (e.g. mark done in Todoist) // Delegate action to the owning signal source (e.g. mark done in Todoist)

View File

@@ -8,6 +8,11 @@
*/ */
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
vi.mock('../../logger.js', () => ({
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), fatal: vi.fn() },
}));
import { logger } from '../../logger.js';
// ── mock the drizzle query chain: db.select(...).from(...).where(...) ──────── // ── mock the drizzle query chain: db.select(...).from(...).where(...) ────────
let users: { userId: string }[] = []; let users: { userId: string }[] = [];
const whereMock = vi.fn(async () => users); const whereMock = vi.fn(async () => users);
@@ -35,6 +40,7 @@ beforeEach(() => {
whereMock.mockClear(); whereMock.mockClear();
fromMock.mockClear(); fromMock.mockClear();
selectMock.mockClear(); selectMock.mockClear();
vi.clearAllMocks();
vi.useFakeTimers(); vi.useFakeTimers();
}); });
@@ -102,8 +108,6 @@ describe('startTodoistSyncScheduler', () => {
if (id === 'bad') throw new Error('todoist 401'); if (id === 'bad') throw new Error('todoist 401');
return []; return [];
}); });
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
startTodoistSyncScheduler(60_000); startTodoistSyncScheduler(60_000);
await vi.advanceTimersByTimeAsync(10_001); await vi.advanceTimersByTimeAsync(10_001);
@@ -112,19 +116,27 @@ describe('startTodoistSyncScheduler', () => {
await Promise.resolve(); await Promise.resolve();
expect(fetchSignalsMock).toHaveBeenCalledTimes(3); expect(fetchSignalsMock).toHaveBeenCalledTimes(3);
expect(errSpy).toHaveBeenCalledWith(expect.stringContaining('sync error'), expect.anything()); expect(logger.error).toHaveBeenCalledWith(
expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('2 ok, 1 failed')); expect.objectContaining({ err: expect.anything() }),
'scheduler: sync error',
);
expect(logger.info).toHaveBeenCalledWith(
expect.objectContaining({ ok: 2, failed: 1 }),
'scheduler: todoist sync',
);
}); });
it('survives a db query failure — logs and skips the tick', async () => { it('survives a db query failure — logs and skips the tick', async () => {
const { startTodoistSyncScheduler } = await import('../scheduler.js'); const { startTodoistSyncScheduler } = await import('../scheduler.js');
whereMock.mockRejectedValueOnce(new Error('sqlite locked')); whereMock.mockRejectedValueOnce(new Error('sqlite locked'));
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
startTodoistSyncScheduler(60_000); startTodoistSyncScheduler(60_000);
await vi.advanceTimersByTimeAsync(10_001); await vi.advanceTimersByTimeAsync(10_001);
expect(fetchSignalsMock).not.toHaveBeenCalled(); expect(fetchSignalsMock).not.toHaveBeenCalled();
expect(errSpy).toHaveBeenCalledWith(expect.stringContaining('failed to query users')); expect(logger.error).toHaveBeenCalledWith(
expect.objectContaining({ err: expect.anything() }),
'scheduler: failed to query users',
);
}); });
}); });

View File

@@ -1,4 +1,5 @@
import type { Signal, SignalSource } from '@oo/shared-types'; import type { Signal, SignalSource } from '@oo/shared-types';
import { logger } from '../logger.js';
/** /**
* Merges signals from all registered sources for a user. * Merges signals from all registered sources for a user.
@@ -24,7 +25,7 @@ export class SignalAggregator {
if (r.status === 'fulfilled') { if (r.status === 'fulfilled') {
signals.push(...r.value); signals.push(...r.value);
} else { } else {
console.error(`[aggregator] source '${this.sources[i].id}' failed:`, r.reason); logger.error({ sourceId: this.sources[i]!.id, err: r.reason }, 'aggregator: source failed');
} }
} }
return signals; return signals;

View File

@@ -13,6 +13,7 @@ import { db } from '../db/index.js';
import { integrationTokens } from '../db/schema.js'; import { integrationTokens } from '../db/schema.js';
import { eq } from 'drizzle-orm'; import { eq } from 'drizzle-orm';
import { todoistSource } from './todoist.js'; import { todoistSource } from './todoist.js';
import { logger } from '../logger.js';
const DEFAULT_INTERVAL_MS = 15 * 60 * 1000; const DEFAULT_INTERVAL_MS = 15 * 60 * 1000;
@@ -25,7 +26,7 @@ export function startTodoistSyncScheduler(intervalMs = DEFAULT_INTERVAL_MS): Nod
.from(integrationTokens) .from(integrationTokens)
.where(eq(integrationTokens.tokenStatus, 'active')); .where(eq(integrationTokens.tokenStatus, 'active'));
} catch (err: any) { } catch (err: any) {
console.error(`[scheduler] failed to query users: ${err.message}`); logger.error({ err }, 'scheduler: failed to query users');
return; return;
} }
@@ -39,10 +40,10 @@ export function startTodoistSyncScheduler(intervalMs = DEFAULT_INTERVAL_MS): Nod
let failed = 0; let failed = 0;
for (const r of results) { for (const r of results) {
if (r.status === 'fulfilled') ok++; if (r.status === 'fulfilled') ok++;
else { failed++; console.error(`[scheduler] sync error:`, r.reason); } else { failed++; logger.error({ err: r.reason }, 'scheduler: sync error'); }
} }
console.log(`[scheduler] todoist sync: ${ok} ok, ${failed} failed (${users.length} users)`); logger.info({ ok, failed, total: users.length }, 'scheduler: todoist sync');
} }
// Run once shortly after startup, then on interval // Run once shortly after startup, then on interval

View File

@@ -3,6 +3,7 @@ import { db } from '../db/index.js';
import { integrationTokens } from '../db/schema.js'; import { integrationTokens } from '../db/schema.js';
import { eq, and } from 'drizzle-orm'; import { eq, and } from 'drizzle-orm';
import { bus } from '../events/bus.js'; import { bus } from '../events/bus.js';
import { logger } from '../logger.js';
const CACHE_TTL_MS = 30_000; const CACHE_TTL_MS = 30_000;
@@ -46,7 +47,7 @@ export class TodoistSignalSource implements SignalSource {
if (!res.ok) { if (!res.ok) {
if (res.status === 401) { if (res.status === 401) {
console.error(`[todoist] token expired for user ${userId}`); logger.warn({ userId }, 'todoist: token expired');
bus.publish('signals.integration.token_expired', { bus.publish('signals.integration.token_expired', {
userId, userId,
provider: 'todoist', provider: 'todoist',