feat(observability): structured logs, W3C trace IDs, Sentry hooks (#18)
- TS: pino + pino-http; every HTTP request log includes a traceId taken from the W3C traceparent header (generated if absent), and the trace context is forwarded to ml/serving on all /score, /generate, /reward, and /api/ml proxy calls
- Python: structlog JSON; FastAPI middleware binds trace_id via contextvars so every log line within a request carries it
- Sentry: optional SENTRY_DSN init in both runtimes (no-op if unset)
- Replace all console.* calls across services/api with the pino logger
- Update tests to spy on the logger instead of console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
20
ml/serving/logging_config.py
Normal file
20
ml/serving/logging_config.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Structlog JSON configuration — import once at process start."""
|
||||||
|
import logging
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
|
||||||
|
def configure() -> None:
    """Configure structlog to emit one JSON object per log event.

    Call once at process start, before any logger is used. Events below
    INFO are dropped by the filtering bound logger; the stdlib ``logging``
    root is configured separately at WARNING.
    """
    structlog.configure(
        processors=[
            # Merge values bound with structlog.contextvars (e.g. a
            # request-scoped trace_id) into every event.
            structlog.contextvars.merge_contextvars,
            structlog.stdlib.add_log_level,
            # NOTE: structlog.stdlib.add_logger_name is deliberately not
            # used here. It reads ``logger.name``, which only stdlib
            # loggers provide; the PrintLogger produced by the factory
            # below has no such attribute, so the processor would raise
            # AttributeError on every log call.
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
    )
    logging.basicConfig(level=logging.WARNING)
|
||||||
@@ -34,12 +34,25 @@ from typing import Optional, Deque
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from fastapi import FastAPI, HTTPException
|
import sentry_sdk
|
||||||
|
import structlog
|
||||||
|
import structlog.contextvars
|
||||||
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
|
|
||||||
|
import logging_config
|
||||||
import nats_consumer
|
import nats_consumer
|
||||||
from prompts import get_prompt
|
from prompts import get_prompt
|
||||||
|
|
||||||
|
logging_config.configure()
|
||||||
|
|
||||||
|
_SENTRY_DSN = os.getenv("SENTRY_DSN")
|
||||||
|
if _SENTRY_DSN:
|
||||||
|
sentry_sdk.init(dsn=_SENTRY_DSN, environment=os.getenv("ENV", "development"))
|
||||||
|
|
||||||
|
log = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
@@ -50,6 +63,21 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
app = FastAPI(title="oO ML Serving", version="1.0.0", lifespan=lifespan)
|
app = FastAPI(title="oO ML Serving", version="1.0.0", lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
|
class _TracingMiddleware(BaseHTTPMiddleware):
    """Bind a trace ID to structlog's contextvars for each HTTP request.

    The ID is taken from the incoming ``traceparent`` header when that
    header has four dash-separated fields and a 32-character trace-id
    field. Otherwise a fresh random ID is generated, so log lines emitted
    while handling the request always carry a ``trace_id``.
    """

    async def dispatch(self, request: Request, call_next):
        # Drop anything bound earlier in this context before binding the
        # ID for the current request.
        structlog.contextvars.clear_contextvars()

        trace_id = None
        traceparent = request.headers.get("traceparent", "")
        if traceparent:
            # Expected layout: version-traceid-parentid-flags
            parts = traceparent.split("-")
            if len(parts) == 4 and len(parts[1]) == 32:
                trace_id = parts[1]

        if not trace_id:
            # Header absent or malformed: mint a 16-byte (32 hex char) ID
            # so this request's logs can still be grouped together.
            trace_id = os.urandom(16).hex()

        structlog.contextvars.bind_contextvars(trace_id=trace_id)
        return await call_next(request)
|
||||||
|
|
||||||
|
|
||||||
|
app.add_middleware(_TracingMiddleware)
|
||||||
|
|
||||||
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000")
|
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000")
|
||||||
LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev")
|
LITELLM_MASTER_KEY = os.getenv("LITELLM_MASTER_KEY", "sk-oo-dev")
|
||||||
|
|
||||||
|
|||||||
@@ -17,15 +17,15 @@ Config (env vars):
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import structlog
|
||||||
from schemas import TaskSyncedPayload, TipFeedbackPayload
|
from schemas import TaskSyncedPayload, TipFeedbackPayload
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
log = structlog.get_logger(__name__)
|
||||||
|
|
||||||
NATS_URL = os.getenv("NATS_URL", "")
|
NATS_URL = os.getenv("NATS_URL", "")
|
||||||
NATS_DURABLE_PREFIX = os.getenv("NATS_DURABLE_PREFIX", "feature-pipeline")
|
NATS_DURABLE_PREFIX = os.getenv("NATS_DURABLE_PREFIX", "feature-pipeline")
|
||||||
@@ -56,15 +56,12 @@ async def _handle(subject: str, payload: dict, state_dir: Path) -> None:
|
|||||||
"last_sync_ts": msg.syncedAt,
|
"last_sync_ts": msg.syncedAt,
|
||||||
"task_count": msg.count,
|
"task_count": msg.count,
|
||||||
}))
|
}))
|
||||||
logger.info("[nats] task_synced user=%s count=%s", msg.userId, msg.count)
|
log.info("nats: task_synced", user_id=msg.userId, count=msg.count)
|
||||||
elif subject == "signals.tip.feedback":
|
elif subject == "signals.tip.feedback":
|
||||||
msg = TipFeedbackPayload.model_validate(payload)
|
msg = TipFeedbackPayload.model_validate(payload)
|
||||||
logger.info(
|
log.info("nats: tip_feedback", user_id=msg.userId, tip_id=msg.tipId, action=msg.action, reward=msg.reward)
|
||||||
"[nats] tip_feedback user=%s tip=%s action=%s reward=%s",
|
|
||||||
msg.userId, msg.tipId, msg.action, msg.reward,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
logger.debug("[nats] unhandled subject=%s", subject)
|
log.debug("nats: unhandled subject", subject=subject)
|
||||||
|
|
||||||
|
|
||||||
# ── Consumer factory ───────────────────────────────────────────────────────
|
# ── Consumer factory ───────────────────────────────────────────────────────
|
||||||
@@ -80,7 +77,7 @@ def _make_handler(key: str, state_dir: Path):
|
|||||||
consumer_health[key]["processed"] += 1
|
consumer_health[key]["processed"] += 1
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
consumer_health[key]["errors"] += 1
|
consumer_health[key]["errors"] += 1
|
||||||
logger.warning("[nats] processing error key=%s subject=%s: %s", key, msg.subject, exc)
|
log.warning("nats: processing error", key=key, subject=msg.subject, exc=str(exc))
|
||||||
await msg.nak()
|
await msg.nak()
|
||||||
return handler
|
return handler
|
||||||
|
|
||||||
@@ -91,7 +88,7 @@ async def start(state_dir: Path) -> None:
|
|||||||
"""Connect to NATS and register durable push consumers. No-op if NATS_URL is unset."""
|
"""Connect to NATS and register durable push consumers. No-op if NATS_URL is unset."""
|
||||||
global _nc
|
global _nc
|
||||||
if not NATS_URL:
|
if not NATS_URL:
|
||||||
logger.info("[nats] NATS_URL unset — JetStream consumers disabled")
|
log.info("nats: NATS_URL unset — JetStream consumers disabled")
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -105,9 +102,9 @@ async def start(state_dir: Path) -> None:
|
|||||||
max_reconnect_attempts=-1,
|
max_reconnect_attempts=-1,
|
||||||
)
|
)
|
||||||
js = _nc.jetstream()
|
js = _nc.jetstream()
|
||||||
logger.info("[nats] connected to %s", NATS_URL)
|
log.info("nats: connected", url=NATS_URL)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("[nats] connection failed: %s — consumers disabled", exc)
|
log.warning("nats: connection failed — consumers disabled", exc=str(exc))
|
||||||
_nc = None
|
_nc = None
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -126,9 +123,9 @@ async def start(state_dir: Path) -> None:
|
|||||||
config=config,
|
config=config,
|
||||||
)
|
)
|
||||||
_subs.append(sub)
|
_subs.append(sub)
|
||||||
logger.info("[nats] subscribed subject=%s durable=%s", subject, durable)
|
log.info("nats: subscribed", subject=subject, durable=durable)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("[nats] subscribe failed key=%s: %s", key, exc)
|
log.warning("nats: subscribe failed", key=key, exc=str(exc))
|
||||||
|
|
||||||
|
|
||||||
async def stop() -> None:
|
async def stop() -> None:
|
||||||
@@ -146,4 +143,4 @@ async def stop() -> None:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
_nc = None
|
_nc = None
|
||||||
logger.info("[nats] disconnected")
|
log.info("nats: disconnected")
|
||||||
|
|||||||
@@ -5,3 +5,5 @@ numpy>=1.26.0
|
|||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
anthropic>=0.40.0
|
anthropic>=0.40.0
|
||||||
nats-py>=2.9.0
|
nats-py>=2.9.0
|
||||||
|
structlog>=24.1.0
|
||||||
|
sentry-sdk>=2.0.0
|
||||||
|
|||||||
877
pnpm-lock.yaml
generated
877
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -16,6 +16,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@oo/shared-types": "workspace:*",
|
"@oo/shared-types": "workspace:*",
|
||||||
|
"@sentry/node": "^10.50.0",
|
||||||
"better-sqlite3": "^11.8.1",
|
"better-sqlite3": "^11.8.1",
|
||||||
"cookie-parser": "^1.4.7",
|
"cookie-parser": "^1.4.7",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
@@ -27,6 +28,8 @@
|
|||||||
"nats": "^2.29.3",
|
"nats": "^2.29.3",
|
||||||
"node-fetch": "^3.3.2",
|
"node-fetch": "^3.3.2",
|
||||||
"openid-client": "^6.3.4",
|
"openid-client": "^6.3.4",
|
||||||
|
"pino": "^10.3.1",
|
||||||
|
"pino-http": "^11.0.0",
|
||||||
"web-push": "^3.6.7",
|
"web-push": "^3.6.7",
|
||||||
"zod": "^3.24.1"
|
"zod": "^3.24.1"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -121,13 +121,14 @@ describe('connectNats — bridge bus → JetStream', () => {
|
|||||||
|
|
||||||
it('swallows JetStream publish errors so the in-process bus keeps working', async () => {
|
it('swallows JetStream publish errors so the in-process bus keeps working', async () => {
|
||||||
const { connectNats } = await import('../nats.js');
|
const { connectNats } = await import('../nats.js');
|
||||||
|
const { logger } = await import('../../logger.js');
|
||||||
const { bus } = await import('../bus.js');
|
const { bus } = await import('../bus.js');
|
||||||
|
|
||||||
await connectNats('nats://test:4222');
|
await connectNats('nats://test:4222');
|
||||||
|
|
||||||
// Force the next js.publish to reject.
|
// Force the next js.publish to reject.
|
||||||
lastJsPublish.mockRejectedValueOnce(new Error('jetstream down'));
|
lastJsPublish.mockRejectedValueOnce(new Error('jetstream down'));
|
||||||
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
|
const errSpy = vi.spyOn(logger, 'error');
|
||||||
|
|
||||||
expect(() =>
|
expect(() =>
|
||||||
bus.publish('signals.task.synced', { userId: 'u', source: 'todoist', count: 0, syncedAt: '' }),
|
bus.publish('signals.task.synced', { userId: 'u', source: 'todoist', count: 0, syncedAt: '' }),
|
||||||
@@ -142,12 +143,16 @@ describe('connectNats — bridge bus → JetStream', () => {
|
|||||||
describe('connectNats — failure mode', () => {
|
describe('connectNats — failure mode', () => {
|
||||||
it('logs a warning and stays silent when connect rejects', async () => {
|
it('logs a warning and stays silent when connect rejects', async () => {
|
||||||
const { connectNats } = await import('../nats.js');
|
const { connectNats } = await import('../nats.js');
|
||||||
|
const { logger } = await import('../../logger.js');
|
||||||
|
|
||||||
lastConnect.mockRejectedValueOnce(new Error('ECONNREFUSED'));
|
lastConnect.mockRejectedValueOnce(new Error('ECONNREFUSED'));
|
||||||
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
|
const warnSpy = vi.spyOn(logger, 'warn');
|
||||||
|
|
||||||
await expect(connectNats('nats://nope:4222')).resolves.toBeUndefined();
|
await expect(connectNats('nats://nope:4222')).resolves.toBeUndefined();
|
||||||
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('connection failed'));
|
expect(warnSpy).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ err: expect.anything() }),
|
||||||
|
expect.stringContaining('connection failed'),
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
import type { NatsConnection, JetStreamClient, StreamConfig } from 'nats';
|
import type { NatsConnection, JetStreamClient, StreamConfig } from 'nats';
|
||||||
import { bus } from './bus.js';
|
import { bus } from './bus.js';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
|
|
||||||
let nc: NatsConnection | null = null;
|
let nc: NatsConnection | null = null;
|
||||||
let js: JetStreamClient | null = null;
|
let js: JetStreamClient | null = null;
|
||||||
@@ -67,13 +68,13 @@ export async function connectNats(natsUrl: string): Promise<void> {
|
|||||||
if (!js) return;
|
if (!js) return;
|
||||||
const data = new TextEncoder().encode(JSON.stringify(payload));
|
const data = new TextEncoder().encode(JSON.stringify(payload));
|
||||||
js.publish(subject, data).catch((err: Error) =>
|
js.publish(subject, data).catch((err: Error) =>
|
||||||
console.error(`[nats] publish failed for ${subject}: ${err.message}`),
|
logger.error({ err, subject }, 'nats publish failed'),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(`[nats] connected to ${natsUrl}, streams: ${STREAMS.map((s) => s.name).join(', ')}`);
|
logger.info({ url: natsUrl, streams: STREAMS.map((s) => s.name) }, 'nats connected');
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.warn(`[nats] connection failed — running without JetStream: ${err.message}`);
|
logger.warn({ err }, 'nats connection failed — running without JetStream');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import 'dotenv/config';
|
import 'dotenv/config';
|
||||||
|
import { logger } from './logger.js';
|
||||||
import express from 'express';
|
import express from 'express';
|
||||||
|
import { pinoHttp } from 'pino-http';
|
||||||
import cookieParser from 'cookie-parser';
|
import cookieParser from 'cookie-parser';
|
||||||
import cors from 'cors';
|
import cors from 'cors';
|
||||||
|
import { tracingMiddleware } from './middleware/tracing.js';
|
||||||
import { config } from './config.js';
|
import { config } from './config.js';
|
||||||
import { db, runMigrations } from './db/index.js';
|
import { db, runMigrations } from './db/index.js';
|
||||||
import { tipScores, tipFeedback } from './db/schema.js';
|
import { tipScores, tipFeedback } from './db/schema.js';
|
||||||
@@ -26,13 +29,11 @@ import { registerProfileSubscriptions } from './profile/subscriber.js';
|
|||||||
await mkdir(dirname(config.DATABASE_PATH), { recursive: true });
|
await mkdir(dirname(config.DATABASE_PATH), { recursive: true });
|
||||||
runMigrations();
|
runMigrations();
|
||||||
|
|
||||||
// Keep the API alive on stray async faults (e.g. a single bad admin route)
|
|
||||||
// rather than dropping the whole process.
|
|
||||||
process.on('unhandledRejection', (reason) => {
|
process.on('unhandledRejection', (reason) => {
|
||||||
console.error('[api] unhandledRejection', reason);
|
logger.error({ err: reason }, 'unhandledRejection');
|
||||||
});
|
});
|
||||||
process.on('uncaughtException', (err) => {
|
process.on('uncaughtException', (err) => {
|
||||||
console.error('[api] uncaughtException', err);
|
logger.fatal({ err }, 'uncaughtException');
|
||||||
});
|
});
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
@@ -43,6 +44,15 @@ app.use(
|
|||||||
credentials: true,
|
credentials: true,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
app.use(tracingMiddleware);
|
||||||
|
app.use(
|
||||||
|
pinoHttp({
|
||||||
|
logger,
|
||||||
|
genReqId: (req) => req.traceId,
|
||||||
|
customProps: (req) => ({ traceId: req.traceId }),
|
||||||
|
autoLogging: { ignore: (req) => req.url === '/health' },
|
||||||
|
}),
|
||||||
|
);
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
app.use(cookieParser());
|
app.use(cookieParser());
|
||||||
app.use(sessionMiddleware);
|
app.use(sessionMiddleware);
|
||||||
@@ -56,16 +66,13 @@ app.use('/api/user', userRouter);
|
|||||||
app.use('/api/push', pushRouter);
|
app.use('/api/push', pushRouter);
|
||||||
app.use('/api/admin', adminRouter);
|
app.use('/api/admin', adminRouter);
|
||||||
|
|
||||||
// Proxy ml/serving endpoints through the API (admin-only).
|
|
||||||
// Allows admin UI to call /api/ml/stats/:userId, /api/ml/features/:userId
|
|
||||||
// without needing direct access to the ml/serving port.
|
|
||||||
app.use('/api/ml', requireAuth as any, requireAdmin as any, async (req: Request, res: Response) => {
|
app.use('/api/ml', requireAuth as any, requireAdmin as any, async (req: Request, res: Response) => {
|
||||||
const mlUrl = config.ML_SERVING_URL;
|
const mlUrl = config.ML_SERVING_URL;
|
||||||
const target = `${mlUrl}${req.path}`;
|
const target = `${mlUrl}${req.path}`;
|
||||||
try {
|
try {
|
||||||
const upstream = await fetch(target, {
|
const upstream = await fetch(target, {
|
||||||
method: req.method,
|
method: req.method,
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', traceparent: req.traceparent },
|
||||||
body: req.method !== 'GET' ? JSON.stringify(req.body) : undefined,
|
body: req.method !== 'GET' ? JSON.stringify(req.body) : undefined,
|
||||||
signal: AbortSignal.timeout(5000),
|
signal: AbortSignal.timeout(5000),
|
||||||
});
|
});
|
||||||
@@ -82,7 +89,7 @@ async function purgeExpiredData() {
|
|||||||
await db.delete(tipScores).where(lt(tipScores.servedAt, cutoff));
|
await db.delete(tipScores).where(lt(tipScores.servedAt, cutoff));
|
||||||
await db.delete(tipFeedback).where(lt(tipFeedback.createdAt, cutoff));
|
await db.delete(tipFeedback).where(lt(tipFeedback.createdAt, cutoff));
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.error(`[purge] retention cleanup failed: ${err.message}`);
|
logger.error({ err }, 'retention cleanup failed');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -90,7 +97,7 @@ purgeExpiredData();
|
|||||||
setInterval(purgeExpiredData, 24 * 60 * 60 * 1000);
|
setInterval(purgeExpiredData, 24 * 60 * 60 * 1000);
|
||||||
|
|
||||||
app.listen(config.PORT, () => {
|
app.listen(config.PORT, () => {
|
||||||
console.log(`oO API listening on http://localhost:${config.PORT}`);
|
logger.info({ port: config.PORT }, 'oO API listening');
|
||||||
});
|
});
|
||||||
|
|
||||||
if (config.NATS_URL) {
|
if (config.NATS_URL) {
|
||||||
|
|||||||
12
services/api/src/logger.ts
Normal file
12
services/api/src/logger.ts
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
import pino from 'pino';
|
||||||
|
import * as Sentry from '@sentry/node';
|
||||||
|
|
||||||
|
if (process.env['SENTRY_DSN']) {
|
||||||
|
Sentry.init({
|
||||||
|
dsn: process.env['SENTRY_DSN'],
|
||||||
|
environment: process.env['NODE_ENV'] ?? 'development',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export const logger = pino({ level: process.env['LOG_LEVEL'] ?? 'info' });
|
||||||
|
export { Sentry };
|
||||||
26
services/api/src/middleware/tracing.ts
Normal file
26
services/api/src/middleware/tracing.ts
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import { randomBytes } from 'crypto';
|
||||||
|
import type { Request, Response, NextFunction } from 'express';
|
||||||
|
|
||||||
|
declare global {
|
||||||
|
namespace Express {
|
||||||
|
interface Request {
|
||||||
|
traceId: string;
|
||||||
|
traceparent: string;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** W3C trace-id format: exactly 32 lowercase hexadecimal characters. */
const TRACE_ID_PATTERN = /^[0-9a-f]{32}$/;

/** The all-zero trace-id is explicitly invalid in the W3C Trace Context spec. */
const ALL_ZERO_TRACE_ID = '0'.repeat(32);

/**
 * Express middleware that attaches W3C trace context to the request.
 *
 * Reuses the trace-id from an incoming `traceparent` header when the header
 * has four dash-separated fields and a well-formed trace-id; otherwise
 * generates a new random trace-id. A new random parent-id is always
 * generated for this hop.
 *
 * Sets `req.traceId` (32 hex chars) and `req.traceparent`
 * (`00-<traceId>-<parentId>-01`), then calls `next()`.
 */
export function tracingMiddleware(req: Request, _res: Response, next: NextFunction): void {
  const incoming = req.headers['traceparent'];
  // Guard the type: only a plain string header can be parsed.
  const header = typeof incoming === 'string' ? incoming : undefined;
  const parts = header ? header.split('-') : [];
  const candidate = parts.length === 4 ? parts[1] : undefined;

  // The trace-id is client-supplied and is re-emitted in logs and in the
  // outgoing traceparent, so accept it only when it is well-formed.
  const traceId =
    candidate !== undefined && TRACE_ID_PATTERN.test(candidate) && candidate !== ALL_ZERO_TRACE_ID
      ? candidate
      : randomBytes(16).toString('hex');

  const parentId = randomBytes(8).toString('hex');
  req.traceId = traceId;
  req.traceparent = `00-${traceId}-${parentId}-01`;
  next();
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import { type Router as ExpressRouter, Router, Response } from 'express';
|
import { type Router as ExpressRouter, Router, Response } from 'express';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
import { db, rawSqlite } from '../db/index.js';
|
import { db, rawSqlite } from '../db/index.js';
|
||||||
import {
|
import {
|
||||||
users,
|
users,
|
||||||
@@ -766,7 +767,7 @@ router.post('/simulate/start', async (req: AuthenticatedRequest, res: Response)
|
|||||||
// — e.g. in the alpine api container) would emit an unhandled 'error' event
|
// — e.g. in the alpine api container) would emit an unhandled 'error' event
|
||||||
// and crash the whole API process.
|
// and crash the whole API process.
|
||||||
child.on('error', async (err) => {
|
child.on('error', async (err) => {
|
||||||
console.error('[sim] spawn error', err);
|
logger.error({ err }, 'sim: spawn error');
|
||||||
_simProcesses.delete(id);
|
_simProcesses.delete(id);
|
||||||
await db
|
await db
|
||||||
.update(simRuns)
|
.update(simRuns)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import { db } from '../db/index.js';
|
|||||||
import { users, sessions } from '../db/schema.js';
|
import { users, sessions } from '../db/schema.js';
|
||||||
import { eq } from 'drizzle-orm';
|
import { eq } from 'drizzle-orm';
|
||||||
import { config } from '../config.js';
|
import { config } from '../config.js';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
|
|
||||||
const router: ExpressRouter = Router();
|
const router: ExpressRouter = Router();
|
||||||
|
|
||||||
@@ -36,7 +37,7 @@ router.get('/login', async (req: Request, res: Response) => {
|
|||||||
setTimeout(() => pendingStates.delete(state), 10 * 60 * 1000);
|
setTimeout(() => pendingStates.delete(state), 10 * 60 * 1000);
|
||||||
|
|
||||||
const redirectUri = `${config.API_BASE_URL}/api/auth/callback`;
|
const redirectUri = `${config.API_BASE_URL}/api/auth/callback`;
|
||||||
console.log('[auth] redirect_uri sent to Google:', redirectUri);
|
logger.info({ redirectUri }, 'auth: redirect_uri');
|
||||||
const authUrl = client.buildAuthorizationUrl(cfg, {
|
const authUrl = client.buildAuthorizationUrl(cfg, {
|
||||||
redirect_uri: redirectUri,
|
redirect_uri: redirectUri,
|
||||||
scope: 'openid email profile',
|
scope: 'openid email profile',
|
||||||
@@ -72,7 +73,7 @@ router.get('/callback', async (req: Request, res: Response) => {
|
|||||||
expectedState: state,
|
expectedState: state,
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('OAuth callback error', err);
|
logger.error({ err }, 'auth: OAuth callback error');
|
||||||
res.status(400).json({ error: 'OAuth error' });
|
res.status(400).json({ error: 'OAuth error' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { type Router as ExpressRouter, Router, Response } from 'express';
|
import { type Router as ExpressRouter, Router, Response } from 'express';
|
||||||
import { nanoid } from 'nanoid';
|
import { nanoid } from 'nanoid';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
import { db } from '../db/index.js';
|
import { db } from '../db/index.js';
|
||||||
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
|
import { integrationTokens, tipFeedback, tipViews, tipScores } from '../db/schema.js';
|
||||||
import { eq, and, desc } from 'drizzle-orm';
|
import { eq, and, desc } from 'drizzle-orm';
|
||||||
@@ -85,6 +86,7 @@ async function remotePolicy(
|
|||||||
userId: string,
|
userId: string,
|
||||||
tasks: TipCandidate[],
|
tasks: TipCandidate[],
|
||||||
profile: Profile,
|
profile: Profile,
|
||||||
|
traceparent?: string,
|
||||||
): Promise<{ tipId: string; score: number; policy: string } | null> {
|
): Promise<{ tipId: string; score: number; policy: string } | null> {
|
||||||
const hour = new Date().getHours();
|
const hour = new Date().getHours();
|
||||||
const dayOfWeek = new Date().getDay();
|
const dayOfWeek = new Date().getDay();
|
||||||
@@ -102,11 +104,10 @@ async function remotePolicy(
|
|||||||
profile_features: profile,
|
profile_features: profile,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Active policy: egreedy-v2 (promoted from shadow after offline sim — ADR-0012)
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
|
const res = await fetch(`${config.ML_SERVING_URL}/score/egreedy/v2`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
signal: AbortSignal.timeout(3000),
|
signal: AbortSignal.timeout(3000),
|
||||||
});
|
});
|
||||||
@@ -146,6 +147,7 @@ async function fetchLlmCandidates(
|
|||||||
dayOfWeek: number,
|
dayOfWeek: number,
|
||||||
promptVersion: string | null,
|
promptVersion: string | null,
|
||||||
profile: Profile,
|
profile: Profile,
|
||||||
|
traceparent?: string,
|
||||||
): Promise<LlmGenerateResult> {
|
): Promise<LlmGenerateResult> {
|
||||||
try {
|
try {
|
||||||
const tasks = signals.slice(0, 10).map((s) => ({
|
const tasks = signals.slice(0, 10).map((s) => ({
|
||||||
@@ -156,7 +158,7 @@ async function fetchLlmCandidates(
|
|||||||
}));
|
}));
|
||||||
const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
|
const res = await fetch(`${config.ML_SERVING_URL}/generate`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
user_id: userId,
|
user_id: userId,
|
||||||
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
|
context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
|
||||||
@@ -226,6 +228,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
|||||||
dayOfWeek,
|
dayOfWeek,
|
||||||
requestedPromptVersion,
|
requestedPromptVersion,
|
||||||
profile,
|
profile,
|
||||||
|
req.traceparent,
|
||||||
);
|
);
|
||||||
|
|
||||||
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
|
const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
|
||||||
@@ -240,7 +243,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
|
|||||||
const t0 = Date.now();
|
const t0 = Date.now();
|
||||||
|
|
||||||
// Stage 2: score — egreedy bandit with random fallback
|
// Stage 2: score — egreedy bandit with random fallback
|
||||||
const scored = await remotePolicy(req.userId!, allCandidates, profile);
|
const scored = await remotePolicy(req.userId!, allCandidates, profile, req.traceparent);
|
||||||
const latencyMs = Date.now() - t0;
|
const latencyMs = Date.now() - t0;
|
||||||
const tip = scored
|
const tip = scored
|
||||||
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
|
? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))
|
||||||
@@ -373,6 +376,7 @@ async function sendRewardWithRetry(
|
|||||||
reward: number,
|
reward: number,
|
||||||
features: TipCandidate['features'],
|
features: TipCandidate['features'],
|
||||||
profile: Profile,
|
profile: Profile,
|
||||||
|
traceparent?: string,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const body = JSON.stringify({
|
const body = JSON.stringify({
|
||||||
user_id: userId,
|
user_id: userId,
|
||||||
@@ -387,7 +391,7 @@ async function sendRewardWithRetry(
|
|||||||
try {
|
try {
|
||||||
const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
|
const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy/v2`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', ...(traceparent ? { traceparent } : {}) },
|
||||||
body,
|
body,
|
||||||
signal: AbortSignal.timeout(3000),
|
signal: AbortSignal.timeout(3000),
|
||||||
});
|
});
|
||||||
@@ -395,7 +399,7 @@ async function sendRewardWithRetry(
|
|||||||
throw new Error(`HTTP ${res.status}`);
|
throw new Error(`HTTP ${res.status}`);
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
if (attempt === 3) {
|
if (attempt === 3) {
|
||||||
console.error(`[reward] failed after 3 attempts for tip ${tipId}: ${err.message}`);
|
logger.error({ tipId, err }, 'reward: failed after 3 attempts');
|
||||||
bus.publish('signals.tip.reward_failed', {
|
bus.publish('signals.tip.reward_failed', {
|
||||||
userId,
|
userId,
|
||||||
tipId,
|
tipId,
|
||||||
@@ -468,7 +472,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
|
|||||||
if (candidate) {
|
if (candidate) {
|
||||||
// Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
|
// Re-fetch profile for the v2 ridge update; TTL cache makes this near-instant.
|
||||||
const profile = await getProfile(req.userId!);
|
const profile = await getProfile(req.userId!);
|
||||||
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile);
|
sendRewardWithRetry(req.userId!, tipId, reward, candidate.features, profile, req.traceparent);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delegate action to the owning signal source (e.g. mark done in Todoist)
|
// Delegate action to the owning signal source (e.g. mark done in Todoist)
|
||||||
|
|||||||
@@ -8,6 +8,11 @@
|
|||||||
*/
|
*/
|
||||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||||
|
|
||||||
|
vi.mock('../../logger.js', () => ({
|
||||||
|
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), fatal: vi.fn() },
|
||||||
|
}));
|
||||||
|
import { logger } from '../../logger.js';
|
||||||
|
|
||||||
// ── mock the drizzle query chain: db.select(...).from(...).where(...) ────────
|
// ── mock the drizzle query chain: db.select(...).from(...).where(...) ────────
|
||||||
let users: { userId: string }[] = [];
|
let users: { userId: string }[] = [];
|
||||||
const whereMock = vi.fn(async () => users);
|
const whereMock = vi.fn(async () => users);
|
||||||
@@ -35,6 +40,7 @@ beforeEach(() => {
|
|||||||
whereMock.mockClear();
|
whereMock.mockClear();
|
||||||
fromMock.mockClear();
|
fromMock.mockClear();
|
||||||
selectMock.mockClear();
|
selectMock.mockClear();
|
||||||
|
vi.clearAllMocks();
|
||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -102,8 +108,6 @@ describe('startTodoistSyncScheduler', () => {
|
|||||||
if (id === 'bad') throw new Error('todoist 401');
|
if (id === 'bad') throw new Error('todoist 401');
|
||||||
return [];
|
return [];
|
||||||
});
|
});
|
||||||
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
||||||
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
|
|
||||||
|
|
||||||
startTodoistSyncScheduler(60_000);
|
startTodoistSyncScheduler(60_000);
|
||||||
await vi.advanceTimersByTimeAsync(10_001);
|
await vi.advanceTimersByTimeAsync(10_001);
|
||||||
@@ -112,19 +116,27 @@ describe('startTodoistSyncScheduler', () => {
|
|||||||
await Promise.resolve();
|
await Promise.resolve();
|
||||||
|
|
||||||
expect(fetchSignalsMock).toHaveBeenCalledTimes(3);
|
expect(fetchSignalsMock).toHaveBeenCalledTimes(3);
|
||||||
expect(errSpy).toHaveBeenCalledWith(expect.stringContaining('sync error'), expect.anything());
|
expect(logger.error).toHaveBeenCalledWith(
|
||||||
expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('2 ok, 1 failed'));
|
expect.objectContaining({ err: expect.anything() }),
|
||||||
|
'scheduler: sync error',
|
||||||
|
);
|
||||||
|
expect(logger.info).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ ok: 2, failed: 1 }),
|
||||||
|
'scheduler: todoist sync',
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('survives a db query failure — logs and skips the tick', async () => {
|
it('survives a db query failure — logs and skips the tick', async () => {
|
||||||
const { startTodoistSyncScheduler } = await import('../scheduler.js');
|
const { startTodoistSyncScheduler } = await import('../scheduler.js');
|
||||||
whereMock.mockRejectedValueOnce(new Error('sqlite locked'));
|
whereMock.mockRejectedValueOnce(new Error('sqlite locked'));
|
||||||
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
||||||
|
|
||||||
startTodoistSyncScheduler(60_000);
|
startTodoistSyncScheduler(60_000);
|
||||||
await vi.advanceTimersByTimeAsync(10_001);
|
await vi.advanceTimersByTimeAsync(10_001);
|
||||||
|
|
||||||
expect(fetchSignalsMock).not.toHaveBeenCalled();
|
expect(fetchSignalsMock).not.toHaveBeenCalled();
|
||||||
expect(errSpy).toHaveBeenCalledWith(expect.stringContaining('failed to query users'));
|
expect(logger.error).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ err: expect.anything() }),
|
||||||
|
'scheduler: failed to query users',
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import type { Signal, SignalSource } from '@oo/shared-types';
|
import type { Signal, SignalSource } from '@oo/shared-types';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges signals from all registered sources for a user.
|
* Merges signals from all registered sources for a user.
|
||||||
@@ -24,7 +25,7 @@ export class SignalAggregator {
|
|||||||
if (r.status === 'fulfilled') {
|
if (r.status === 'fulfilled') {
|
||||||
signals.push(...r.value);
|
signals.push(...r.value);
|
||||||
} else {
|
} else {
|
||||||
console.error(`[aggregator] source '${this.sources[i].id}' failed:`, r.reason);
|
logger.error({ sourceId: this.sources[i]!.id, err: r.reason }, 'aggregator: source failed');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return signals;
|
return signals;
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import { db } from '../db/index.js';
|
|||||||
import { integrationTokens } from '../db/schema.js';
|
import { integrationTokens } from '../db/schema.js';
|
||||||
import { eq } from 'drizzle-orm';
|
import { eq } from 'drizzle-orm';
|
||||||
import { todoistSource } from './todoist.js';
|
import { todoistSource } from './todoist.js';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
|
|
||||||
const DEFAULT_INTERVAL_MS = 15 * 60 * 1000;
|
const DEFAULT_INTERVAL_MS = 15 * 60 * 1000;
|
||||||
|
|
||||||
@@ -25,7 +26,7 @@ export function startTodoistSyncScheduler(intervalMs = DEFAULT_INTERVAL_MS): Nod
|
|||||||
.from(integrationTokens)
|
.from(integrationTokens)
|
||||||
.where(eq(integrationTokens.tokenStatus, 'active'));
|
.where(eq(integrationTokens.tokenStatus, 'active'));
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.error(`[scheduler] failed to query users: ${err.message}`);
|
logger.error({ err }, 'scheduler: failed to query users');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,10 +40,10 @@ export function startTodoistSyncScheduler(intervalMs = DEFAULT_INTERVAL_MS): Nod
|
|||||||
let failed = 0;
|
let failed = 0;
|
||||||
for (const r of results) {
|
for (const r of results) {
|
||||||
if (r.status === 'fulfilled') ok++;
|
if (r.status === 'fulfilled') ok++;
|
||||||
else { failed++; console.error(`[scheduler] sync error:`, r.reason); }
|
else { failed++; logger.error({ err: r.reason }, 'scheduler: sync error'); }
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[scheduler] todoist sync: ${ok} ok, ${failed} failed (${users.length} users)`);
|
logger.info({ ok, failed, total: users.length }, 'scheduler: todoist sync');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run once shortly after startup, then on interval
|
// Run once shortly after startup, then on interval
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { db } from '../db/index.js';
|
|||||||
import { integrationTokens } from '../db/schema.js';
|
import { integrationTokens } from '../db/schema.js';
|
||||||
import { eq, and } from 'drizzle-orm';
|
import { eq, and } from 'drizzle-orm';
|
||||||
import { bus } from '../events/bus.js';
|
import { bus } from '../events/bus.js';
|
||||||
|
import { logger } from '../logger.js';
|
||||||
|
|
||||||
const CACHE_TTL_MS = 30_000;
|
const CACHE_TTL_MS = 30_000;
|
||||||
|
|
||||||
@@ -46,7 +47,7 @@ export class TodoistSignalSource implements SignalSource {
|
|||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
if (res.status === 401) {
|
if (res.status === 401) {
|
||||||
console.error(`[todoist] token expired for user ${userId}`);
|
logger.warn({ userId }, 'todoist: token expired');
|
||||||
bus.publish('signals.integration.token_expired', {
|
bus.publish('signals.integration.token_expired', {
|
||||||
userId,
|
userId,
|
||||||
provider: 'todoist',
|
provider: 'todoist',
|
||||||
|
|||||||
Reference in New Issue
Block a user