feat(profile): user-profile feature registry + builder (phase A)

Centralizes user-level features (completion_rate_30d, dismiss_rate_30d, mean_dwell_ms_30d, preferred_hour, tip_volume_30d) in a TS registry that owns both the feature definitions and their SQL aggregation, since the data lives in the TS-owned SQLite tables (tip_views/tip_feedback). Lazy TTL refresh keeps recommend latency bounded; values persist in user_profile_features (KV). ml/serving accepts profile_features on /score + /generate but does not yet consume them — extending the bandit feature vector changes its dimensionality (D) and resets every user's learned state, so consuming them is deliberately left to phase B. Includes ml/features/profile_schema.py as a contract mirror, with a sync test that diffs its feature-name set against registry.ts. ADR-0011 records the data-locality reasoning (the registry lives in TS, not in Python as the issue originally suggested). Phase B (deferred): event-driven incremental updates, bandit consumption with state migration, admin per-user profile page, staleness alerts. Refs #81. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 00:22:22 +00:00
parent 430804e9a5
commit 7d4c29e137
13 changed files with 636 additions and 2 deletions
--- a/services/api/src/routes/recommender.ts
+++ b/services/api/src/routes/recommender.ts
@@ -10,6 +10,7 @@ import type { TipCandidate, Signal } from '@oo/shared-types';
 import { todoistSource, dueAgeDays } from '../signals/todoist.js';
 export { dueAgeDays };
 import { SignalAggregator } from '../signals/aggregator.js';
+import { getProfile, type Profile } from '../profile/builder.js';

 const router: ExpressRouter = Router();

@@ -82,6 +83,7 @@ function signalToCandidate(signal: Signal): TipCandidate {
 async function remotePolicy(
  userId: string,
  tasks: TipCandidate[],
+  profile: Profile,
 ): Promise<{ tipId: string; score: number; policy: string } | null> {
  const hour = new Date().getHours();
  const dayOfWeek = new Date().getDay();
@@ -96,6 +98,7 @@ async function remotePolicy(
      features: t.features,
    })),
    context: { hour_of_day: hour, day_of_week: dayOfWeek },
+    profile_features: profile,
  };

  // Active policy: egreedy-v1 (selected over linucb-v1 after offline sim — ADR-0007)
@@ -141,6 +144,7 @@ async function fetchLlmCandidates(
  hour: number,
  dayOfWeek: number,
  promptVersion: string | null,
+  profile: Profile,
 ): Promise<LlmGenerateResult> {
  try {
    const tasks = signals.slice(0, 10).map((s) => ({
@@ -156,6 +160,7 @@ async function fetchLlmCandidates(
        user_id: userId,
        context: { tasks, hour_of_day: hour, day_of_week: dayOfWeek },
        n: 3,
+        profile_features: profile,
        ...(promptVersion ? { prompt_version: promptVersion } : {}),
      }),
      signal: AbortSignal.timeout(15_000),
@@ -208,6 +213,8 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re

  // Stage 1: assemble candidates — aggregated signals + LLM-generated advice (parallel)
  const signals = await aggregator.fetchAll(req.userId!);
+  // Refresh + load the user-level profile feature dict (lazy TTL refresh).
+  const profile = await getProfile(req.userId!);

  const signalCandidates = signals.map(signalToCandidate);
  const requestedPromptVersion = pickPromptVersion();
@@ -217,6 +224,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
    hour,
    dayOfWeek,
    requestedPromptVersion,
+    profile,
  );

  const allCandidates: TipCandidate[] = [...signalCandidates, ...llmResult.candidates];
@@ -231,7 +239,7 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
  const t0 = Date.now();

  // Stage 2: score — egreedy bandit with random fallback
-  const scored = await remotePolicy(req.userId!, allCandidates);
+  const scored = await remotePolicy(req.userId!, allCandidates, profile);
  const latencyMs = Date.now() - t0;
  const tip = scored
    ? (allCandidates.find((t) => t.id === scored.tipId) ?? randomPolicy(allCandidates))