feat: MLOps external services, AI stack planning, admin MLOps hub

Infrastructure:
- Add `mlops` compose profile: MLflow (basic-auth, /mlflow path) + Airflow (LocalExecutor, /airflow path) + airflow-db
- infra/mlflow/basic_auth.ini for MLflow auth config
- Caddy routes /mlflow* and /airflow* inside existing o.alogins.net block (see agap_git)
- Dockerfile.admin: NEXT_PUBLIC_MLFLOW_URL / NEXT_PUBLIC_AIRFLOW_URL build args (default /mlflow, /airflow)

Admin panel:
- /admin/models: replace MLflow iframe with external link cards
- /admin/experiments: replace LinUCB stats with MLOps hub (links to MLflow experiments/models + Airflow DAGs/datasets)
- AdminShell: external nav links for MLflow ↗ and Airflow ↗ under MLOps section

Docs & planning:
- README: new AI stack section (Ollama/LiteLLM/OpenWebUI three-tier, tip generation pipeline, model aliases)
- README: Phase 2 expanded with AI infra issues (#86-#93) and granular pipeline breakdown
- README: Phase 4 expanded with LLM MLOps items (#94-#97)
- CLAUDE.md: AI stack section, updated current phase (M1 shipped / M2 in progress), compose profiles, updated What NOT to do
- docs/architecture/overview.md: AI stack section, updated decision flow diagram for Phase 2 LLM pipeline
- ADR-0006: updated to reflect external services (path-based, not embedded)
- Gitea issues #86-#97 created (M2: AI infra + pipeline; M4: LLM MLOps)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 08:20:44 +00:00
parent faf44c18fc
commit 85367aeaa0
25 changed files with 695 additions and 222 deletions
--- a/services/api/src/routes/recommender.ts
+++ b/services/api/src/routes/recommender.ts
@@ -65,7 +65,17 @@ async function fetchTodoistTasks(userId: string, accessToken: string): Promise<C
    headers: { Authorization: `Bearer ${accessToken}` },
  });

-  if (!res.ok) return cached?.tasks ?? [];
+  if (!res.ok) {
+    if (res.status === 401) {
+      console.error(`[todoist] token expired for user ${userId}`);
+      bus.publish('signals.integration.token_expired', {
+        userId,
+        provider: 'todoist',
+        detectedAt: new Date().toISOString(),
+      });
+    }
+    return cached?.tasks ?? [];
+  }

  const body = (await res.json()) as {
    results: Array<{
@@ -230,18 +240,20 @@ router.post('/recommend', requireAuth, async (req: AuthenticatedRequest, res: Re
 // ---------------------------------------------------------------------------
 // Reward inference from action + dwell time
 //
-// Feedback is now 3 signals only: done / snooze / dismiss.
-// "Helpfulness" is inferred from how long the user took to act on a tip:
 //   dismiss              → -1.0 (clear rejection)
 //   snooze               → +0.1 (tip noticed, timing off — mild positive)
+//   helpful              → +0.5 (explicit positive signal)
+//   not_helpful          → -0.5 (explicit negative signal)
 //   done < 15 s          → -0.3 (almost certainly a stale task, not magic)
 //   done 15 s – 2 min    → +1.0 (magic zone: user saw tip and acted)
 //   done 2 – 10 min      → +0.6 (good: user engaged, acted in same session)
 //   done > 10 min        → +0.3 (eventually done; tip may have helped, unclear)
 // ---------------------------------------------------------------------------
 function inferReward(action: string, dwellMs: number | null): number {
-  if (action === 'dismiss') return -1.0;
-  if (action === 'snooze')  return 0.1;
+  if (action === 'dismiss')     return -1.0;
+  if (action === 'snooze')      return 0.1;
+  if (action === 'helpful')     return 0.5;
+  if (action === 'not_helpful') return -0.5;
  // done — use dwell time
  if (dwellMs === null || dwellMs < 0) return 0.5; // unknown dwell: neutral positive
  if (dwellMs < 15_000)   return -0.3; // stale / reflex
@@ -250,6 +262,51 @@ function inferReward(action: string, dwellMs: number | null): number {
  return 0.3;                           // eventually
 }

+// ---------------------------------------------------------------------------
+// Reward delivery with retry (bug #75 — was fire-and-forget)
+// Up to 3 POSTs to /reward/egreedy (3 s timeout each; 500 ms, then 1 s back-off).
+// After the final failure: log + publish `signals.tip.reward_failed`, no rethrow.
+// ---------------------------------------------------------------------------
+async function sendRewardWithRetry(
+  userId: string,
+  tipId: string,
+  reward: number,
+  features: TaskFeatures,
+): Promise<void> {
+  const maxAttempts = 3;
+  const body = JSON.stringify({
+    user_id: userId,
+    tip_id: tipId,
+    reward,
+    features,
+    day_of_week: new Date().getDay(),
+  });
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    try {
+      const res = await fetch(`${config.ML_SERVING_URL}/reward/egreedy`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body,
+        signal: AbortSignal.timeout(3000),
+      });
+      if (res.ok) return;
+      throw new Error(`HTTP ${res.status}`);
+    } catch (err: unknown) {
+      const message = err instanceof Error ? err.message : String(err);
+      if (attempt === maxAttempts) {
+        console.error(`[reward] failed after ${maxAttempts} attempts for tip ${tipId}: ${message}`);
+        bus.publish('signals.tip.reward_failed', {
+          userId, tipId, reward, attempts: maxAttempts, error: message,
+          failedAt: new Date().toISOString(),
+        });
+        return;
+      }
+      await new Promise((r) => setTimeout(r, 250 * 2 ** attempt));
+    }
+  }
+}
+
 // ---------------------------------------------------------------------------
 // POST /api/tip/:id/feedback
 // ---------------------------------------------------------------------------
@@ -258,7 +315,7 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
  const tipId = String(req.params.id);
  const now = new Date();

-  const validActions = ['done', 'dismiss', 'snooze'];
+  const validActions = ['done', 'dismiss', 'snooze', 'helpful', 'not_helpful'];
  if (!validActions.includes(action)) {
    res.status(400).json({ error: 'Invalid action' });
    return;
@@ -297,25 +354,14 @@ router.post('/tip/:id/feedback', requireAuth, async (req: AuthenticatedRequest,
  bus.publish('signals.tip.feedback', {
    userId: req.userId!,
    tipId,
-    action: action as 'done' | 'dismiss' | 'snooze',
+    action: action as 'done' | 'dismiss' | 'snooze' | 'helpful' | 'not_helpful',
    reward,
    dwellMs,
    createdAt: now.toISOString(),
  });

  if (task) {
-    // Send reward to egreedy-v1 (active policy — ADR-0007)
-    fetch(`${config.ML_SERVING_URL}/reward/egreedy`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        user_id: req.userId!,
-        tip_id: tipId,
-        reward,
-        features: task.features,
-        day_of_week: new Date().getDay(),
-      }),
-    }).catch(() => {});
+    sendRewardWithRetry(req.userId!, tipId, reward, task.features);
  }

  // Mark complete in Todoist if done