feat(consents): auto-grant data:<provider> on connect; remove agent: consents (ADR-0015)

- integrations.ts: grant data:<provider> on OAuth callback, revoke on disconnect - Backfill migration: INSERT OR IGNORE data:<provider> for all active tokens - Agent manifests: drop agent:<id> from required_consents (momentum, time-of-day, overdue-task, recent-patterns, health-vitals) — per-agent control is a preference - eligibility.ts: update comment to reflect data:-only consent model - test_manifest.py: assert no agent: consents remain in any manifest - migrations.test.ts: backfill idempotency tests for issue #127 - Dockerfile.api: drop --offline flag (fixes ERR_PNPM_NO_OFFLINE_META) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 15:09:58 +00:00
parent 34925310cf
commit 772bb6e194
11 changed files with 124 additions and 9 deletions
--- a/docs/adr/0015-data-source-consents.md
+++ b/docs/adr/0015-data-source-consents.md
@@ -0,0 +1,44 @@
+# ADR-0015 — Data-source consents only; drop per-agent consent gate
+
+**Date:** 2026-05-11  
+**Status:** Accepted  
+**Supersedes:** ADR-0014 §3 (consent model)
+
+## Context
+
+ADR-0014 introduced `required_consents` on agent manifests. In practice two
+unrelated concepts were mixed into that field:
+
+- `data:<source>` — which data source the agent reads.
+- `agent:<id>` — whether the user opted into this specific agent.
+
+No UI ever granted `agent:<id>` consents, so the eligibility filter at
+`services/api/src/profile/eligibility.ts` dropped every agent for every real
+user. The symptom was confirmed by MLflow trace
+`tr-591449ea8a72af8e81b6a585234a86ab`: user `ODGp4Gkr7JWemMsqcMLMn` had five
+fresh `agent_outputs` rows but the orchestrator received `agent_ids: []`.
+
+## Decision
+
+Collapse to a single consent dimension: **data source**.
+
+1. `required_consents` entries must all start with `data:`. Agent manifests no
+   longer list `agent:<id>` entries.
+2. Connecting a data source via the OAuth flow automatically grants
+   `data:<provider>` in `user_consents`. Disconnecting sets `revoked_at`.
+3. `data:core` continues to be auto-granted on signup.
+4. Per-agent control becomes a **preference** (`user_preferences[scope='agent:<id>', key='enabled']`), not a consent. The eligibility filter already honours this — the only change is removing the `agent:*` consent check that was always failing.
+5. Eligibility rule (final): an agent is eligible iff every `data:*` it
+   declares is granted and not revoked, no active context is in
+   `silenced_in_contexts`, and the `enabled` preference is not `false`.
+
+## Consequences
+
+- Agents that only require `data:core` (time-of-day, momentum, recent-patterns)
+  become eligible immediately after signup.
+- Agents requiring `data:todoist` or `data:google-health` become eligible as
+  soon as the user connects the integration — no extra consent step.
+- A backfill migration grants `data:<provider>` for every existing active
+  `integration_tokens` row, unblocking users who connected before this change.
+- `ml/agents/tests/test_manifest.py` asserts all `required_consents` start
+  with `data:`, preventing regression.
--- a/infra/docker/Dockerfile.api
+++ b/infra/docker/Dockerfile.api
@@ -16,7 +16,7 @@ COPY pnpm-lock.yaml ./
 RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm fetch
 COPY . .
 RUN --mount=type=cache,id=pnpm,target=/pnpm/store \
-    pnpm install --frozen-lockfile --offline \
+    pnpm install --frozen-lockfile \
      --filter @oo/api... --filter @oo/shared-types
 RUN pnpm --filter @oo/shared-types build
 RUN pnpm --filter @oo/api build
--- a/ml/agents/health_vitals.py
+++ b/ml/agents/health_vitals.py
@@ -40,7 +40,7 @@ MANIFEST = AgentManifest(
        },
    },
    context_schema=["google-health.steps", "google-health.sleep", "google-health.activity", "google-health.heart_rate"],
-    required_consents=["data:core", "data:google-health", "agent:health-vitals"],
+    required_consents=["data:core", "data:google-health"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=1800,  # refresh every 30 min — health data changes during the day
    silenced_in_contexts=[],
--- a/ml/agents/momentum.py
+++ b/ml/agents/momentum.py
@@ -121,7 +121,7 @@ MANIFEST = AgentManifest(
        },
    },
    context_schema=["profile.features"],
-    required_consents=["data:core", "agent:momentum"],
+    required_consents=["data:core"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=21_600,
    inferred_params=[
--- a/ml/agents/overdue_task.py
+++ b/ml/agents/overdue_task.py
@@ -70,7 +70,7 @@ MANIFEST = AgentManifest(
        },
    },
    context_schema=["todoist.tasks"],
-    required_consents=["data:core", "data:todoist", "agent:overdue-task"],
+    required_consents=["data:core", "data:todoist"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=3600,
    silenced_in_contexts=["vacation"],
--- a/ml/agents/recent_patterns.py
+++ b/ml/agents/recent_patterns.py
@@ -131,7 +131,7 @@ MANIFEST = AgentManifest(
        },
    },
    context_schema=["tip_feedback", "profile.features"],
-    required_consents=["data:core", "agent:recent-patterns"],
+    required_consents=["data:core"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=86_400,
    inferred_params=[
--- a/ml/agents/tests/test_manifest.py
+++ b/ml/agents/tests/test_manifest.py
@@ -45,6 +45,7 @@ def test_manifest_required_fields(agent_id: str):
    assert isinstance(m.pref_schema, dict) and m.pref_schema.get("type") == "object"
    assert isinstance(m.required_consents, list) and m.required_consents
    assert "data:core" in m.required_consents, "every agent should require data:core"
+    assert all(c.startswith("data:") for c in m.required_consents), "only data: consents allowed; agent: consents have been removed"
    assert m.ttl_sec == get_agent(agent_id).ttl_seconds, "ttl divergence"


--- a/ml/agents/time_of_day.py
+++ b/ml/agents/time_of_day.py
@@ -126,7 +126,7 @@ MANIFEST = AgentManifest(
        },
    },
    context_schema=["profile.features"],
-    required_consents=["data:core", "agent:time-of-day"],
+    required_consents=["data:core"],
    output_contract={"type": "snippet", "format": "free_text"},
    ttl_sec=900,
    inferred_params=[
--- a/services/api/src/db/tests/migrations.test.ts
+++ b/services/api/src/db/tests/migrations.test.ts
@@ -85,3 +85,45 @@ describe('runMigrations — idempotency', () => {
  });
 });

+describe('runMigrations — issue #127 backfill', () => {
+  it('grants data:<provider> consent for existing active integration tokens', () => {
+    const sqlite = freshDb();
+    runMigrations(sqlite);
+
+    // Seed a user + active Todoist token (simulates pre-#127 state)
+    sqlite.exec(`
+      INSERT INTO users (id, email, role, created_at) VALUES ('u2', 'u2@test.com', 'user', '2026-01-01T00:00:00Z');
+      INSERT INTO user_consents (user_id, consent_key, granted_at) VALUES ('u2', 'data:core', '2026-01-01T00:00:00Z');
+      INSERT INTO integration_tokens (id, user_id, provider, access_token, token_status, connected_at)
+        VALUES ('tok1', 'u2', 'todoist', 'secret', 'active', '2026-01-02T00:00:00Z');
+    `);
+
+    // Re-run migrations — the backfill should insert data:todoist
+    runMigrations(sqlite);
+
+    const rows = sqlite
+      .prepare(`SELECT consent_key FROM user_consents WHERE user_id = 'u2' ORDER BY consent_key`)
+      .all() as { consent_key: string }[];
+    expect(rows.map((r) => r.consent_key)).toEqual(['data:core', 'data:todoist']);
+  });
+
+  it('is idempotent — running twice does not duplicate consent rows', () => {
+    const sqlite = freshDb();
+    runMigrations(sqlite);
+
+    sqlite.exec(`
+      INSERT INTO users (id, email, role, created_at) VALUES ('u3', 'u3@test.com', 'user', '2026-01-01T00:00:00Z');
+      INSERT INTO integration_tokens (id, user_id, provider, access_token, token_status, connected_at)
+        VALUES ('tok2', 'u3', 'todoist', 'secret', 'active', '2026-01-02T00:00:00Z');
+    `);
+
+    runMigrations(sqlite);
+    runMigrations(sqlite);
+
+    const count = (sqlite
+      .prepare(`SELECT COUNT(*) as n FROM user_consents WHERE user_id = 'u3' AND consent_key = 'data:todoist'`)
+      .get() as { n: number }).n;
+    expect(count).toBe(1);
+  });
+});
+
--- a/services/api/src/profile/eligibility.ts
+++ b/services/api/src/profile/eligibility.ts
@@ -1,8 +1,10 @@
 /**
- * Registry-driven agent eligibility filter (ADR-0014 step 5).
+ * Registry-driven agent eligibility filter (ADR-0014 step 5, updated by ADR-0015).
 *
 * Rules (all must pass for an agent to be eligible):
- *   1. All required_consents are granted and not revoked.
+ *   1. Every data:<source> in required_consents is granted and not revoked.
+ *      Consent is granted automatically when the user connects that data source.
+ *      agent:<id> consents no longer exist — per-agent control is a preference (rule 3).
 *   2. No silenced_in_contexts entry matches an active context.
 *   3. user_preferences[scope='agent:<id>', key='enabled'] is not false.
 *
--- a/services/api/src/routes/integrations.ts
+++ b/services/api/src/routes/integrations.ts
@@ -1,7 +1,7 @@
 import { type Router as ExpressRouter, Router, Request, Response } from 'express';
 import { nanoid } from 'nanoid';
 import { db } from '../db/index.js';
-import { integrationTokens } from '../db/schema.js';
+import { integrationTokens, userConsents } from '../db/schema.js';
 import { eq, and } from 'drizzle-orm';
 import { config } from '../config.js';
 import { requireAuth, AuthenticatedRequest } from '../middleware/session.js';
@@ -33,6 +33,28 @@ const GOOGLE_HEALTH_SCOPES = [
 // In-memory CSRF state store
 const pendingStates = new Map<string, { userId: string; redirectTo: string }>();

+async function grantDataSourceConsent(userId: string, provider: string): Promise<void> {
+  const consentKey = `data:${provider}`;
+  const now = new Date().toISOString();
+  await db.insert(userConsents)
+    .values({ userId, consentKey, grantedAt: now, revokedAt: null })
+    .onConflictDoUpdate({
+      target: [userConsents.userId, userConsents.consentKey],
+      set: { grantedAt: now, revokedAt: null },
+    });
+}
+
+async function revokeDataSourceConsent(userId: string, provider: string): Promise<void> {
+  const consentKey = `data:${provider}`;
+  const now = new Date().toISOString();
+  await db.insert(userConsents)
+    .values({ userId, consentKey, grantedAt: now, revokedAt: now })
+    .onConflictDoUpdate({
+      target: [userConsents.userId, userConsents.consentKey],
+      set: { revokedAt: now },
+    });
+}
+
 /** GET /api/integrations — list connected integrations */
 router.get('/', requireAuth, async (req: AuthenticatedRequest, res: Response) => {
  const tokens = await db
@@ -118,6 +140,7 @@ router.get('/todoist/callback', async (req: Request, res: Response) => {
    tokenStatus: 'active',
    connectedAt: now,
  });
+  await grantDataSourceConsent(pending.userId, 'todoist');

  res.redirect(`${config.WEB_BASE_URL}${pending.redirectTo}?connected=todoist`);
 });
@@ -208,6 +231,7 @@ router.get('/google-health/callback', async (req: Request, res: Response) => {
    tokenStatus: 'active',
    connectedAt: now.toISOString(),
  });
+  await grantDataSourceConsent(pending.userId, 'google-health');

  res.redirect(`${config.WEB_BASE_URL}${pending.redirectTo}?connected=google-health`);
 });
@@ -238,6 +262,8 @@ router.delete('/:provider', requireAuth, async (req: AuthenticatedRequest, res:
    await fetch(`${GOOGLE_REVOKE_URL}?token=${token.accessToken}`, { method: 'POST' }).catch(() => {});
  }

+  await revokeDataSourceConsent(req.userId!, provider);
+
  await db
    .delete(integrationTokens)
    .where(