feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework

- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy replace linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward): dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id} with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges, two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 07:44:37 +00:00
parent c5ea18ec6e
commit faf44c18fc
48 changed files with 6151 additions and 40 deletions
--- a/apps/web/e2e/sign-in.spec.ts
+++ b/apps/web/e2e/sign-in.spec.ts
@@ -0,0 +1,11 @@
+import { test, expect } from '@playwright/test';
+
+test('sign-in page loads and shows Google button', async ({ page }) => {
+  await page.goto('/sign-in'); // relative path; resolved against baseURL from playwright.config.ts
+  await expect(page.getByRole('link', { name: /google/i })).toBeVisible(); // queried as a link (not a button) whose accessible name contains "google", case-insensitive
+});
+
+test('unauthenticated root redirects to sign-in', async ({ page }) => {
+  await page.goto('/'); // the test performs no sign-in before visiting the root
+  await expect(page).toHaveURL(/sign-in/); // the final URL only needs to contain "sign-in"
+});
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -7,6 +7,10 @@
    "build": "next build",
    "start": "next start -p 3079",
    "lint": "next lint",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "test:e2e": "playwright test",
+    "test:e2e:ui": "playwright test --ui",
    "type-check": "tsc --noEmit",
    "clean": "rm -rf .next"
  },
@@ -17,9 +21,17 @@
    "react-dom": "^19.0.0"
  },
  "devDependencies": {
+    "@playwright/test": "^1.59.1",
+    "@testing-library/jest-dom": "^6.9.1",
+    "@testing-library/react": "^16.3.2",
+    "@testing-library/user-event": "^14.6.1",
+    "@types/node": "^22.10.5",
    "@types/react": "^19.0.0",
    "@types/react-dom": "^19.0.0",
-    "@types/node": "^22.10.5",
-    "typescript": "^5.7.3"
+    "@vitejs/plugin-react": "^6.0.1",
+    "@vitest/coverage-v8": "^4.1.4",
+    "jsdom": "^29.0.2",
+    "typescript": "^5.7.3",
+    "vitest": "^4.1.4"
  }
 }
--- a/apps/web/playwright.config.ts
+++ b/apps/web/playwright.config.ts
@@ -0,0 +1,24 @@
+import { defineConfig, devices } from '@playwright/test';
+
+export default defineConfig({
+  testDir: './e2e',
+  fullyParallel: true,
+  forbidOnly: !!process.env.CI, // true only when the CI env var is set
+  retries: process.env.CI ? 2 : 0, // two retries in CI, none locally
+  reporter: 'html',
+  use: {
+    baseURL: process.env.BASE_URL ?? 'http://localhost:3079', // BASE_URL overrides; the default port matches `next start -p 3079`
+    trace: 'on-first-retry',
+  },
+  projects: [
+    { name: 'chromium', use: { ...devices['Desktop Chrome'] } },
+  ],
+  // In CI, build and start the production server (`pnpm build && pnpm start`); locally, run `pnpm dev` first
+  webServer: process.env.CI
+    ? {
+        command: 'pnpm build && pnpm start',
+        url: 'http://localhost:3079', // NOTE(review): fixed URL, does not follow BASE_URL above — confirm intended
+        reuseExistingServer: false,
+      }
+    : undefined,
+});
--- a/apps/web/src/components/tests/TipPage.test.tsx
+++ b/apps/web/src/components/tests/TipPage.test.tsx
@@ -0,0 +1,131 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { render, screen, waitFor, act, fireEvent } from '@testing-library/react';
+import userEvent from '@testing-library/user-event';
+
+// Mock the API module — we test UI behaviour, not network calls (Vitest hoists vi.mock above the imports below)
+vi.mock('@/lib/api', () => ({
+  getRecommendation: vi.fn(), // no default result; each test sets its own resolved/rejected value
+  sendFeedback: vi.fn().mockResolvedValue(undefined), // resolves to undefined by default
+  getVapidPublicKey: vi.fn(), // stubbed but not configured or asserted in this file
+  subscribePush: vi.fn(), // stubbed but not configured or asserted in this file
+}));
+
+import { getRecommendation, sendFeedback } from '@/lib/api';
+import TipPage from '@/app/tip/page';
+
+const mockGetRec = getRecommendation as ReturnType<typeof vi.fn>; // cast exposes mockResolvedValue/mockRejectedValue on the mocked export
+const mockSendFeedback = sendFeedback as ReturnType<typeof vi.fn>; // used for toHaveBeenCalledWith assertions
+// Clear mock call history before every test so call-count/argument assertions only see that test's calls.
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+describe('TipPage — empty / error states', () => {
+  it('shows "All clear." when no tip is returned', async () => {
+    mockGetRec.mockResolvedValue(null); // the API resolves with no recommendation
+    render(<TipPage />);
+    expect(await screen.findByText('All clear.')).toBeInTheDocument();
+  });
+
+  it('shows "All clear." when getRecommendation throws', async () => {
+    mockGetRec.mockRejectedValue(Object.assign(new Error('Network error'), { status: 503 }));
+    render(<TipPage />);
+    expect(await screen.findByText('All clear.')).toBeInTheDocument(); // a rejection renders the same empty state
+  });
+
+  it('"Check again" button re-calls getRecommendation', async () => {
+    mockGetRec.mockResolvedValue(null);
+    render(<TipPage />);
+    const retryButton = await screen.findByText('Check again');
+    // Queue a tip for the retry, then trigger it; only the second API call is asserted.
+    mockGetRec.mockResolvedValue({
+      tip: { id: 'todoist:2', content: 'New tip', source: 'todoist', createdAt: '' },
+    });
+    fireEvent.click(retryButton);
+    await waitFor(() => expect(mockGetRec).toHaveBeenCalledTimes(2));
+  });
+});
+
+describe('TipPage — tip display', () => {
+  it('renders tip content after loading', async () => {
+    mockGetRec.mockResolvedValue({
+      tip: { id: 'todoist:1', content: 'Write the test', source: 'todoist', createdAt: '' },
+    });
+    render(<TipPage />);
+    expect(await screen.findByText('Write the test')).toBeInTheDocument();
+  });
+
+  it('shows "hold to act" hint when tip is displayed', async () => {
+    mockGetRec.mockResolvedValue({
+      tip: { id: 'todoist:3', content: 'Do the thing', source: 'todoist', createdAt: '' },
+    });
+    render(<TipPage />);
+    expect(await screen.findByText(/hold to act/i)).toBeInTheDocument();
+  });
+
+  it('shows "reading you…" while loading', async () => {
+    // A promise that never settles keeps the request pending for the synchronous check below
+    mockGetRec.mockReturnValue(new Promise(() => {}));
+    render(<TipPage />);
+    expect(screen.getByText(/reading you/i)).toBeInTheDocument();
+  });
+});
+
+describe('TipPage — action sheet', () => {
+  // Simulates pointerdown on `main`, advances fake time by `holdMs`, and optionally releases.
+  // try/finally always restores real timers, so a throw here cannot stall later findBy*/waitFor calls.
+  function press(main: HTMLElement, holdMs: number, release = false): void {
+    vi.useFakeTimers();
+    try {
+      act(() => { main.dispatchEvent(new PointerEvent('pointerdown', { bubbles: true })); });
+      act(() => { vi.advanceTimersByTime(holdMs); });
+      if (release) { act(() => { main.dispatchEvent(new PointerEvent('pointerup', { bubbles: true })); }); }
+    } finally {
+      vi.useRealTimers();
+    }
+  }
+
+  // Render and load the tip with real timers, THEN hold for 650 ms and wait for the action sheet.
+  async function renderTipAndHold(id: string, content: string) {
+    mockGetRec.mockResolvedValue({ tip: { id, content, source: 'todoist', createdAt: '' } });
+    render(<TipPage />);
+    await screen.findByText(content);
+    const main = screen.getByRole('main');
+    press(main, 650);
+    await screen.findByText('Done ✓');
+    return main;
+  }
+
+  it('action sheet appears after a long press (600 ms)', async () => {
+    await renderTipAndHold('tip:lp', 'Hold me');
+    expect(screen.getByText('Done ✓')).toBeInTheDocument();
+  });
+
+  it('action sheet does not appear on short press (<600 ms)', async () => {
+    mockGetRec.mockResolvedValue({ tip: { id: 'tip:sp', content: 'Short press', source: 'todoist', createdAt: '' } });
+    render(<TipPage />);
+    await screen.findByText('Short press');
+    press(screen.getByRole('main'), 200, true);
+    expect(screen.queryByText('Done ✓')).not.toBeInTheDocument();
+  });
+
+  it('clicking "Done ✓" calls sendFeedback with action=done', async () => {
+    await renderTipAndHold('tip:d', 'Do it');
+    await act(async () => { fireEvent.click(screen.getByText('Done ✓')); });
+    expect(mockSendFeedback).toHaveBeenCalledWith('tip:d', { action: 'done' });
+  });
+
+  it('clicking "Dismiss" calls sendFeedback with action=dismiss', async () => {
+    await renderTipAndHold('tip:dis', 'Dismiss me');
+    await act(async () => { fireEvent.click(screen.getByText('Dismiss')); });
+    expect(mockSendFeedback).toHaveBeenCalledWith('tip:dis', { action: 'dismiss' });
+  });
+
+  // NOTE(review): this commit's message says explicit helpful/not_helpful feedback is replaced by dwell-time
+  // reward and that dwellMs joins the feedback contract — confirm this button and these exact payloads still apply.
+  it('clicking "Helpful" calls sendFeedback with action=helpful (non-navigating)', async () => {
+    await renderTipAndHold('tip:help', 'Helpful tip');
+    await act(async () => { fireEvent.click(screen.getByText('Helpful')); });
+    expect(mockSendFeedback).toHaveBeenCalledWith('tip:help', { action: 'helpful' });
+  });
+});
--- a/apps/web/src/test/setup.ts
+++ b/apps/web/src/test/setup.ts
@@ -0,0 +1 @@
+import '@testing-library/jest-dom/vitest'; // Vitest entry point: extends Vitest's own expect (and its types) with the jest-dom matchers
--- a/apps/web/vitest.config.ts
+++ b/apps/web/vitest.config.ts
@@ -0,0 +1,23 @@
+import { defineConfig } from 'vitest/config';
+import react from '@vitejs/plugin-react';
+import { resolve } from 'path';
+
+export default defineConfig({
+  plugins: [react()],
+  test: {
+    globals: true,
+    environment: 'jsdom', // component tests render into jsdom rather than a real browser
+    setupFiles: ['./src/test/setup.ts'], // registers the jest-dom matchers (e.g. toBeInTheDocument)
+    exclude: ['e2e/**', 'node_modules/**'], // e2e/ is the Playwright testDir, run separately via `test:e2e`
+    coverage: {
+      provider: 'v8',
+      reporter: ['text', 'lcov'],
+      include: ['src/**'],
+    },
+  },
+  resolve: {
+    alias: {
+      '@': resolve(__dirname, 'src'), // lets tests import app code as '@/…' (e.g. '@/lib/api')
+    },
+  },
+});