feat: ε-greedy v1 as active policy; dwell-time reward inference; offline sim framework

- Promote egreedy-v1 to active serving policy (ADR-0007): /score/egreedy + /reward/egreedy
  replace the linucb-v1 endpoints after offline sim shows +10.7% mean reward (−0.548 vs −0.606)
- Replace explicit helpful/not_helpful feedback with dwell-time inferred reward (inferReward):
  dismiss=−1.0, snooze=+0.1, done<15s=−0.3, done 15s–2min=+1.0, done 2–10min=+0.6, done>10min=+0.3
- Add ml/serving ε-greedy endpoints: /score/egreedy, /reward/egreedy, /stats/egreedy/{user_id}
  with d=7 feature vector (base 5 + sin/cos day-of-week encoding)
- Add offline simulation framework (ml/experiments/sim): rule/LLM/claude-code judges,
  two-phase score+reward, synthetic personas, task generator; results stored in sim_runs/sim_events
- Add /admin/simulations page: start runs, live-poll status, reward curve SVG, action/persona tables
- Fix egreedy day_of_week training skew: reward endpoint now uses actual dow instead of hardcoded 0
- Fix runner.py proxy bypass: httpx.Client(trust_env=False) for localhost ML calls
- Add dwellMs to TipFeedbackEvent contract and bus.test.ts fixture
- Schema: sim_runs, sim_events tables; tip_feedback gains dwell_ms, reward_milli columns
- ADR-0006: admin console framework; ADR-0007: egreedy-v1 policy selection rationale

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 07:44:37 +00:00
parent c5ea18ec6e
commit faf44c18fc
48 changed files with 6151 additions and 40 deletions

View File

@@ -0,0 +1,173 @@
import { describe, it, expect, vi } from 'vitest';
import { Bus, bus } from '../bus.js';
/**
 * Test helper: hands back a newly constructed Bus so that most tests work on
 * their own instance instead of the exported `bus`.
 */
const makeBus = (): Bus => new Bus();
/**
 * Delivery semantics of the bus: who receives a published payload, who does
 * not, and that payloads reach subscribers as published.
 */
describe('EventBus — delivery', () => {
  // Minimal `signals.tip.served` payload for tests that only care about routing.
  // A factory (not a shared object) so no two publishes share a reference.
  const servedStub = () => ({ userId: 'u', tipId: 't', policy: 'p', servedAt: '' });

  it('delivers a published event to subscribers', () => {
    const b = makeBus();
    const handler = vi.fn();
    b.subscribe('signals.tip.served', handler);
    const payload = { userId: 'u1', tipId: 'tip:1', policy: 'random', servedAt: new Date().toISOString() };
    b.publish('signals.tip.served', payload);
    expect(handler).toHaveBeenCalledOnce();
    expect(handler).toHaveBeenCalledWith(payload);
  });

  it('delivers to multiple subscribers on the same subject', () => {
    const b = makeBus();
    const h1 = vi.fn();
    const h2 = vi.fn();
    b.subscribe('signals.tip.served', h1);
    b.subscribe('signals.tip.served', h2);
    b.publish('signals.tip.served', servedStub());
    expect(h1).toHaveBeenCalledOnce();
    expect(h2).toHaveBeenCalledOnce();
  });

  it('does not deliver to handlers on a different subject', () => {
    const b = makeBus();
    const feedbackHandler = vi.fn();
    b.subscribe('signals.tip.feedback', feedbackHandler);
    b.publish('signals.tip.served', servedStub());
    expect(feedbackHandler).not.toHaveBeenCalled();
  });

  it('does not call a handler after bus.off()', () => {
    const b = makeBus();
    const handler = vi.fn();
    b.subscribe('signals.tip.served', handler);
    b.off('signals.tip.served', handler);
    b.publish('signals.tip.served', servedStub());
    expect(handler).not.toHaveBeenCalled();
  });

  it('does not throw when publishing with no subscribers', () => {
    const b = makeBus();
    expect(() =>
      b.publish('signals.task.synced', { userId: 'u', count: 3, syncedAt: '' }),
    ).not.toThrow();
  });

  // Previously named "reward maps correctly: done=1, dismiss=-1, snooze=0".
  // This test never exercised a mapping: it put a reward into the payload and
  // read the same value back. It is now named for what it actually verifies
  // (pass-through of the feedback payload). The fixture rewards follow the
  // documented inferReward table (dismiss=-1.0, snooze=+0.1, done within
  // 15s–2min=+1.0); the mapping itself belongs in a test of inferReward.
  it('delivers feedback payloads (action, reward, dwellMs) to subscribers unchanged', () => {
    const b = makeBus();
    const cases: Array<{ action: 'done' | 'dismiss' | 'snooze'; dwellMs: number | null; reward: number }> = [
      { action: 'done', dwellMs: 30000, reward: 1.0 },
      { action: 'dismiss', dwellMs: 2000, reward: -1.0 },
      { action: 'snooze', dwellMs: null, reward: 0.1 },
    ];
    for (const { action, dwellMs, reward } of cases) {
      // Fresh spy per case so call counts and arguments do not leak between cases.
      const handler = vi.fn();
      b.subscribe('signals.tip.feedback', handler);
      const payload = {
        userId: 'u1',
        tipId: 'todoist:42',
        action,
        reward,
        dwellMs,
        createdAt: new Date().toISOString(),
      };
      b.publish('signals.tip.feedback', payload);
      expect(handler).toHaveBeenCalledOnce();
      expect(handler).toHaveBeenCalledWith(expect.objectContaining({ action, reward, dwellMs }));
      // Unsubscribe so the next case's publish does not reach this spy.
      b.off('signals.tip.feedback', handler);
    }
  });
});
/**
 * Behaviour of the bus's event history as exposed through tail(): filtering
 * by subject prefix / userId / since-id, the limit option, id ordering, and
 * the 500-entry cap.
 */
describe('EventBus — ring buffer / tail()', () => {
  /**
   * Reads a single field from an event's payload without resorting to `any`.
   * Returns undefined when the payload is not an object.
   */
  function payloadField(event: { payload: unknown }, key: string): unknown {
    const { payload } = event;
    return typeof payload === 'object' && payload !== null
      ? (payload as Record<string, unknown>)[key]
      : undefined;
  }

  it('tail() returns published events', () => {
    const b = makeBus();
    b.publish('signals.tip.served', { userId: 'u1', tipId: 't1', policy: 'p', servedAt: '' });
    b.publish('signals.tip.served', { userId: 'u2', tipId: 't2', policy: 'p', servedAt: '' });
    const events = b.tail();
    expect(events.length).toBeGreaterThanOrEqual(2);
  });

  it('tail() filters by subject prefix', () => {
    const b = makeBus();
    b.publish('signals.tip.served', { userId: 'u', tipId: 't', policy: 'p', servedAt: '' });
    b.publish('signals.task.synced', { userId: 'u', count: 1, syncedAt: '' });

    const tipEvents = b.tail({ subject: 'signals.tip' });
    // Guard against vacuous truth: every() passes on an empty array.
    expect(tipEvents.length).toBeGreaterThan(0);
    expect(tipEvents.every((e) => e.subject.startsWith('signals.tip'))).toBe(true);

    const taskEvents = b.tail({ subject: 'signals.task' });
    expect(taskEvents.length).toBeGreaterThan(0);
    expect(taskEvents.every((e) => e.subject.startsWith('signals.task'))).toBe(true);
  });

  it('tail() filters by userId', () => {
    const b = makeBus();
    b.publish('signals.tip.served', { userId: 'alice', tipId: 't1', policy: 'p', servedAt: '' });
    b.publish('signals.tip.served', { userId: 'bob', tipId: 't2', policy: 'p', servedAt: '' });
    const aliceEvents = b.tail({ userId: 'alice' });
    // Guard against vacuous truth: an empty result must not count as a pass.
    expect(aliceEvents.length).toBeGreaterThan(0);
    expect(aliceEvents.every((e) => payloadField(e, 'userId') === 'alice')).toBe(true);
  });

  it('tail() respects limit', () => {
    const b = makeBus();
    for (let i = 0; i < 10; i++) {
      b.publish('signals.tip.served', { userId: 'u', tipId: `t${i}`, policy: 'p', servedAt: '' });
    }
    const events = b.tail({ limit: 3 });
    expect(events).toHaveLength(3);
  });

  it('tail() returns only events after `since` id', () => {
    const b = makeBus();
    b.publish('signals.tip.served', { userId: 'u', tipId: 't1', policy: 'p', servedAt: '' });
    const snap = b.tail();
    const lastId = snap[snap.length - 1].id;
    b.publish('signals.tip.served', { userId: 'u', tipId: 't2', policy: 'p', servedAt: '' });
    const after = b.tail({ since: lastId });
    expect(after).toHaveLength(1);
    expect(payloadField(after[0], 'tipId')).toBe('t2');
  });

  it('assigns monotonically increasing ids', () => {
    const b = makeBus();
    b.publish('signals.tip.served', { userId: 'u', tipId: 't1', policy: 'p', servedAt: '' });
    b.publish('signals.tip.served', { userId: 'u', tipId: 't2', policy: 'p', servedAt: '' });
    const events = b.tail();
    const ids = events.map((e) => e.id);
    // The comparison loop below never runs with fewer than two ids.
    expect(ids.length).toBeGreaterThanOrEqual(2);
    for (let i = 1; i < ids.length; i++) {
      expect(ids[i]).toBeGreaterThan(ids[i - 1]);
    }
  });

  it('ring buffer caps at 500 entries and evicts oldest', () => {
    const b = makeBus();
    // Publish 502 events — the first two should be evicted
    for (let i = 0; i < 502; i++) {
      b.publish('signals.tip.served', { userId: 'u', tipId: `t${i}`, policy: 'p', servedAt: '' });
    }
    const all = b.tail({ limit: 1000 });
    expect(all).toHaveLength(500);
    // Oldest surviving entry should be the 3rd published (index 2)
    expect(payloadField(all[0], 'tipId')).toBe('t2');
  });
});
/** Sanity check on the module's shared `bus` export. */
describe('EventBus — singleton bus export', () => {
  it('singleton bus is a Bus instance', () => {
    // The exported instance must come from the same Bus class this file imports.
    const isBusInstance = bus instanceof Bus;
    expect(isBusInstance).toBe(true);
  });
});

View File

@@ -22,8 +22,9 @@ export type TipServedEvent = {
/**
 * Payload describing a user's reaction to a served tip (the tests publish it
 * on the `signals.tip.feedback` subject).
 *
 * Explicit `'helpful'` / `'not_helpful'` feedback is no longer part of the
 * contract; the reward is inferred from the action and the dwell time.
 */
export type TipFeedbackEvent = {
  userId: string;
  tipId: string;
  /** What the user did with the tip. */
  action: 'done' | 'dismiss' | 'snooze';
  /** Inferred from action + dwellMs (see inferReward in recommender.ts). */
  reward: number;
  /**
   * Dwell time in milliseconds.
   * NOTE(review): null presumably means no dwell time was measured — confirm with the producer.
   */
  dwellMs: number | null;
  /** Creation timestamp as a string (the test fixtures use ISO-8601). */
  createdAt: string;
};
@@ -91,4 +92,5 @@ class Bus extends EventEmitter {
}
}
// Export the class itself, in addition to the shared instance below, so that
// callers (e.g. tests) can construct their own isolated Bus instances.
export { Bus };
// Shared module-level Bus instance, created once when this module is loaded.
export const bus = new Bus();