feat(agents): per-user baseline + stdev inference for momentum agent (#114)

Adds two InferredParams (TTL=7d) computed from 28-day rolling daily done counts:
- baseline_completions_per_day: mean done events/day over the window
- stdev: stdev of daily counts (floored at 0.1 to avoid division by zero)

MomentumAgent.compute() now calculates a z-score from recent done events in
inp.feedback_history vs the inferred baseline. Snippet language switches to
z-score framing ("above your usual pace", "below your usual pace") when
|z| >= 1.0, falling back to engagement_trend labels when z is in the normal range.

- engagement_trend InferredParam preserved for backward compatibility
- momentum_window pref added (default 7, user-overridable)
- 14 new tests covering power user, casual user, returning-from-break, and
  relative stdev comparison; engagement_trend tests updated for z-score priority
- Agent bumped to v1.2.0

Closes #114

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-06 05:18:29 +00:00
parent 04212ff318
commit 4cade4868b
2 changed files with 245 additions and 24 deletions

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import math
import statistics
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import ClassVar
@@ -8,6 +11,49 @@ from .inference.history import UserHistory
from .manifest import AgentManifest, InferredParam
def _parse_dt(iso: str) -> datetime:
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Every ``Z`` in the text is rewritten to ``+00:00`` before parsing, and a
    timestamp that carries no offset is tagged as UTC.

    Args:
        iso: Timestamp text, e.g. ``"2026-05-06T05:18:29Z"``.

    Returns:
        The parsed, always-aware datetime. Input that cannot be parsed --
        including non-string values such as ``None`` -- yields
        ``datetime.min`` tagged as UTC instead of raising.
    """
    try:
        dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError / TypeError: the value is not a str (e.g. a null
        # created_at). ValueError: a str that is not valid ISO-8601.
        return datetime.min.replace(tzinfo=timezone.utc)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
def _daily_done_counts(history: UserHistory, window_days: int = 28) -> list[int]:
    """Count ``done`` events per UTC calendar day over the trailing window.

    The window is anchored on the most recent event of any action in
    ``history.events`` (not on the wall clock) and covers ``window_days``
    calendar days ending on that event's day.

    Args:
        history: Object whose ``events`` each expose ``created_at``
            (timestamp text understood by ``_parse_dt``) and ``action``.
        window_days: Number of calendar days to report.

    Returns:
        One count per day, newest day first, with ``0`` for days that have no
        ``done`` event. Empty when there are no events or when no event
        carries a parseable timestamp.
    """
    if not history.events:
        return []

    # Parse every timestamp exactly once; unparseable ones come back as the
    # datetime.min sentinel.
    unparseable = datetime.min.replace(tzinfo=timezone.utc)
    stamped = [(event, _parse_dt(event.created_at)) for event in history.events]
    latest = max(dt for _, dt in stamped)
    if latest == unparseable:
        # Nothing parseable, so there is no anchor for the window; subtracting
        # a timedelta from datetime.min would raise OverflowError.
        return []

    # Bucket events and enumerate window days in the same zone (UTC) so an
    # event recorded with a different offset cannot land on a day the window
    # never reads. No explicit cutoff is needed: anything older than the
    # window falls on a day that is simply never looked up below.
    done_per_day: dict = defaultdict(int)
    for event, dt in stamped:
        if event.action == "done" and dt != unparseable:
            done_per_day[dt.astimezone(timezone.utc).date()] += 1

    latest_day = latest.astimezone(timezone.utc).date()
    # Include zero-completion days so the mean/stdev reflect idle days too.
    return [
        done_per_day.get(latest_day - timedelta(days=offset), 0)
        for offset in range(window_days)
    ]
def _infer_baseline_completions_per_day(history: UserHistory) -> float:
    """Return the mean number of ``done`` events per day.

    The mean is taken over the per-day counts produced by
    ``_daily_done_counts`` with its default window.

    Args:
        history: User history passed straight through to ``_daily_done_counts``.

    Returns:
        The mean daily count as a ``float``, or ``1.0`` when there are no
        per-day counts to average.
    """
    counts = _daily_done_counts(history)
    if not counts:
        return 1.0
    # statistics.mean() over ints hands back an int when the mean is a whole
    # number; coerce so the result always matches the float annotation.
    return float(statistics.mean(counts))
def _infer_stdev(history: UserHistory) -> float:
    """Return the sample standard deviation of the daily ``done`` counts.

    Args:
        history: User history passed straight through to ``_daily_done_counts``.

    Returns:
        ``1.0`` when fewer than two daily counts are available; otherwise the
        standard deviation, never lower than ``0.1``.
    """
    daily = _daily_done_counts(history)
    if len(daily) >= 2:
        # Floor the spread so a z-score computed from it never divides by zero.
        return max(statistics.stdev(daily), 0.1)
    return 1.0
def _infer_engagement_trend(history: UserHistory) -> str:
"""Compare done-rate in the most recent 7 days vs the 7 days before that."""
events = sorted(history.events, key=lambda e: e.created_at)
@@ -26,7 +72,7 @@ def _infer_engagement_trend(history: UserHistory) -> str:
older = [e for e in events if cutoff_older <= _parse_dt(e.created_at) < cutoff_recent]
if len(older) < 3:
return "stable" # not enough baseline to compare
return "stable"
recent_rate = sum(1 for e in recent if e.action == "done") / max(len(recent), 1)
older_rate = sum(1 for e in older if e.action == "done") / max(len(older), 1)
@@ -39,19 +85,9 @@ def _infer_engagement_trend(history: UserHistory) -> str:
return "stable"
def _parse_dt(iso: str) -> datetime:
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Every ``Z`` in the text is rewritten to ``+00:00`` before parsing, and a
    timestamp that carries no offset is tagged as UTC.

    Args:
        iso: Timestamp text, e.g. ``"2026-05-06T05:18:29Z"``.

    Returns:
        The parsed, always-aware datetime. Input that cannot be parsed --
        including non-string values such as ``None`` -- yields
        ``datetime.min`` tagged as UTC instead of raising.
    """
    try:
        dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError / TypeError: the value is not a str (e.g. a null
        # created_at). ValueError: a str that is not valid ISO-8601.
        return datetime.min.replace(tzinfo=timezone.utc)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
MANIFEST = AgentManifest(
id="momentum",
version="1.1.0", # bumped: engagement_trend InferredParam added (#114)
version="1.2.0", # #114: baseline + stdev inferred params; z-score snippet language
description="Characterises the user's recent engagement trend from profile features.",
pref_schema={
"type": "object",
@@ -64,6 +100,24 @@ MANIFEST = AgentManifest(
"default": 25,
"description": "Completion rate below which momentum hints at low engagement.",
},
"baseline_completions_per_day": {
"type": "number",
"minimum": 0,
"default": 1.0,
"description": "User's normal daily done-task rate (inferred from 28d history).",
},
"stdev": {
"type": "number",
"minimum": 0,
"default": 1.0,
"description": "Stdev of daily completion counts; used for z-score normalisation.",
},
"momentum_window": {
"type": "integer",
"minimum": 1,
"default": 7,
"description": "Days of recent history to measure current momentum against baseline.",
},
},
},
context_schema=["profile.features"],
@@ -73,15 +127,42 @@ MANIFEST = AgentManifest(
inferred_params=[
InferredParam(
key="engagement_trend",
ttl_sec=21_600, # recompute every 6 hours alongside snippet
ttl_sec=21_600,
cold_start_default="stable",
min_history=10,
infer=_infer_engagement_trend,
),
InferredParam(
key="baseline_completions_per_day",
ttl_sec=7 * 86_400,
cold_start_default=1.0,
min_history=14,
infer=_infer_baseline_completions_per_day,
),
InferredParam(
key="stdev",
ttl_sec=7 * 86_400,
cold_start_default=1.0,
min_history=14,
infer=_infer_stdev,
),
],
)
def _z_score_label(z: float) -> str | None:
    """Translate a z-score into a pace phrase.

    Args:
        z: Standard score of the recent completion rate against the baseline.

    Returns:
        A phrase for scores at least one standard deviation from zero (with a
        stronger "well ..." variant from two standard deviations), or ``None``
        when the score lies strictly between -1.0 and 1.0.
    """
    if z >= 1.0:
        return "well above your usual pace" if z >= 2.0 else "above your usual pace"
    if z <= -1.0:
        return "well below your usual pace" if z <= -2.0 else "below your usual pace"
    # Within one standard deviation: nothing noteworthy to report.
    return None
class MomentumAgent(BaseAgent):
"""Characterises the user's recent engagement trend from profile features."""
agent_id: ClassVar[str] = MANIFEST.id
@@ -93,6 +174,20 @@ class MomentumAgent(BaseAgent):
dismiss = inp.profile.get("dismiss_rate_30d")
volume = inp.profile.get("tip_volume_30d")
trend: str = inp.agent_prefs.get("engagement_trend", "stable")
baseline: float = float(inp.agent_prefs.get("baseline_completions_per_day", 1.0))
stdev: float = max(float(inp.agent_prefs.get("stdev", 1.0)), 0.1)
window: int = int(inp.agent_prefs.get("momentum_window", 7))
# Count done events in the recent window from feedback_history.
now = inp.now.astimezone(timezone.utc)
cutoff = now - timedelta(days=window)
recent_done = sum(
1 for e in inp.feedback_history
if e.get("action") == "done" and _parse_dt(e.get("created_at", "")) >= cutoff
)
recent_rate = recent_done / window # completions/day over the window
z = (recent_rate - baseline) / stdev
z_label = _z_score_label(z)
parts: list[str] = []
@@ -120,7 +215,21 @@ class MomentumAgent(BaseAgent):
if volume is not None and int(volume) < 5:
parts.append("Very few tips served so far — this is an early-stage user.")
if trend == "up":
# Z-score takes precedence over trend label when we have a baseline.
if z_label:
if z > 0:
parts.append(
f"Completion pace is {z_label} "
f"({recent_done} done in the last {window}d vs "
f"~{baseline * window:.1f} expected) — build on the momentum."
)
else:
parts.append(
f"Completion pace is {z_label} "
f"({recent_done} done in the last {window}d vs "
f"~{baseline * window:.1f} expected) — a motivational or easy-win tip may help."
)
elif trend == "up":
parts.append("Engagement is trending up compared to last week — build on the momentum.")
elif trend == "down":
parts.append("Engagement is trending down — a motivational or easy-win tip may help.")
@@ -131,5 +240,10 @@ class MomentumAgent(BaseAgent):
"dismiss_rate_30d": dismiss,
"tip_volume_30d": volume,
"engagement_trend": trend,
"baseline_completions_per_day": baseline,
"stdev": stdev,
"momentum_window": window,
"recent_done_count": recent_done,
"z_score": round(z, 2),
}
return self._make_output(inp, prompt, snapshot)