feat(features): per-feature freshness spec — JIT vs batched (#61)

Each ml/features/*.py now declares freshness, source, and fallback per feature. ProfileFeature gains ttl_sec (mirrored from registry.ts), freshness="batched", source, and fallback. context.py adds ContextFeatureSpec + CONTEXT_FEATURES for the three JIT features (hour_of_day, day_of_week, tasks). CI test parses ttlSec from registry.ts to catch drift. ml/README updated with split JIT/batched feature contract. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 17:02:55 +00:00
parent bd3ea1b8b1
commit 45416000f9
6 changed files with 218 additions and 21 deletions
--- a/ml/README.md
+++ b/ml/README.md
@@ -5,7 +5,7 @@ Python. Owns models, features, training, online scoring.
 | Dir | Role | Phase |
 |---|---|---|
 | `serving/` | FastAPI online scorer (`/score`, `/generate`) + LiteLLM gateway + prompt registry (`prompts.py`) + JetStream consumers for `signals.>` / `feedback.>`, called by `recommender` | 1–2 |
-| `features/` | context assembler (`context.py`): signals → `PromptContext`; Feast adapter later | 2 |
+| `features/` | context assembler (`context.py`): signals → `PromptContext`; profile-feature schema mirror (`profile_schema.py`); Feast adapter later | 2 |
 | `pipelines/` | batch feature + training DAGs (Prefect/Airflow) | 4 |
 | `registry/` | MLflow-backed model registry integration | 4 |
 | `experiments/` | A/B assignment + multi-armed bandit policies | 4 |
@@ -18,14 +18,24 @@ Python. Owns models, features, training, online scoring.
 - Training reads from the offline feature store; serving reads from the online feature store; definitions are shared (no train/serve skew).
 - Shadow deploys before any policy change that affects real users.
-## Profile-feature contract
+## Feature contract
 ### Profile features (batched)
 User-level features (completion rate, preferred hour, tip volume…) are computed
-by the TypeScript recommender and shipped to ml/serving on every `/score` and
+by the TypeScript recommender and shipped to `ml/serving` on every `/score` and
 `/generate` call as `profile_features: dict | None`. The Python mirror in
-`features/profile_schema.py` documents the available names + dtypes — keep it
+`features/profile_schema.py` documents each feature's name, dtype, TTL, source,
-in sync with `services/api/src/profile/registry.ts` (a CI-style test asserts
+and null fallback — keep it in sync with `services/api/src/profile/registry.ts`
-the name sets match). See ADR-0011.
+(a CI-style test asserts names and `ttlSec` values match). See ADR-0011.
 ### Context features (JIT)
 Request-time signals assembled by `features/context.py` (`hour_of_day`,
 `day_of_week`, task list). These are never cached — they are derived from the
 system clock and the live Todoist feed at the moment of the score call.
 `CONTEXT_FEATURES` in `context.py` declares freshness, source, and fallback for
 each field (issue #61).
 ## Prompt registry
--- a/ml/features/init.py
+++ b/ml/features/init.py
@@ -1,3 +1,8 @@
-from .context import build_context, PromptContext, TaskSignal
+from .context import build_context, PromptContext, TaskSignal, ContextFeatureSpec, CONTEXT_FEATURES
 from .profile_schema import ProfileFeature, PROFILE_FEATURES, feature_names
-__all__ = ["build_context", "PromptContext", "TaskSignal"]
+__all__ = [
    "build_context", "PromptContext", "TaskSignal",
    "ContextFeatureSpec", "CONTEXT_FEATURES",
    "ProfileFeature", "PROFILE_FEATURES", "feature_names",
 ]
--- a/ml/features/context.py
+++ b/ml/features/context.py
@@ -2,12 +2,56 @@
 Context assembler — converts raw user signals into a PromptContext for LLM tip generation.
 Usage:
-    from ml.features.context import build_context
+    from ml.features.context import build_context, CONTEXT_FEATURES
    ctx = build_context(tasks, hour_of_day=9, day_of_week=2)
 Feature-spec (issue #61):
  All context features are JIT — they are assembled at request time from live
  sources (system clock, caller-supplied task list) rather than read from a
  cached profile store. They carry no TTL because they are never persisted.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Literal
@dataclass(frozen=True)
 class ContextFeatureSpec:
    name: str
    dtype: Literal["numeric", "categorical", "list"]
    freshness: Literal["jit", "batched"]
    source: str
    fallback: str
    description: str
 CONTEXT_FEATURES: tuple[ContextFeatureSpec, ...] = (
    ContextFeatureSpec(
        name="hour_of_day",
        dtype="numeric",
        freshness="jit",
        source="request",
        fallback="12",
        description="Current hour (0–23), supplied by the caller at score time.",
    ),
    ContextFeatureSpec(
        name="day_of_week",
        dtype="numeric",
        freshness="jit",
        source="request",
        fallback="0",
        description="ISO weekday (0=Monday … 6=Sunday), supplied by the caller at score time.",
    ),
    ContextFeatureSpec(
        name="tasks",
        dtype="list",
        freshness="jit",
        source="todoist-integration",
        fallback="[]",
        description="User's open tasks fetched live from the Todoist integration at request time.",
    ),
 )
@dataclass
--- a/ml/features/profile_schema.py
+++ b/ml/features/profile_schema.py
@@ -8,6 +8,12 @@ code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
 Update this file whenever you add or rename a feature in the TS registry.
 The accompanying test asserts the two stay in sync at the name level.
 Feature-spec fields (issue #61):
  freshness — "batched": value cached in profile store, recomputed on TTL/event.
  ttl_sec   — cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
  source    — where the value originates.
  fallback  — raw value returned when the feature is unavailable (null stored).
 """
 from __future__ import annotations
@@ -16,6 +22,10 @@ from typing import Literal
 Dtype = Literal["numeric", "categorical"]
 Freshness = Literal["jit", "batched"]
 _HOUR = 3600
 _DAY = 86_400
@dataclass(frozen=True)
@@ -23,28 +33,57 @@ class ProfileFeature:
    name: str
    dtype: Dtype
    description: str
    freshness: Freshness
    ttl_sec: int
    source: str
    fallback: str
 PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
    ProfileFeature(
-        "completion_rate_30d", "numeric",
+        name="completion_rate_30d",
-        'Fraction of tips served in the last 30 days that received a "done" reaction.',
+        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "done" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
-        "dismiss_rate_30d", "numeric",
+        name="dismiss_rate_30d",
-        'Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
+        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
-        "mean_dwell_ms_30d", "numeric",
+        name="mean_dwell_ms_30d",
-        "Average dwell time (ms between served and reacted) over the last 30 days.",
+        dtype="numeric",
        description="Average dwell time (ms between served and reacted) over the last 30 days.",
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="null — serving normalises to 0.0",
    ),
    ProfileFeature(
-        "preferred_hour", "numeric",
+        name="preferred_hour",
-        'Hour-of-day with the most "done" reactions in the last 30 days (0-23).',
+        dtype="numeric",
        description='Hour-of-day with the most "done" reactions in the last 30 days (0–23).',
        freshness="batched",
        ttl_sec=_DAY,
        source="profile_store",
        fallback="null — serving normalises to 0.5 (neutral alignment)",
    ),
    ProfileFeature(
-        "tip_volume_30d", "numeric",
+        name="tip_volume_30d",
-        "Number of tips served to the user in the last 30 days.",
+        dtype="numeric",
        description="Number of tips served to the user in the last 30 days.",
        freshness="batched",
        ttl_sec=_HOUR,
        source="profile_store",
        fallback="0",
    ),
 )
--- a/ml/features/test_context.py
+++ b/ml/features/test_context.py
@@ -1,7 +1,7 @@
 """Tests for ml/features/context.py"""
 import pytest
 import sys, os; sys.path.insert(0, os.path.dirname(__file__))
-from context import build_context, TaskSignal, PromptContext
+from context import build_context, TaskSignal, PromptContext, CONTEXT_FEATURES
 def test_empty_tasks():
@@ -62,3 +62,30 @@ def test_due_date_none_preserved():
    tasks = [TaskSignal(id="x", content="No due", due_date=None)]
    ctx = build_context(tasks)
    assert ctx.tasks[0]["due_date"] is None
 # ── CONTEXT_FEATURES spec tests (issue #61) ──────────────────────────────────
 def test_context_features_expected_names():
    names = {f.name for f in CONTEXT_FEATURES}
    assert names == {"hour_of_day", "day_of_week", "tasks"}
 def test_context_features_all_jit():
    for f in CONTEXT_FEATURES:
        assert f.freshness == "jit", f"{f.name}: expected freshness='jit', got {f.freshness!r}"
 def test_context_features_source_set():
    for f in CONTEXT_FEATURES:
        assert f.source, f"{f.name}: source must not be empty"
 def test_context_features_fallback_set():
    for f in CONTEXT_FEATURES:
        assert f.fallback, f"{f.name}: fallback must not be empty"
 def test_context_features_no_duplicates():
    names = [f.name for f in CONTEXT_FEATURES]
    assert len(names) == len(set(names)), f"duplicate names: {names}"
--- a/ml/features/test_profile_schema.py
+++ b/ml/features/test_profile_schema.py
@@ -1,4 +1,4 @@
-"""Smoke test for profile_schema mirror (#81 phase A).
+"""Smoke test for profile_schema mirror (#81 phase A, #61 freshness spec).
 The TS registry in services/api/src/profile/registry.ts is the source of truth.
 This test checks the names listed here match the registry by reading the TS
@@ -14,6 +14,18 @@ from ml.features.profile_schema import PROFILE_FEATURES, feature_names
 REGISTRY_PATH = Path(__file__).resolve().parents[2] / "services" / "api" / "src" / "profile" / "registry.ts"
 _HOUR = 3600
 _DAY = 86_400
 # Expected ttl_sec values mirrored from registry.ts — keeps the two in sync.
 _EXPECTED_TTL: dict[str, int] = {
    "completion_rate_30d": 6 * _HOUR,
    "dismiss_rate_30d":    6 * _HOUR,
    "mean_dwell_ms_30d":   6 * _HOUR,
    "preferred_hour":      _DAY,
    "tip_volume_30d":      _HOUR,
 }
 def _ts_registry_names() -> set[str]:
    text = REGISTRY_PATH.read_text(encoding="utf-8")
@@ -21,6 +33,35 @@ def _ts_registry_names() -> set[str]:
    return set(re.findall(r"name:\s*'([a-zA-Z0-9_]+)'", text))
 def _ts_registry_ttls() -> dict[str, int]:
    """Parse ttlSec values from registry.ts (crude but sufficient for drift detection).
    Handles TS symbolic constants (HOUR, DAY) and expressions like ``6 * HOUR``.
    """
    text = REGISTRY_PATH.read_text(encoding="utf-8")
    # Extract numeric constants: `const HOUR = 3600;` or `const DAY = 86_400;`
    consts: dict[str, int] = {}
    for m in re.finditer(r"const\s+([A-Z_]+)\s*=\s*([\d_]+)", text):
        consts[m.group(1)] = int(m.group(2).replace("_", ""))
    def _eval_expr(expr: str) -> int:
        tokens = [t.strip() for t in expr.split("*")]
        result = 1
        for t in tokens:
            result *= consts[t] if t in consts else int(t)
        return result
    result: dict[str, int] = {}
    for block in re.split(r"\{", text):
        name_m = re.search(r"name:\s*'([a-zA-Z0-9_]+)'", block)
        # ttlSec may be a constant name, a number, or `N * CONST`
        ttl_m = re.search(r"ttlSec:\s*([A-Za-z0-9_]+(?:\s*\*\s*[A-Za-z0-9_]+)?)", block)
        if name_m and ttl_m:
            result[name_m.group(1)] = _eval_expr(ttl_m.group(1))
    return result
 def test_python_mirror_matches_ts_registry():
    py_names = feature_names()
    ts_names = _ts_registry_names()
@@ -39,3 +80,34 @@ def test_profile_schema_no_duplicates():
 def test_profile_schema_dtypes_known():
    for f in PROFILE_FEATURES:
        assert f.dtype in {"numeric", "categorical"}
 def test_all_profile_features_are_batched():
    for f in PROFILE_FEATURES:
        assert f.freshness == "batched", f"{f.name}: expected freshness='batched', got {f.freshness!r}"
 def test_profile_feature_ttl_matches_ts_registry():
    ts_ttls = _ts_registry_ttls()
    for f in PROFILE_FEATURES:
        assert f.name in ts_ttls, f"{f.name} not found in TS registry ttlSec parse"
        assert f.ttl_sec == ts_ttls[f.name], (
            f"{f.name}: Python ttl_sec={f.ttl_sec} != TS ttlSec={ts_ttls[f.name]}"
        )
 def test_profile_feature_ttl_matches_expected():
    for f in PROFILE_FEATURES:
        assert f.ttl_sec == _EXPECTED_TTL[f.name], (
            f"{f.name}: ttl_sec={f.ttl_sec}, expected {_EXPECTED_TTL[f.name]}"
        )
 def test_profile_feature_source_is_profile_store():
    for f in PROFILE_FEATURES:
        assert f.source == "profile_store", f"{f.name}: unexpected source {f.source!r}"
 def test_profile_feature_fallback_set():
    for f in PROFILE_FEATURES:
        assert f.fallback, f"{f.name}: fallback must not be empty"