feat(features): per-feature freshness spec — JIT vs batched (#61)

Each ml/features/*.py now declares freshness, source, and fallback per
feature. ProfileFeature gains ttl_sec (mirrored from registry.ts),
freshness="batched", source, and fallback. context.py adds
ContextFeatureSpec + CONTEXT_FEATURES for the three JIT features
(hour_of_day, day_of_week, tasks). CI test parses ttlSec from registry.ts
to catch drift. ml/README updated with split JIT/batched feature contract.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 17:02:55 +00:00
parent bd3ea1b8b1
commit 45416000f9
6 changed files with 218 additions and 21 deletions

View File

@@ -8,6 +8,12 @@ code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
Update this file whenever you add or rename a feature in the TS registry.
The accompanying test asserts the two stay in sync at the name level.
Feature-spec fields (issue #61):
freshness — "batched": value cached in profile store, recomputed on TTL/event.
ttl_sec — cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
source — where the value originates.
fallback — value returned when the feature is unavailable (null stored), written as a string; may include a note on how serving normalises it.
"""
from __future__ import annotations
@@ -16,6 +22,10 @@ from typing import Literal
Dtype = Literal["numeric", "categorical"]
Freshness = Literal["jit", "batched"]
_HOUR = 3600
_DAY = 86_400
@dataclass(frozen=True)
@@ -23,28 +33,57 @@ class ProfileFeature:
name: str
dtype: Dtype
description: str
freshness: Freshness
ttl_sec: int
source: str
fallback: str
PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
ProfileFeature(
"completion_rate_30d", "numeric",
'Fraction of tips served in the last 30 days that received a "done" reaction.',
name="completion_rate_30d",
dtype="numeric",
description='Fraction of tips served in the last 30 days that received a "done" reaction.',
freshness="batched",
ttl_sec=6 * _HOUR,
source="profile_store",
fallback="0.0",
),
ProfileFeature(
"dismiss_rate_30d", "numeric",
'Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
name="dismiss_rate_30d",
dtype="numeric",
description='Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
freshness="batched",
ttl_sec=6 * _HOUR,
source="profile_store",
fallback="0.0",
),
ProfileFeature(
"mean_dwell_ms_30d", "numeric",
"Average dwell time (ms between served and reacted) over the last 30 days.",
name="mean_dwell_ms_30d",
dtype="numeric",
description="Average dwell time (ms between served and reacted) over the last 30 days.",
freshness="batched",
ttl_sec=6 * _HOUR,
source="profile_store",
fallback="null — serving normalises to 0.0",
),
ProfileFeature(
"preferred_hour", "numeric",
'Hour-of-day with the most "done" reactions in the last 30 days (0-23).',
name="preferred_hour",
dtype="numeric",
description='Hour-of-day with the most "done" reactions in the last 30 days (0-23).',
freshness="batched",
ttl_sec=_DAY,
source="profile_store",
fallback="null — serving normalises to 0.5 (neutral alignment)",
),
ProfileFeature(
"tip_volume_30d", "numeric",
"Number of tips served to the user in the last 30 days.",
name="tip_volume_30d",
dtype="numeric",
description="Number of tips served to the user in the last 30 days.",
freshness="batched",
ttl_sec=_HOUR,
source="profile_store",
fallback="0",
),
)