Each ml/features/*.py now declares freshness, source, and fallback per feature. ProfileFeature gains ttl_sec (mirrored from registry.ts), freshness="batched", source, and fallback. context.py adds ContextFeatureSpec + CONTEXT_FEATURES for the three JIT features (hour_of_day, day_of_week, tasks). CI test parses ttlSec from registry.ts to catch drift. ml/README updated with split JIT/batched feature contract. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
93 lines
2.9 KiB
Python
93 lines
2.9 KiB
Python
"""Profile-feature schema mirror (#81 phase A).
|
||
|
||
The TypeScript registry in ``services/api/src/profile/registry.ts`` is the
*source of truth* — features are computed there because the data lives in the
TS-owned SQLite DB. This module is a documentation/typing mirror so Python
code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
``profile_features`` payloads without round-tripping the API.

Update this file whenever you add or rename a feature in the TS registry.
The accompanying test asserts the two stay in sync at the name level.

Feature-spec fields (issue #61):
  freshness — "batched": value cached in profile store, recomputed on TTL/event.
  ttl_sec   — cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
  source    — where the value originates.
  fallback  — raw value returned when the feature is unavailable (null stored).
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from typing import Literal
|
||
|
||
|
||
# Allowed value kinds for a feature's ``dtype`` field.
Dtype = Literal["numeric", "categorical"]
# Allowed values for a feature's ``freshness`` field.
Freshness = Literal["jit", "batched"]

# Durations in seconds, so ``ttl_sec`` values can be written as e.g. ``6 * _HOUR``.
_HOUR = 3600
_DAY = 86_400
|
||
|
||
|
||
@dataclass(frozen=True)
class ProfileFeature:
    """Immutable description of one profile feature.

    Instances are declarative records only: they carry the feature's name,
    value kind, and serving metadata, and perform no computation themselves.
    """

    # Feature key as it appears on ``profile_features`` payloads.
    name: str
    # Value kind: "numeric" or "categorical".
    dtype: Dtype
    # Human-readable explanation of what the feature measures.
    description: str
    # Freshness class: "jit" or "batched".
    freshness: Freshness
    # Cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
    ttl_sec: int
    # Where the value originates.
    source: str
    # Raw value returned when the feature is unavailable, written as text.
    fallback: str
|
||
|
||
|
||
# Declared profile features, in declaration order. All are "batched" and read
# from the profile store; only the cache lifetime and fallback differ.
PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
    ProfileFeature(
        name="completion_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "done" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
        name="dismiss_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
        name="mean_dwell_ms_30d",
        dtype="numeric",
        description="Average dwell time (ms between served and reacted) over the last 30 days.",
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="null — serving normalises to 0.0",
    ),
    ProfileFeature(
        name="preferred_hour",
        dtype="numeric",
        description='Hour-of-day with the most "done" reactions in the last 30 days (0–23).',
        freshness="batched",
        ttl_sec=_DAY,
        source="profile_store",
        fallback="null — serving normalises to 0.5 (neutral alignment)",
    ),
    ProfileFeature(
        name="tip_volume_30d",
        dtype="numeric",
        description="Number of tips served to the user in the last 30 days.",
        freshness="batched",
        ttl_sec=_HOUR,
        source="profile_store",
        fallback="0",
    ),
)
|
||
|
||
|
||
def feature_names() -> set[str]:
    """Return the names of all features declared in ``PROFILE_FEATURES``.

    A new set is built on every call, so callers may mutate the result
    without affecting the registry.
    """
    return {f.name for f in PROFILE_FEATURES}
|