"""Profile-feature schema mirror (#81 phase A).

The TypeScript registry in ``services/api/src/profile/registry.ts`` is the
*source of truth* — features are computed there because the data lives in the
TS-owned SQLite DB. This module is a documentation/typing mirror so Python
code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
``profile_features`` payloads without round-tripping the API.

Update this file whenever you add or rename a feature in the TS registry. The
accompanying test asserts the two stay in sync at the name level.

Feature-spec fields (issue #61):
    freshness — "batched": value cached in profile store, recomputed on
                TTL/event. The ``Freshness`` type also admits "jit"; no
                feature listed here uses it (presumably computed at request
                time — confirm against registry.ts).
    ttl_sec   — cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
    source    — where the value originates.
    fallback  — raw value returned when the feature is unavailable
                (null stored).
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

# Closed vocabularies for the spec fields below.
Dtype = Literal["numeric", "categorical"]
Freshness = Literal["jit", "batched"]

# Second-based units so the TTLs below read as durations.
_HOUR = 3600
_DAY = 86_400


@dataclass(frozen=True)
class ProfileFeature:
    """Immutable description of one profile feature.

    Attributes:
        name: Feature key; ``feature_names()`` collects these.
        dtype: Value kind, either "numeric" or "categorical".
        description: Human-readable explanation of what the feature measures.
        freshness: Either "jit" or "batched" (see the module docstring).
        ttl_sec: Cache lifetime in seconds.
        source: Where the value originates.
        fallback: Textual description of the value used when the feature is
            unavailable. It is always a string here, and some entries carry
            an explanatory note rather than a bare literal.
    """

    name: str
    dtype: Dtype
    description: str
    freshness: Freshness
    ttl_sec: int
    source: str
    fallback: str


# One entry per mirrored feature; keep in step with the TS registry.
PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
    ProfileFeature(
        name="completion_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "done" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
        name="dismiss_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
    ),
    ProfileFeature(
        name="mean_dwell_ms_30d",
        dtype="numeric",
        description="Average dwell time (ms between served and reacted) over the last 30 days.",
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="null — serving normalises to 0.0",
    ),
    ProfileFeature(
        name="preferred_hour",
        dtype="numeric",
        description='Hour-of-day with the most "done" reactions in the last 30 days (0–23).',
        freshness="batched",
        ttl_sec=_DAY,
        source="profile_store",
        fallback="null — serving normalises to 0.5 (neutral alignment)",
    ),
    ProfileFeature(
        name="tip_volume_30d",
        dtype="numeric",
        description="Number of tips served to the user in the last 30 days.",
        freshness="batched",
        ttl_sec=_HOUR,
        source="profile_store",
        fallback="0",
    ),
)


def feature_names() -> set[str]:
    """Return the set of ``name`` values across ``PROFILE_FEATURES``.

    A new set is built on every call, so callers may mutate the result
    freely. Because the result is a set, duplicate names in the table would
    collapse into one entry.
    """
    return {f.name for f in PROFILE_FEATURES}