Files
oO/ml/features/profile_schema.py
alvis 17b9516903 feat(features): mirror invalidatedBy into Python ProfileFeature (#61)
Adds invalidated_by: tuple[str, ...] to ProfileFeature, mirroring the
invalidatedBy bus subjects from registry.ts. Adds a test that parses the
TS source and asserts Python stays in sync — same drift-detection pattern
used for names and ttlSec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 07:10:36 +00:00

101 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Profile-feature schema mirror (#81 phase A).
The TypeScript registry in ``services/api/src/profile/registry.ts`` is the
*source of truth* — features are computed there because the data lives in the
TS-owned SQLite DB. This module is a documentation/typing mirror so Python
code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
``profile_features`` payloads without round-tripping the API.
Update this file whenever you add or rename a feature in the TS registry.
The accompanying test asserts the two stay in sync at the name level.
Feature-spec fields (issue #61):

    freshness      — "batched": value cached in the profile store and recomputed
                     on TTL expiry or on an invalidating event.
    ttl_sec        — cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
    source         — where the value originates.
    fallback       — raw value returned when the feature is unavailable (null stored).
    invalidated_by — bus event subjects that trigger a recompute for the affected
                     user; mirrors ``invalidatedBy`` in registry.ts. Empty means
                     TTL-only refresh.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal
# Kind of value a feature carries.
Dtype = Literal["numeric", "categorical"]
# Refresh strategy of a feature; "batched" values are cached and recomputed
# on TTL/event (see the module docstring).
Freshness = Literal["jit", "batched"]

# Durations in seconds, used to spell out ``ttl_sec`` values readably.
_HOUR = 60 * 60
_DAY = 24 * _HOUR
@dataclass(frozen=True)
class ProfileFeature:
    """Immutable spec of a single profile feature.

    Instances are frozen, so they compare by value and are hashable. Field
    order is part of the positional constructor signature.
    """

    # Feature identifier.
    name: str
    # Value kind: "numeric" or "categorical".
    dtype: Dtype
    # Human-readable explanation of what the feature measures.
    description: str
    # Refresh strategy: "jit" or "batched".
    freshness: Freshness
    # Cache lifetime in seconds; mirrors ``ttlSec`` in registry.ts.
    ttl_sec: int
    # Where the value originates.
    source: str
    # Raw value returned when the feature is unavailable, kept as text.
    fallback: str
    # Bus event subjects that trigger a recompute; empty means TTL-only refresh.
    invalidated_by: tuple[str, ...] = ()
# Python-side mirror of the feature registry in
# ``services/api/src/profile/registry.ts``. Keep names, ``ttl_sec`` and
# ``invalidated_by`` aligned with the TypeScript source when editing.
PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
    ProfileFeature(
        name="completion_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "done" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
        invalidated_by=("signals.tip.feedback",),
    ),
    ProfileFeature(
        name="dismiss_rate_30d",
        dtype="numeric",
        description='Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="0.0",
        invalidated_by=("signals.tip.feedback",),
    ),
    ProfileFeature(
        name="mean_dwell_ms_30d",
        dtype="numeric",
        description="Average dwell time (ms between served and reacted) over the last 30 days.",
        freshness="batched",
        ttl_sec=6 * _HOUR,
        source="profile_store",
        fallback="null — serving normalises to 0.0",
        invalidated_by=("signals.tip.feedback",),
    ),
    ProfileFeature(
        name="preferred_hour",
        dtype="numeric",
        # Hour-of-day range is 0 through 23 inclusive.
        description='Hour-of-day with the most "done" reactions in the last 30 days (0–23).',
        freshness="batched",
        ttl_sec=_DAY,
        source="profile_store",
        fallback="null — serving normalises to 0.5 (neutral alignment)",
        invalidated_by=("signals.tip.feedback",),
    ),
    ProfileFeature(
        name="tip_volume_30d",
        dtype="numeric",
        description="Number of tips served to the user in the last 30 days.",
        freshness="batched",
        ttl_sec=_HOUR,
        source="profile_store",
        fallback="0",
        invalidated_by=("signals.tip.served",),
    ),
)
def feature_names() -> set[str]:
    """Return the set of ``name`` values of every entry in ``PROFILE_FEATURES``."""
    names: set[str] = set()
    for feature in PROFILE_FEATURES:
        names.add(feature.name)
    return names