feat(profile): user-profile feature registry + builder (phase A)

Centralizes user-level features (completion_rate_30d, dismiss_rate_30d, mean_dwell_ms_30d, preferred_hour, tip_volume_30d) in a TS registry that owns both definition and SQL aggregation, since the data lives in the TS-owned SQLite tables (tip_views/tip_feedback). Lazy TTL refresh keeps recommend latency bounded; values persist in user_profile_features (KV). ml/serving accepts profile_features on /score + /generate but does not yet consume them — extending the bandit feature vector changes D and resets every user's learned state, so that's a deliberate phase-B step. Includes ml/features/profile_schema.py as a contract mirror with a sync test that diffs name sets against registry.ts. ADR-0011 records the data-locality reasoning (registry in TS, not Python as the issue originally suggested). Phase B (deferred): event-driven incremental updates, bandit consumption with state migration, admin per-user profile page, staleness alerts. Refs #81. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 00:22:22 +00:00
parent 430804e9a5
commit 7d4c29e137
13 changed files with 636 additions and 2 deletions
--- a/ml/README.md
+++ b/ml/README.md
@@ -18,6 +18,15 @@ Python. Owns models, features, training, online scoring.
 - Training reads from the offline feature store; serving reads from the online feature store; definitions are shared (no train/serve skew).
 - Shadow deploys before any policy change that affects real users.

+## Profile-feature contract
+
+User-level features (completion rate, preferred hour, tip volume…) are computed
+by the TypeScript recommender and shipped to ml/serving on every `/score` and
+`/generate` call as `profile_features: dict | None`. The Python mirror in
+`features/profile_schema.py` documents the available names + dtypes — keep it
+in sync with `services/api/src/profile/registry.ts` (a CI-style test asserts
+the name sets match). See ADR-0011.
+
 ## Prompt registry

 `serving/prompts.py` keys tip-generation prompts by stable version string. Adding a new variant means adding an entry — no caller changes. Selection precedence: `POST /generate` body's `prompt_version` field → env `DEFAULT_PROMPT_VERSION` → `"v1"`. The TypeScript recommender drives selection via `TIP_PROMPT_VERSION` (single value or comma-separated rotation); the version actually used flows back in the response and is persisted to `tip_scores.prompt_version` so the admin reward-analytics dashboard can bucket reactions per variant.
--- a/ml/features/profile_schema.py
+++ b/ml/features/profile_schema.py
@@ -0,0 +1,53 @@
+"""Profile-feature schema mirror (#81 phase A).
+
+The TypeScript registry in ``services/api/src/profile/registry.ts`` is the
+*source of truth* — features are computed there because the data lives in the
+TS-owned SQLite DB. This module is a documentation/typing mirror so Python
+code (ml/serving, eval harnesses, notebooks) knows what fields to expect on
+``profile_features`` payloads without round-tripping the API.
+
+Update this file whenever you add or rename a feature in the TS registry.
+The accompanying test asserts the two stay in sync at the name level.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+
+Dtype = Literal["numeric", "categorical"]
+
+
+@dataclass(frozen=True)
+class ProfileFeature:
+    name: str
+    dtype: Dtype
+    description: str
+
+
+PROFILE_FEATURES: tuple[ProfileFeature, ...] = (
+    ProfileFeature(
+        "completion_rate_30d", "numeric",
+        'Fraction of tips served in the last 30 days that received a "done" reaction.',
+    ),
+    ProfileFeature(
+        "dismiss_rate_30d", "numeric",
+        'Fraction of tips served in the last 30 days that received a "dismiss" reaction.',
+    ),
+    ProfileFeature(
+        "mean_dwell_ms_30d", "numeric",
+        "Average dwell time (ms between served and reacted) over the last 30 days.",
+    ),
+    ProfileFeature(
+        "preferred_hour", "numeric",
+        'Hour-of-day with the most "done" reactions in the last 30 days (0-23).',
+    ),
+    ProfileFeature(
+        "tip_volume_30d", "numeric",
+        "Number of tips served to the user in the last 30 days.",
+    ),
+)
+
+
+def feature_names() -> set[str]:
+    return {f.name for f in PROFILE_FEATURES}
--- a/ml/features/test_profile_schema.py
+++ b/ml/features/test_profile_schema.py
@@ -0,0 +1,41 @@
+"""Smoke test for profile_schema mirror (#81 phase A).
+
+The TS registry in services/api/src/profile/registry.ts is the source of truth.
+This test checks that the names in profile_schema.py match the registry by reading the TS
+file and grepping for `name: '...'`. Crude but cheap, and it catches the
+common rename/add-without-mirror failure mode.
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+
+from ml.features.profile_schema import PROFILE_FEATURES, feature_names
+
+
+REGISTRY_PATH = Path(__file__).resolve().parents[2] / "services" / "api" / "src" / "profile" / "registry.ts"
+
+
+def _ts_registry_names() -> set[str]:
+    text = REGISTRY_PATH.read_text(encoding="utf-8")
+    # Each FEATURES entry has a single-quoted `name: '<feature_name>',` field. Extract every such match in the file.
+    return set(re.findall(r"name:\s*'([a-zA-Z0-9_]+)'", text))
+
+
+def test_python_mirror_matches_ts_registry():
+    py_names = feature_names()
+    ts_names = _ts_registry_names()
+    assert py_names == ts_names, (
+        f"Profile feature names drifted between TS registry and Python mirror.\n"
+        f"  in Python only: {sorted(py_names - ts_names)}\n"
+        f"  in TS only:     {sorted(ts_names - py_names)}"
+    )
+
+
+def test_profile_schema_no_duplicates():
+    names = [f.name for f in PROFILE_FEATURES]
+    assert len(names) == len(set(names)), f"duplicate names: {names}"
+
+
+def test_profile_schema_dtypes_known():
+    for f in PROFILE_FEATURES:
+        assert f.dtype in {"numeric", "categorical"}
--- a/ml/serving/main.py
+++ b/ml/serving/main.py
@@ -152,6 +152,11 @@ class ScoreRequest(BaseModel):
    user_id: str
    candidates: list[Candidate]
    context: Context = Context()
+    # User-level features computed by the API (#81 phase A). Accepted, logged,
+    # but not yet consumed by the bandit — extending the feature vector
+    # changes `D` and resets every user's learned state, which is a deliberate
+    # follow-up (phase B), not a side effect of this PR.
+    profile_features: Optional[dict] = None


 class ScoreResponse(BaseModel):
@@ -184,6 +189,9 @@ class GenerateRequest(BaseModel):
    context: PromptContext = PromptContext()
    n: int = 3
    prompt_version: Optional[str] = None  # None → server default (env DEFAULT_PROMPT_VERSION)
+    # User-level features (#81 phase A). Accepted by the contract; not yet
+    # injected into the prompt — that's a #84-style prompt-design decision.
+    profile_features: Optional[dict] = None


 class TipCandidate(BaseModel):