Centralizes user-level features (completion_rate_30d, dismiss_rate_30d, mean_dwell_ms_30d, preferred_hour, tip_volume_30d) in a TS registry that owns both definition and SQL aggregation, since the data lives in the TS-owned SQLite tables (tip_views/tip_feedback). Lazy TTL refresh keeps recommend latency bounded; values persist in user_profile_features (KV). ml/serving accepts profile_features on /score + /generate but does not yet consume them — extending the bandit feature vector changes D and resets every user's learned state, so that's a deliberate phase-B step. Includes ml/features/profile_schema.py as a contract mirror with a sync test that diffs name sets against registry.ts. ADR-0011 records the data-locality reasoning (registry in TS, not Python as the issue originally suggested). Phase B (deferred): event-driven incremental updates, bandit consumption with state migration, admin per-user profile page, staleness alerts. Refs #81. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
42 lines
1.4 KiB
Python
42 lines
1.4 KiB
Python
"""Smoke test for profile_schema mirror (#81 phase A).

The TS registry in services/api/src/profile/registry.ts is the source of truth.
This test checks that the feature names in the Python mirror
(ml.features.profile_schema) match the registry by reading the TS file and
grepping for `name: '...'`. Crude but cheap, and it catches the common
rename/add-without-mirror failure mode.
"""
|
|
from __future__ import annotations
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from ml.features.profile_schema import PROFILE_FEATURES, feature_names
|
|
|
|
|
|
# Location of the TS registry file, resolved relative to this test module:
# two directories above the one containing this file, then
# services/api/src/profile/registry.ts.
# NOTE(review): presumably parents[2] is the repository root — confirm.
REGISTRY_PATH = Path(__file__).resolve().parents[2].joinpath(
    "services", "api", "src", "profile", "registry.ts"
)
|
|
|
|
|
|
def _ts_registry_names() -> set[str]:
    """Collect the feature names declared in the TS registry file.

    Reads the file at ``REGISTRY_PATH`` as UTF-8 and returns the distinct
    identifiers captured from every ``name: '<identifier>'`` occurrence.
    Only single-quoted values made of ASCII letters, digits, and underscores
    are matched.
    """
    name_pattern = re.compile(r"name:\s*'([a-zA-Z0-9_]+)'")
    registry_source = REGISTRY_PATH.read_text(encoding="utf-8")
    # The TS syntax is not parsed: any textual match in the file counts.
    return {match.group(1) for match in name_pattern.finditer(registry_source)}
|
|
|
|
|
|
def test_python_mirror_matches_ts_registry():
    """Fail when the Python mirror and the TS registry list different names.

    Compares ``feature_names()`` against the names scraped from the registry
    file by ``_ts_registry_names()`` and, on mismatch, reports the names that
    are present on only one side.
    """
    # set() leaves the contents unchanged when feature_names() already
    # returns a set; it keeps the equality check and the set differences
    # below valid for any iterable of names.
    py_names = set(feature_names())
    ts_names = _ts_registry_names()
    assert py_names == ts_names, (
        "Profile feature names drifted between TS registry and Python mirror.\n"
        f" in Python only: {sorted(py_names - ts_names)}\n"
        f" in TS only: {sorted(ts_names - py_names)}"
    )
|
|
|
|
|
|
def test_profile_schema_no_duplicates():
    """Check that no name appears more than once in ``PROFILE_FEATURES``."""
    names = [feature.name for feature in PROFILE_FEATURES]
    # Deduplicating shrinks the collection only if some name repeats.
    unique_count = len(set(names))
    assert unique_count == len(names), f"duplicate names: {names}"
|
|
|
|
|
|
def test_profile_schema_dtypes_known():
    """Check that every entry in ``PROFILE_FEATURES`` has a recognised dtype.

    The accepted values are ``"numeric"`` and ``"categorical"``.
    """
    known_dtypes = {"numeric", "categorical"}
    for f in PROFILE_FEATURES:
        # Name the offending feature so a failure is actionable on its own.
        assert f.dtype in known_dtypes, (
            f"feature {f.name!r} has unknown dtype {f.dtype!r}; "
            f"expected one of {sorted(known_dtypes)}"
        )
|