feat(features): per-feature freshness spec — JIT vs batched (#61)
Each ml/features/*.py now declares freshness, source, and fallback per feature. ProfileFeature gains ttl_sec (mirrored from registry.ts), freshness="batched", source, and fallback. context.py adds ContextFeatureSpec + CONTEXT_FEATURES for the three JIT features (hour_of_day, day_of_week, tasks). CI test parses ttlSec from registry.ts to catch drift. ml/README updated with split JIT/batched feature contract. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Smoke test for profile_schema mirror (#81 phase A).
|
||||
"""Smoke test for profile_schema mirror (#81 phase A, #61 freshness spec).
|
||||
|
||||
The TS registry in services/api/src/profile/registry.ts is the source of truth.
|
||||
This test checks the names listed here match the registry by reading the TS
|
||||
@@ -14,6 +14,18 @@ from ml.features.profile_schema import PROFILE_FEATURES, feature_names
|
||||
|
||||
REGISTRY_PATH = Path(__file__).resolve().parents[2] / "services" / "api" / "src" / "profile" / "registry.ts"
|
||||
|
||||
# Durations in seconds, used to spell out the expected TTLs below.
_HOUR = 60 * 60
_DAY = 24 * _HOUR

# Expected ttl_sec values mirrored from registry.ts — keeps the two in sync.
# The three 30-day rate/dwell features share a six-hour TTL.
_EXPECTED_TTL: dict[str, int] = {
    **dict.fromkeys(
        ("completion_rate_30d", "dismiss_rate_30d", "mean_dwell_ms_30d"),
        6 * _HOUR,
    ),
    "preferred_hour": _DAY,
    "tip_volume_30d": _HOUR,
}
|
||||
|
||||
|
||||
def _ts_registry_names() -> set[str]:
|
||||
text = REGISTRY_PATH.read_text(encoding="utf-8")
|
||||
@@ -21,6 +33,35 @@ def _ts_registry_names() -> set[str]:
|
||||
return set(re.findall(r"name:\s*'([a-zA-Z0-9_]+)'", text))
|
||||
|
||||
|
||||
def _ts_registry_ttls() -> dict[str, int]:
    """Parse ttlSec values from registry.ts (crude but sufficient for drift detection).

    Handles numeric literals (with optional ``_`` separators), TS symbolic
    constants (``HOUR``, ``DAY``) and products of any number of factors such
    as ``6 * HOUR`` or ``6 * 60 * 60``. Constants may themselves be defined
    as a product of earlier constants (``const DAY = 24 * HOUR;``).

    Returns:
        Mapping of feature name to TTL in seconds, one entry for every
        ``{``-delimited chunk of the file that contains both a quoted
        ``name: '...'`` and a ``ttlSec:`` expression.

    Raises:
        ValueError: if a ``ttlSec`` expression contains a factor that is
            neither an integer literal nor a known constant.
    """
    text = REGISTRY_PATH.read_text(encoding="utf-8")

    # One or more factors joined by `*`; each factor is a number or identifier.
    product = r"[A-Za-z0-9_]+(?:\s*\*\s*[A-Za-z0-9_]+)*"
    const_re = re.compile(r"const\s+([A-Z_][A-Z0-9_]*)\s*=\s*(" + product + r")")
    name_re = re.compile(r"name:\s*'([a-zA-Z0-9_]+)'")
    # ttlSec may be a constant name, a number, or a product of those.
    ttl_re = re.compile(r"ttlSec:\s*(" + product + r")")

    consts: dict[str, int] = {}

    def _eval_expr(expr: str) -> int:
        """Multiply out a ``*``-separated expression using the known constants."""
        value = 1
        for factor in expr.split("*"):
            factor = factor.strip()
            if factor in consts:
                value *= consts[factor]
                continue
            try:
                # int() accepts underscore separators such as "86_400".
                value *= int(factor)
            except ValueError:
                raise ValueError(
                    f"cannot evaluate factor {factor!r} of TS expression {expr!r}"
                ) from None
        return value

    # Resolve constants in file order so later ones can build on earlier ones.
    # Upper-case consts that are not numeric (e.g. assigned from a call) are
    # simply not TTL building blocks, so they are skipped rather than fatal.
    for const_m in const_re.finditer(text):
        try:
            consts[const_m.group(1)] = _eval_expr(const_m.group(2))
        except ValueError:
            continue

    ttls: dict[str, int] = {}
    # Splitting on `{` puts each object literal's fields in a chunk of its own.
    for chunk in text.split("{"):
        name_m = name_re.search(chunk)
        ttl_m = ttl_re.search(chunk)
        if name_m and ttl_m:
            ttls[name_m.group(1)] = _eval_expr(ttl_m.group(1))
    return ttls
|
||||
|
||||
|
||||
def test_python_mirror_matches_ts_registry():
|
||||
py_names = feature_names()
|
||||
ts_names = _ts_registry_names()
|
||||
@@ -39,3 +80,34 @@ def test_profile_schema_no_duplicates():
|
||||
def test_profile_schema_dtypes_known():
    """Every profile feature declares one of the recognised dtypes."""
    known_dtypes = {"numeric", "categorical"}
    for feature in PROFILE_FEATURES:
        assert feature.dtype in known_dtypes
|
||||
|
||||
|
||||
def test_all_profile_features_are_batched():
    """Every entry in PROFILE_FEATURES must declare freshness == "batched"."""
    for f in PROFILE_FEATURES:
        is_batched = f.freshness == "batched"
        assert is_batched, f"{f.name}: expected freshness='batched', got {f.freshness!r}"
|
||||
|
||||
|
||||
def test_profile_feature_ttl_matches_ts_registry():
    """Each Python ttl_sec must equal the ttlSec parsed out of registry.ts."""
    ts_ttls = _ts_registry_ttls()
    for f in PROFILE_FEATURES:
        # A feature the parser did not find is reported before comparing values.
        was_parsed = f.name in ts_ttls
        assert was_parsed, f"{f.name} not found in TS registry ttlSec parse"

        in_sync = f.ttl_sec == ts_ttls[f.name]
        assert in_sync, (
            f"{f.name}: Python ttl_sec={f.ttl_sec} != TS ttlSec={ts_ttls[f.name]}"
        )
|
||||
|
||||
|
||||
def test_profile_feature_ttl_matches_expected():
    """Each feature's ttl_sec must equal the value pinned in _EXPECTED_TTL.

    A feature with no entry in _EXPECTED_TTL fails with an explicit message
    instead of a bare KeyError, matching how the TS-registry TTL test
    reports a missing name.
    """
    for f in PROFILE_FEATURES:
        assert f.name in _EXPECTED_TTL, (
            f"{f.name} not found in _EXPECTED_TTL; add its expected ttl_sec"
        )
        assert f.ttl_sec == _EXPECTED_TTL[f.name], (
            f"{f.name}: ttl_sec={f.ttl_sec}, expected {_EXPECTED_TTL[f.name]}"
        )
|
||||
|
||||
|
||||
def test_profile_feature_source_is_profile_store():
    """Every entry in PROFILE_FEATURES must name "profile_store" as its source."""
    for f in PROFILE_FEATURES:
        from_profile_store = f.source == "profile_store"
        assert from_profile_store, f"{f.name}: unexpected source {f.source!r}"
|
||||
|
||||
|
||||
def test_profile_feature_fallback_set():
    """Every entry in PROFILE_FEATURES must carry a truthy fallback."""
    for f in PROFILE_FEATURES:
        has_fallback = bool(f.fallback)
        assert has_fallback, f"{f.name}: fallback must not be empty"
|
||||
|
||||
Reference in New Issue
Block a user