Split monolithic test_pipeline.py into focused integration test scripts

- common.py: shared config, URL constants, benchmark questions, and all helpers (get, post_json, check_sse, qdrant_count, fetch_logs, parse_run_block, wait_for, etc.)
- test_health.py: service health checks (deepagents, bifrost, GPU/CPU Ollama, Qdrant, SearXNG)
- test_memory.py: name store/recall pipeline, memory benchmark (5 facts + 10 recalls), dedup test
- test_routing.py: easy/medium/hard tier routing benchmarks with --easy-only/--medium-only/--hard-only flags
- Removed test_pipeline.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
214
tests/integration/test_health.py
Normal file
214
tests/integration/test_health.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Adolf service health integration tests.
|
||||
|
||||
Checks:
|
||||
1. deepagents /health — agent_ready
|
||||
1b. openmemory /sse reachable
|
||||
1c. grammy /sse reachable
|
||||
2. Bifrost /health, /v1/models, direct inference, deepagents startup log
|
||||
3. GPU Ollama — reachable, qwen3:8b present
|
||||
4. CPU Ollama — reachable, nomic-embed-text present
|
||||
5. Qdrant — reachable, adolf_memories collection, vector dims=768
|
||||
6. SearXNG — reachable, JSON results, latency < 5s
|
||||
|
||||
Usage:
|
||||
python3 test_health.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
from common import (
|
||||
DEEPAGENTS, BIFROST, GRAMMY_HOST, GRAMMY_PORT,
|
||||
OLLAMA_GPU, OLLAMA_CPU, QDRANT, SEARXNG, COMPOSE_FILE,
|
||||
INFO, FAIL,
|
||||
report, print_summary, tf,
|
||||
get, post_json, check_sse, fetch_logs,
|
||||
)
|
||||
|
||||
results = []
|
||||
timings = {}
|
||||
|
||||
|
||||
# ── 1. Service health ─────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 1. Service health")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{DEEPAGENTS}/health")
|
||||
data = json.loads(body)
|
||||
ok = status == 200 and data.get("agent_ready") is True
|
||||
report(results, "deepagents /health — agent_ready", ok,
|
||||
f"agent_ready={data.get('agent_ready')}")
|
||||
except Exception as e:
|
||||
report(results, "deepagents /health", False, str(e))
|
||||
|
||||
ok, detail = check_sse("localhost", 8765, "/sse")
|
||||
report(results, "openmemory /sse reachable", ok, detail)
|
||||
|
||||
ok, detail = check_sse(GRAMMY_HOST, GRAMMY_PORT, "/sse")
|
||||
report(results, "grammy /sse reachable", ok, detail)
|
||||
|
||||
timings["health_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 2. Bifrost gateway ────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 2. Bifrost gateway (port 8080)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{BIFROST}/health", timeout=5)
|
||||
report(results, "Bifrost /health reachable", status == 200, f"HTTP {status}")
|
||||
except Exception as e:
|
||||
report(results, "Bifrost /health reachable", False, str(e))
|
||||
|
||||
try:
|
||||
status, body = get(f"{BIFROST}/v1/models", timeout=5)
|
||||
data = json.loads(body)
|
||||
model_ids = [m.get("id", "") for m in data.get("data", [])]
|
||||
gpu_models = [m for m in model_ids if m.startswith("ollama/")]
|
||||
report(results, "Bifrost lists ollama GPU models", len(gpu_models) > 0,
|
||||
f"found: {gpu_models}")
|
||||
for expected in ["ollama/qwen3:4b", "ollama/qwen3:8b", "ollama/qwen2.5:1.5b"]:
|
||||
report(results, f" model {expected} listed", expected in model_ids)
|
||||
except Exception as e:
|
||||
report(results, "Bifrost /v1/models", False, str(e))
|
||||
|
||||
print(f" [bifrost-infer] POST /v1/chat/completions → ollama/qwen2.5:0.5b ...")
|
||||
t_infer = time.monotonic()
|
||||
try:
|
||||
infer_payload = {
|
||||
"model": "ollama/qwen2.5:0.5b",
|
||||
"messages": [{"role": "user", "content": "Reply with exactly one word: pong"}],
|
||||
"max_tokens": 16,
|
||||
}
|
||||
data = json.dumps(infer_payload).encode()
|
||||
req = urllib.request.Request(
|
||||
f"{BIFROST}/v1/chat/completions",
|
||||
data=data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=60) as r:
|
||||
infer_status = r.status
|
||||
infer_body = json.loads(r.read().decode())
|
||||
infer_elapsed = time.monotonic() - t_infer
|
||||
reply_content = infer_body.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
used_model = infer_body.get("model", "")
|
||||
report(results, "Bifrost → Ollama GPU inference succeeds",
|
||||
infer_status == 200 and bool(reply_content),
|
||||
f"{infer_elapsed:.1f}s model={used_model!r} reply={reply_content[:60]!r}")
|
||||
timings["bifrost_direct_infer"] = infer_elapsed
|
||||
except Exception as e:
|
||||
report(results, "Bifrost → Ollama GPU inference succeeds", False, str(e))
|
||||
timings["bifrost_direct_infer"] = None
|
||||
|
||||
try:
|
||||
import subprocess
|
||||
r = subprocess.run(
|
||||
["docker", "compose", "-f", COMPOSE_FILE, "logs", "deepagents",
|
||||
"--since=3600s", "--no-log-prefix"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
log_lines = r.stdout.splitlines()
|
||||
bifrost_line = next(
|
||||
(l for l in log_lines if "[agent] bifrost=" in l and "bifrost:8080" in l),
|
||||
None,
|
||||
)
|
||||
report(results, "deepagents startup log confirms bifrost URL",
|
||||
bifrost_line is not None,
|
||||
bifrost_line.strip() if bifrost_line else "line not found in logs")
|
||||
if bifrost_line:
|
||||
has_prefix = "router=ollama/" in bifrost_line and "medium=ollama/" in bifrost_line
|
||||
report(results, "deepagents model names use ollama/ prefix", has_prefix,
|
||||
bifrost_line.strip())
|
||||
except Exception as e:
|
||||
report(results, "deepagents startup log check", False, str(e))
|
||||
|
||||
timings["bifrost_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 3. GPU Ollama ─────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 3. GPU Ollama (port 11436)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{OLLAMA_GPU}/api/tags")
|
||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||
has_qwen = any("qwen3" in m for m in models)
|
||||
report(results, "GPU Ollama reachable", True, f"models: {models}")
|
||||
report(results, "qwen3:8b present", has_qwen)
|
||||
except Exception as e:
|
||||
report(results, "GPU Ollama reachable", False, str(e))
|
||||
report(results, "qwen3:8b present", False, "skipped")
|
||||
|
||||
timings["gpu_ollama_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 4. CPU Ollama ─────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 4. CPU Ollama (port 11435)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{OLLAMA_CPU}/api/tags")
|
||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||
has_embed = any("nomic-embed-text" in m for m in models)
|
||||
report(results, "CPU Ollama reachable", True, f"models: {models}")
|
||||
report(results, "nomic-embed-text present", has_embed)
|
||||
except Exception as e:
|
||||
report(results, "CPU Ollama reachable", False, str(e))
|
||||
report(results, "nomic-embed-text present", False, "skipped")
|
||||
|
||||
timings["cpu_ollama_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 5. Qdrant ─────────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 5. Qdrant (port 6333)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{QDRANT}/collections")
|
||||
cols = [c["name"] for c in json.loads(body).get("result", {}).get("collections", [])]
|
||||
report(results, "Qdrant reachable", True, f"collections: {cols}")
|
||||
report(results, "adolf_memories collection exists", "adolf_memories" in cols)
|
||||
except Exception as e:
|
||||
report(results, "Qdrant reachable", False, str(e))
|
||||
report(results, "adolf_memories collection exists", False, "skipped")
|
||||
|
||||
try:
|
||||
status, body = get(f"{QDRANT}/collections/adolf_memories")
|
||||
info = json.loads(body).get("result", {})
|
||||
dims = info.get("config", {}).get("params", {}).get("vectors", {}).get("size")
|
||||
report(results, "vector dims = 768", dims == 768, f"got {dims}")
|
||||
except Exception as e:
|
||||
report(results, "adolf_memories collection info", False, str(e))
|
||||
|
||||
timings["qdrant_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 6. SearXNG ────────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 6. SearXNG (port 11437)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{SEARXNG}/search?q=test&format=json", timeout=15)
|
||||
elapsed = time.monotonic() - t0
|
||||
n = len(json.loads(body).get("results", []))
|
||||
report(results, "SearXNG reachable + JSON results", status == 200 and n > 0,
|
||||
f"{n} results in {elapsed:.1f}s")
|
||||
report(results, "SearXNG response < 5s", elapsed < 5, f"{elapsed:.2f}s")
|
||||
timings["searxng_latency"] = elapsed
|
||||
except Exception as e:
|
||||
report(results, "SearXNG reachable", False, str(e))
|
||||
report(results, "SearXNG response < 5s", False, "skipped")
|
||||
timings["searxng_latency"] = None
|
||||
|
||||
timings["searxng_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── summary ───────────────────────────────────────────────────────────────────
|
||||
print_summary(results)
|
||||
sys.exit(0 if all(ok for _, ok in results) else 1)
|
||||
Reference in New Issue
Block a user