Split monolithic test_pipeline.py into focused integration test scripts

- common.py: shared config, URL constants, benchmark questions, and all helpers (get, post_json, check_sse, qdrant_count, fetch_logs, parse_run_block, wait_for, etc.)
- test_health.py: service health checks (deepagents, bifrost, GPU/CPU Ollama, Qdrant, SearXNG)
- test_memory.py: name store/recall pipeline, memory benchmark (5 facts + 10 recalls), dedup test
- test_routing.py: easy/medium/hard tier routing benchmarks with --easy-only/--medium-only/--hard-only flags
- Removed test_pipeline.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
214
tests/integration/test_health.py
Normal file
214
tests/integration/test_health.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Adolf service health integration tests.
|
||||
|
||||
Checks:
|
||||
1. deepagents /health — agent_ready
|
||||
1b. openmemory /sse reachable
|
||||
1c. grammy /sse reachable
|
||||
2. Bifrost /health, /v1/models, direct inference, deepagents startup log
|
||||
3. GPU Ollama — reachable, qwen3:8b present
|
||||
4. CPU Ollama — reachable, nomic-embed-text present
|
||||
5. Qdrant — reachable, adolf_memories collection, vector dims=768
|
||||
6. SearXNG — reachable, JSON results, latency < 5s
|
||||
|
||||
Usage:
|
||||
python3 test_health.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
from common import (
|
||||
DEEPAGENTS, BIFROST, GRAMMY_HOST, GRAMMY_PORT,
|
||||
OLLAMA_GPU, OLLAMA_CPU, QDRANT, SEARXNG, COMPOSE_FILE,
|
||||
INFO, FAIL,
|
||||
report, print_summary, tf,
|
||||
get, post_json, check_sse, fetch_logs,
|
||||
)
|
||||
|
||||
results = []
|
||||
timings = {}
|
||||
|
||||
|
||||
# ── 1. Service health ─────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 1. Service health")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{DEEPAGENTS}/health")
|
||||
data = json.loads(body)
|
||||
ok = status == 200 and data.get("agent_ready") is True
|
||||
report(results, "deepagents /health — agent_ready", ok,
|
||||
f"agent_ready={data.get('agent_ready')}")
|
||||
except Exception as e:
|
||||
report(results, "deepagents /health", False, str(e))
|
||||
|
||||
ok, detail = check_sse("localhost", 8765, "/sse")
|
||||
report(results, "openmemory /sse reachable", ok, detail)
|
||||
|
||||
ok, detail = check_sse(GRAMMY_HOST, GRAMMY_PORT, "/sse")
|
||||
report(results, "grammy /sse reachable", ok, detail)
|
||||
|
||||
timings["health_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 2. Bifrost gateway ────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 2. Bifrost gateway (port 8080)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{BIFROST}/health", timeout=5)
|
||||
report(results, "Bifrost /health reachable", status == 200, f"HTTP {status}")
|
||||
except Exception as e:
|
||||
report(results, "Bifrost /health reachable", False, str(e))
|
||||
|
||||
try:
|
||||
status, body = get(f"{BIFROST}/v1/models", timeout=5)
|
||||
data = json.loads(body)
|
||||
model_ids = [m.get("id", "") for m in data.get("data", [])]
|
||||
gpu_models = [m for m in model_ids if m.startswith("ollama/")]
|
||||
report(results, "Bifrost lists ollama GPU models", len(gpu_models) > 0,
|
||||
f"found: {gpu_models}")
|
||||
for expected in ["ollama/qwen3:4b", "ollama/qwen3:8b", "ollama/qwen2.5:1.5b"]:
|
||||
report(results, f" model {expected} listed", expected in model_ids)
|
||||
except Exception as e:
|
||||
report(results, "Bifrost /v1/models", False, str(e))
|
||||
|
||||
print(f" [bifrost-infer] POST /v1/chat/completions → ollama/qwen2.5:0.5b ...")
|
||||
t_infer = time.monotonic()
|
||||
try:
|
||||
infer_payload = {
|
||||
"model": "ollama/qwen2.5:0.5b",
|
||||
"messages": [{"role": "user", "content": "Reply with exactly one word: pong"}],
|
||||
"max_tokens": 16,
|
||||
}
|
||||
data = json.dumps(infer_payload).encode()
|
||||
req = urllib.request.Request(
|
||||
f"{BIFROST}/v1/chat/completions",
|
||||
data=data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=60) as r:
|
||||
infer_status = r.status
|
||||
infer_body = json.loads(r.read().decode())
|
||||
infer_elapsed = time.monotonic() - t_infer
|
||||
reply_content = infer_body.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
used_model = infer_body.get("model", "")
|
||||
report(results, "Bifrost → Ollama GPU inference succeeds",
|
||||
infer_status == 200 and bool(reply_content),
|
||||
f"{infer_elapsed:.1f}s model={used_model!r} reply={reply_content[:60]!r}")
|
||||
timings["bifrost_direct_infer"] = infer_elapsed
|
||||
except Exception as e:
|
||||
report(results, "Bifrost → Ollama GPU inference succeeds", False, str(e))
|
||||
timings["bifrost_direct_infer"] = None
|
||||
|
||||
try:
|
||||
import subprocess
|
||||
r = subprocess.run(
|
||||
["docker", "compose", "-f", COMPOSE_FILE, "logs", "deepagents",
|
||||
"--since=3600s", "--no-log-prefix"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
log_lines = r.stdout.splitlines()
|
||||
bifrost_line = next(
|
||||
(l for l in log_lines if "[agent] bifrost=" in l and "bifrost:8080" in l),
|
||||
None,
|
||||
)
|
||||
report(results, "deepagents startup log confirms bifrost URL",
|
||||
bifrost_line is not None,
|
||||
bifrost_line.strip() if bifrost_line else "line not found in logs")
|
||||
if bifrost_line:
|
||||
has_prefix = "router=ollama/" in bifrost_line and "medium=ollama/" in bifrost_line
|
||||
report(results, "deepagents model names use ollama/ prefix", has_prefix,
|
||||
bifrost_line.strip())
|
||||
except Exception as e:
|
||||
report(results, "deepagents startup log check", False, str(e))
|
||||
|
||||
timings["bifrost_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 3. GPU Ollama ─────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 3. GPU Ollama (port 11436)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{OLLAMA_GPU}/api/tags")
|
||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||
has_qwen = any("qwen3" in m for m in models)
|
||||
report(results, "GPU Ollama reachable", True, f"models: {models}")
|
||||
report(results, "qwen3:8b present", has_qwen)
|
||||
except Exception as e:
|
||||
report(results, "GPU Ollama reachable", False, str(e))
|
||||
report(results, "qwen3:8b present", False, "skipped")
|
||||
|
||||
timings["gpu_ollama_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 4. CPU Ollama ─────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 4. CPU Ollama (port 11435)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{OLLAMA_CPU}/api/tags")
|
||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||
has_embed = any("nomic-embed-text" in m for m in models)
|
||||
report(results, "CPU Ollama reachable", True, f"models: {models}")
|
||||
report(results, "nomic-embed-text present", has_embed)
|
||||
except Exception as e:
|
||||
report(results, "CPU Ollama reachable", False, str(e))
|
||||
report(results, "nomic-embed-text present", False, "skipped")
|
||||
|
||||
timings["cpu_ollama_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 5. Qdrant ─────────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 5. Qdrant (port 6333)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{QDRANT}/collections")
|
||||
cols = [c["name"] for c in json.loads(body).get("result", {}).get("collections", [])]
|
||||
report(results, "Qdrant reachable", True, f"collections: {cols}")
|
||||
report(results, "adolf_memories collection exists", "adolf_memories" in cols)
|
||||
except Exception as e:
|
||||
report(results, "Qdrant reachable", False, str(e))
|
||||
report(results, "adolf_memories collection exists", False, "skipped")
|
||||
|
||||
try:
|
||||
status, body = get(f"{QDRANT}/collections/adolf_memories")
|
||||
info = json.loads(body).get("result", {})
|
||||
dims = info.get("config", {}).get("params", {}).get("vectors", {}).get("size")
|
||||
report(results, "vector dims = 768", dims == 768, f"got {dims}")
|
||||
except Exception as e:
|
||||
report(results, "adolf_memories collection info", False, str(e))
|
||||
|
||||
timings["qdrant_ping"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── 6. SearXNG ────────────────────────────────────────────────────────────────
|
||||
print(f"\n[{INFO}] 6. SearXNG (port 11437)")
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
status, body = get(f"{SEARXNG}/search?q=test&format=json", timeout=15)
|
||||
elapsed = time.monotonic() - t0
|
||||
n = len(json.loads(body).get("results", []))
|
||||
report(results, "SearXNG reachable + JSON results", status == 200 and n > 0,
|
||||
f"{n} results in {elapsed:.1f}s")
|
||||
report(results, "SearXNG response < 5s", elapsed < 5, f"{elapsed:.2f}s")
|
||||
timings["searxng_latency"] = elapsed
|
||||
except Exception as e:
|
||||
report(results, "SearXNG reachable", False, str(e))
|
||||
report(results, "SearXNG response < 5s", False, "skipped")
|
||||
timings["searxng_latency"] = None
|
||||
|
||||
timings["searxng_check"] = time.monotonic() - t0
|
||||
|
||||
|
||||
# ── summary ───────────────────────────────────────────────────────────────────
|
||||
print_summary(results)
|
||||
sys.exit(0 if all(ok for _, ok in results) else 1)
|
||||
Reference in New Issue
Block a user