#!/usr/bin/env python3
"""
Adolf service health integration tests.

Checks:
1.  deepagents /health — agent_ready
1b. openmemory /sse reachable
1c. grammy /sse reachable
2.  Bifrost /health, /v1/models, direct inference, deepagents startup log
3.  GPU Ollama — reachable, qwen3:8b present
4.  CPU Ollama — reachable, nomic-embed-text present
5.  Qdrant — reachable, adolf_memories collection, vector dims=768
6.  SearXNG — reachable, JSON results, latency < 5s

Usage: python3 test_health.py
"""

import json
import subprocess
import sys
import time
import urllib.request

from common import (
    DEEPAGENTS,
    BIFROST,
    GRAMMY_HOST,
    GRAMMY_PORT,
    OLLAMA_GPU,
    OLLAMA_CPU,
    QDRANT,
    SEARXNG,
    COMPOSE_FILE,
    INFO,
    FAIL,
    report,
    print_summary,
    tf,
    get,
    post_json,
    check_sse,
    fetch_logs,
)

results = []  # (test_name, passed) pairs consumed by report()/print_summary()
timings = {}  # wall-clock duration of each check group, for diagnostics

# ── 1. Service health ─────────────────────────────────────────────────────────
print(f"\n[{INFO}] 1. Service health")
t0 = time.monotonic()
try:
    status, body = get(f"{DEEPAGENTS}/health")
    data = json.loads(body)
    # agent_ready must be the boolean True, not merely truthy.
    ok = status == 200 and data.get("agent_ready") is True
    report(results, "deepagents /health — agent_ready", ok,
           f"agent_ready={data.get('agent_ready')}")
except Exception as e:
    report(results, "deepagents /health", False, str(e))

# NOTE(review): openmemory host/port are hard-coded here while every other
# endpoint comes from common — consider adding OPENMEMORY_* constants there.
ok, detail = check_sse("localhost", 8765, "/sse")
report(results, "openmemory /sse reachable", ok, detail)

ok, detail = check_sse(GRAMMY_HOST, GRAMMY_PORT, "/sse")
report(results, "grammy /sse reachable", ok, detail)
timings["health_check"] = time.monotonic() - t0

# ── 2. Bifrost gateway ────────────────────────────────────────────────────────
print(f"\n[{INFO}] 2. Bifrost gateway (port 8080)")
t0 = time.monotonic()
try:
    status, body = get(f"{BIFROST}/health", timeout=5)
    report(results, "Bifrost /health reachable", status == 200, f"HTTP {status}")
except Exception as e:
    report(results, "Bifrost /health reachable", False, str(e))

try:
    status, body = get(f"{BIFROST}/v1/models", timeout=5)
    data = json.loads(body)
    model_ids = [m.get("id", "") for m in data.get("data", [])]
    # GPU-served models are namespaced with the "ollama/" provider prefix.
    gpu_models = [m for m in model_ids if m.startswith("ollama/")]
    report(results, "Bifrost lists ollama GPU models", len(gpu_models) > 0,
           f"found: {gpu_models}")
    for expected in ["ollama/qwen3:4b", "ollama/qwen3:8b", "ollama/qwen2.5:1.5b"]:
        report(results, f" model {expected} listed", expected in model_ids)
except Exception as e:
    report(results, "Bifrost /v1/models", False, str(e))

# Direct end-to-end inference through the gateway (smallest model to keep it fast).
print(f" [bifrost-infer] POST /v1/chat/completions → ollama/qwen2.5:0.5b ...")
t_infer = time.monotonic()
try:
    infer_payload = {
        "model": "ollama/qwen2.5:0.5b",
        "messages": [{"role": "user", "content": "Reply with exactly one word: pong"}],
        "max_tokens": 16,
    }
    data = json.dumps(infer_payload).encode()
    req = urllib.request.Request(
        f"{BIFROST}/v1/chat/completions",
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=60) as r:
        infer_status = r.status
        infer_body = json.loads(r.read().decode())
    infer_elapsed = time.monotonic() - t_infer
    reply_content = infer_body.get("choices", [{}])[0].get("message", {}).get("content", "")
    used_model = infer_body.get("model", "")
    # Success = HTTP 200 AND a non-empty completion came back.
    report(results, "Bifrost → Ollama GPU inference succeeds",
           infer_status == 200 and bool(reply_content),
           f"{infer_elapsed:.1f}s model={used_model!r} reply={reply_content[:60]!r}")
    timings["bifrost_direct_infer"] = infer_elapsed
except Exception as e:
    report(results, "Bifrost → Ollama GPU inference succeeds", False, str(e))
    timings["bifrost_direct_infer"] = None

# Confirm deepagents actually wired itself to Bifrost by scanning its startup log.
try:
    r = subprocess.run(
        ["docker", "compose", "-f", COMPOSE_FILE, "logs", "deepagents",
         "--since=3600s", "--no-log-prefix"],
        capture_output=True, text=True, timeout=10,
    )
    log_lines = r.stdout.splitlines()
    bifrost_line = next(
        (line for line in log_lines
         if "[agent] bifrost=" in line and "bifrost:8080" in line),
        None,
    )
    report(results, "deepagents startup log confirms bifrost URL",
           bifrost_line is not None,
           bifrost_line.strip() if bifrost_line else "line not found in logs")
    if bifrost_line:
        # Both router and medium model names must carry the ollama/ provider prefix.
        has_prefix = "router=ollama/" in bifrost_line and "medium=ollama/" in bifrost_line
        report(results, "deepagents model names use ollama/ prefix", has_prefix,
               bifrost_line.strip())
except Exception as e:
    report(results, "deepagents startup log check", False, str(e))
timings["bifrost_check"] = time.monotonic() - t0

# ── 3. GPU Ollama ─────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 3. GPU Ollama (port 11436)")
t0 = time.monotonic()
try:
    status, body = get(f"{OLLAMA_GPU}/api/tags")
    models = [m["name"] for m in json.loads(body).get("models", [])]
    has_qwen = any("qwen3" in m for m in models)
    report(results, "GPU Ollama reachable", status == 200, f"models: {models}")
    report(results, "qwen3:8b present", has_qwen)
except Exception as e:
    report(results, "GPU Ollama reachable", False, str(e))
    report(results, "qwen3:8b present", False, "skipped")
timings["gpu_ollama_ping"] = time.monotonic() - t0

# ── 4. CPU Ollama ─────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 4. CPU Ollama (port 11435)")
t0 = time.monotonic()
try:
    status, body = get(f"{OLLAMA_CPU}/api/tags")
    models = [m["name"] for m in json.loads(body).get("models", [])]
    has_embed = any("nomic-embed-text" in m for m in models)
    report(results, "CPU Ollama reachable", status == 200, f"models: {models}")
    report(results, "nomic-embed-text present", has_embed)
except Exception as e:
    report(results, "CPU Ollama reachable", False, str(e))
    report(results, "nomic-embed-text present", False, "skipped")
timings["cpu_ollama_ping"] = time.monotonic() - t0

# ── 5. Qdrant ─────────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 5. Qdrant (port 6333)")
t0 = time.monotonic()
try:
    status, body = get(f"{QDRANT}/collections")
    cols = [c["name"] for c in json.loads(body).get("result", {}).get("collections", [])]
    report(results, "Qdrant reachable", status == 200, f"collections: {cols}")
    report(results, "adolf_memories collection exists", "adolf_memories" in cols)
except Exception as e:
    report(results, "Qdrant reachable", False, str(e))
    report(results, "adolf_memories collection exists", False, "skipped")

try:
    status, body = get(f"{QDRANT}/collections/adolf_memories")
    info = json.loads(body).get("result", {})
    # Assumes the default unnamed single-vector config; with named vectors the
    # size nests one level deeper and this reads None — TODO confirm schema.
    dims = info.get("config", {}).get("params", {}).get("vectors", {}).get("size")
    report(results, "vector dims = 768", dims == 768, f"got {dims}")
except Exception as e:
    report(results, "adolf_memories collection info", False, str(e))
timings["qdrant_ping"] = time.monotonic() - t0

# ── 6. SearXNG ────────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 6. SearXNG (port 11437)")
t0 = time.monotonic()
try:
    status, body = get(f"{SEARXNG}/search?q=test&format=json", timeout=15)
    elapsed = time.monotonic() - t0
    n = len(json.loads(body).get("results", []))
    report(results, "SearXNG reachable + JSON results", status == 200 and n > 0,
           f"{n} results in {elapsed:.1f}s")
    report(results, "SearXNG response < 5s", elapsed < 5, f"{elapsed:.2f}s")
    timings["searxng_latency"] = elapsed
except Exception as e:
    # Keep the test name identical to the success path so pass/fail lines align.
    report(results, "SearXNG reachable + JSON results", False, str(e))
    report(results, "SearXNG response < 5s", False, "skipped")
    timings["searxng_latency"] = None
timings["searxng_check"] = time.monotonic() - t0

# ── summary ───────────────────────────────────────────────────────────────────
print_summary(results)
sys.exit(0 if all(ok for _, ok in results) else 1)