#!/usr/bin/env python3
"""
Adolf service health integration tests.

Checks:
1.  deepagents /health — agent_ready
1b. openmemory /sse reachable
1c. grammy /sse reachable
2.  Bifrost /health, /v1/models, direct inference, deepagents startup log
3.  GPU Ollama — reachable, qwen3:8b present
4.  CPU Ollama — reachable, nomic-embed-text present
5.  Qdrant — reachable, adolf_memories collection, vector dims=768
6.  SearXNG — reachable, JSON results, latency < 5s

Usage: python3 test_health.py
"""

import json
import subprocess
import sys
import time
import urllib.request

from common import (
    DEEPAGENTS,
    BIFROST,
    GRAMMY_HOST,
    GRAMMY_PORT,
    OLLAMA_GPU,
    OLLAMA_CPU,
    QDRANT,
    SEARXNG,
    COMPOSE_FILE,
    INFO,
    FAIL,
    report,
    print_summary,
    tf,
    get,
    post_json,
    check_sse,
    fetch_logs,
)

results = []  # (test_name, passed) pairs consumed by report()/print_summary()
timings = {}  # wall-clock duration of each check group, for diagnostics

# ── 1. Service health ─────────────────────────────────────────────────────────
print(f"\n[{INFO}] 1. Service health")
t0 = time.monotonic()
try:
    status, body = get(f"{DEEPAGENTS}/health")
    data = json.loads(body)
    # agent_ready must be the boolean True, not merely truthy.
    ok = status == 200 and data.get("agent_ready") is True
    report(results, "deepagents /health — agent_ready", ok,
           f"agent_ready={data.get('agent_ready')}")
except Exception as e:
    report(results, "deepagents /health", False, str(e))

# NOTE(review): openmemory host/port are hard-coded here while every other
# endpoint comes from common — consider adding OPENMEMORY_* constants there.
ok, detail = check_sse("localhost", 8765, "/sse")
report(results, "openmemory /sse reachable", ok, detail)

ok, detail = check_sse(GRAMMY_HOST, GRAMMY_PORT, "/sse")
report(results, "grammy /sse reachable", ok, detail)
timings["health_check"] = time.monotonic() - t0

# ── 2. Bifrost gateway ────────────────────────────────────────────────────────
print(f"\n[{INFO}] 2. Bifrost gateway (port 8080)")
t0 = time.monotonic()
try:
    status, body = get(f"{BIFROST}/health", timeout=5)
    report(results, "Bifrost /health reachable", status == 200, f"HTTP {status}")
except Exception as e:
    report(results, "Bifrost /health reachable", False, str(e))

try:
    status, body = get(f"{BIFROST}/v1/models", timeout=5)
    data = json.loads(body)
    model_ids = [m.get("id", "") for m in data.get("data", [])]
    # GPU-served models are namespaced with the "ollama/" provider prefix.
    gpu_models = [m for m in model_ids if m.startswith("ollama/")]
    report(results, "Bifrost lists ollama GPU models", len(gpu_models) > 0,
           f"found: {gpu_models}")
    for expected in ["ollama/qwen3:4b", "ollama/qwen3:8b", "ollama/qwen2.5:1.5b"]:
        report(results, f" model {expected} listed", expected in model_ids)
except Exception as e:
    report(results, "Bifrost /v1/models", False, str(e))

# Direct end-to-end inference through the gateway (smallest model to keep it fast).
print(f" [bifrost-infer] POST /v1/chat/completions → ollama/qwen2.5:0.5b ...")
t_infer = time.monotonic()
try:
    infer_payload = {
        "model": "ollama/qwen2.5:0.5b",
        "messages": [{"role": "user", "content": "Reply with exactly one word: pong"}],
        "max_tokens": 16,
    }
    data = json.dumps(infer_payload).encode()
    req = urllib.request.Request(
        f"{BIFROST}/v1/chat/completions",
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=60) as r:
        infer_status = r.status
        infer_body = json.loads(r.read().decode())
    infer_elapsed = time.monotonic() - t_infer
    reply_content = infer_body.get("choices", [{}])[0].get("message", {}).get("content", "")
    used_model = infer_body.get("model", "")
    # Success = HTTP 200 AND a non-empty completion came back.
    report(results, "Bifrost → Ollama GPU inference succeeds",
           infer_status == 200 and bool(reply_content),
           f"{infer_elapsed:.1f}s model={used_model!r} reply={reply_content[:60]!r}")
    timings["bifrost_direct_infer"] = infer_elapsed
except Exception as e:
    report(results, "Bifrost → Ollama GPU inference succeeds", False, str(e))
    timings["bifrost_direct_infer"] = None

# Confirm deepagents actually wired itself to Bifrost by scanning its startup log.
try:
    r = subprocess.run(
        ["docker", "compose", "-f", COMPOSE_FILE, "logs", "deepagents",
         "--since=3600s", "--no-log-prefix"],
        capture_output=True, text=True, timeout=10,
    )
    log_lines = r.stdout.splitlines()
    bifrost_line = next(
        (line for line in log_lines
         if "[agent] bifrost=" in line and "bifrost:8080" in line),
        None,
    )
    report(results, "deepagents startup log confirms bifrost URL",
           bifrost_line is not None,
           bifrost_line.strip() if bifrost_line else "line not found in logs")
    if bifrost_line:
        # Both router and medium model names must carry the ollama/ provider prefix.
        has_prefix = "router=ollama/" in bifrost_line and "medium=ollama/" in bifrost_line
        report(results, "deepagents model names use ollama/ prefix", has_prefix,
               bifrost_line.strip())
except Exception as e:
    report(results, "deepagents startup log check", False, str(e))
timings["bifrost_check"] = time.monotonic() - t0

# ── 3. GPU Ollama ─────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 3. GPU Ollama (port 11436)")
t0 = time.monotonic()
try:
    status, body = get(f"{OLLAMA_GPU}/api/tags")
    models = [m["name"] for m in json.loads(body).get("models", [])]
    has_qwen = any("qwen3" in m for m in models)
    report(results, "GPU Ollama reachable", status == 200, f"models: {models}")
    report(results, "qwen3:8b present", has_qwen)
except Exception as e:
    report(results, "GPU Ollama reachable", False, str(e))
    report(results, "qwen3:8b present", False, "skipped")
timings["gpu_ollama_ping"] = time.monotonic() - t0

# ── 4. CPU Ollama ─────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 4. CPU Ollama (port 11435)")
t0 = time.monotonic()
try:
    status, body = get(f"{OLLAMA_CPU}/api/tags")
    models = [m["name"] for m in json.loads(body).get("models", [])]
    has_embed = any("nomic-embed-text" in m for m in models)
    report(results, "CPU Ollama reachable", status == 200, f"models: {models}")
    report(results, "nomic-embed-text present", has_embed)
except Exception as e:
    report(results, "CPU Ollama reachable", False, str(e))
    report(results, "nomic-embed-text present", False, "skipped")
timings["cpu_ollama_ping"] = time.monotonic() - t0

# ── 5. Qdrant ─────────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 5. Qdrant (port 6333)")
t0 = time.monotonic()
try:
    status, body = get(f"{QDRANT}/collections")
    cols = [c["name"] for c in json.loads(body).get("result", {}).get("collections", [])]
    report(results, "Qdrant reachable", status == 200, f"collections: {cols}")
    report(results, "adolf_memories collection exists", "adolf_memories" in cols)
except Exception as e:
    report(results, "Qdrant reachable", False, str(e))
    report(results, "adolf_memories collection exists", False, "skipped")

try:
    status, body = get(f"{QDRANT}/collections/adolf_memories")
    info = json.loads(body).get("result", {})
    # Assumes the default unnamed single-vector config; with named vectors the
    # size nests one level deeper and this reads None — TODO confirm schema.
    dims = info.get("config", {}).get("params", {}).get("vectors", {}).get("size")
    report(results, "vector dims = 768", dims == 768, f"got {dims}")
except Exception as e:
    report(results, "adolf_memories collection info", False, str(e))
timings["qdrant_ping"] = time.monotonic() - t0

# ── 6. SearXNG ────────────────────────────────────────────────────────────────
print(f"\n[{INFO}] 6. SearXNG (port 11437)")
t0 = time.monotonic()
try:
    status, body = get(f"{SEARXNG}/search?q=test&format=json", timeout=15)
    elapsed = time.monotonic() - t0
    n = len(json.loads(body).get("results", []))
    report(results, "SearXNG reachable + JSON results", status == 200 and n > 0,
           f"{n} results in {elapsed:.1f}s")
    report(results, "SearXNG response < 5s", elapsed < 5, f"{elapsed:.2f}s")
    timings["searxng_latency"] = elapsed
except Exception as e:
    # Keep the test name identical to the success path so pass/fail lines align.
    report(results, "SearXNG reachable + JSON results", False, str(e))
    report(results, "SearXNG response < 5s", False, "skipped")
    timings["searxng_latency"] = None
timings["searxng_check"] = time.monotonic() - t0

# ── summary ───────────────────────────────────────────────────────────────────
print_summary(results)
sys.exit(0 if all(ok for _, ok in results) else 1)