adolf/agent.py

import asyncio
import os
import time
from contextlib import asynccontextmanager

from fastapi import FastAPI, BackgroundTasks, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

import re as _re
import httpx as _httpx

from langchain_ollama import ChatOllama
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_community.utilities import SearxSearchWrapper
from langchain_core.tools import Tool

from vram_manager import VRAMManager
from router import Router
from agent_factory import build_medium_agent, build_complex_agent
import channels

OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
ROUTER_MODEL = os.getenv("DEEPAGENTS_ROUTER_MODEL", "qwen2.5:0.5b")
MEDIUM_MODEL = os.getenv("DEEPAGENTS_MODEL", "qwen3:4b")
COMPLEX_MODEL = os.getenv("DEEPAGENTS_COMPLEX_MODEL", "qwen3:8b")
SEARXNG_URL = os.getenv("SEARXNG_URL", "http://host.docker.internal:11437")
OPENMEMORY_URL = os.getenv("OPENMEMORY_URL", "http://openmemory:8765")
CRAWL4AI_URL = os.getenv("CRAWL4AI_URL", "http://crawl4ai:11235")

MAX_HISTORY_TURNS = 5
_conversation_buffers: dict[str, list] = {}

MEDIUM_SYSTEM_PROMPT = (
    "You are a helpful AI assistant. "
    "Use web_search for questions about current events or facts you don't know. "
    "Reply concisely."
)

COMPLEX_SYSTEM_PROMPT = (
    "You are a deep research assistant. "
    "web_search automatically fetches full page content from top results — use it 6+ times with different queries. "
    "Also call fetch_url on any specific URL you want to read in full.\n\n"
    "Run searches in English AND Russian/Latvian. "
    "After getting results, run follow-up searches based on new facts found.\n\n"
    "Write a structured markdown report with sections: "
    "Overview, Education, Career, Publications, Online Presence, Interesting Findings.\n"
    "Every fact must link to the real URL it came from: [fact](url). "
    "NEVER invent URLs. End with: **Sources checked: N**"
)

medium_agent = None
complex_agent = None
router: Router = None
vram_manager: VRAMManager = None
mcp_client = None

# GPU mutex: one LLM inference at a time
_reply_semaphore = asyncio.Semaphore(1)


@asynccontextmanager
async def lifespan(app: FastAPI):
    global medium_agent, complex_agent, router, vram_manager, mcp_client

    # Register channel adapters
    channels.register_defaults()

    # Three model instances
    router_model = ChatOllama(
        model=ROUTER_MODEL, base_url=OLLAMA_BASE_URL, think=False, num_ctx=4096,
        temperature=0,
    )
    medium_model = ChatOllama(
        model=MEDIUM_MODEL, base_url=OLLAMA_BASE_URL, think=False, num_ctx=8192
    )
    complex_model = ChatOllama(
        model=COMPLEX_MODEL, base_url=OLLAMA_BASE_URL, think=True, num_ctx=16384
    )

    vram_manager = VRAMManager(base_url=OLLAMA_BASE_URL)
    router = Router(model=router_model)

    mcp_connections = {
        "openmemory": {"transport": "sse", "url": f"{OPENMEMORY_URL}/sse"},
    }
    mcp_client = MultiServerMCPClient(mcp_connections)
    for attempt in range(12):
        try:
            mcp_tools = await mcp_client.get_tools()
            break
        except Exception as e:
            if attempt == 11:
                raise
            print(f"[agent] MCP not ready (attempt {attempt + 1}/12): {e}. Retrying in 5s...")
            await asyncio.sleep(5)

    agent_tools = [t for t in mcp_tools if t.name not in ("add_memory", "search_memory", "get_all_memories")]

    searx = SearxSearchWrapper(searx_host=SEARXNG_URL)

    def _crawl4ai_fetch(url: str) -> str:
        """Fetch a URL via Crawl4AI (JS-rendered, bot-bypass) and return clean markdown."""
        try:
            r = _httpx.post(f"{CRAWL4AI_URL}/crawl", json={"urls": [url]}, timeout=60)
            r.raise_for_status()
            results = r.json().get("results", [])
            if not results or not results[0].get("success"):
                return ""
            md_obj = results[0].get("markdown") or {}
            md = md_obj.get("raw_markdown") if isinstance(md_obj, dict) else str(md_obj)
            return (md or "")[:5000]
        except Exception as e:
            return f"[fetch error: {e}]"

    def _search_and_read(query: str) -> str:
        """Search the web and automatically fetch full content of top results.
        Returns snippets + full page content from the top URLs."""
        import json as _json
        # Get structured results from SearXNG
        try:
            r = _httpx.get(
                f"{SEARXNG_URL}/search",
                params={"q": query, "format": "json"},
                timeout=15,
            )
            data = r.json()
            items = data.get("results", [])[:5]
        except Exception as e:
            return f"[search error: {e}]"

        if not items:
            return "No results found."

        out = [f"Search: {query}\n"]
        for i, item in enumerate(items, 1):
            url = item.get("url", "")
            title = item.get("title", "")
            snippet = item.get("content", "")[:300]
            out.append(f"\n[{i}] {title}\nURL: {url}\nSnippet: {snippet}")

        # Auto-fetch top 2 URLs for full content
        out.append("\n\n--- Full page content ---")
        for item in items[:2]:
            url = item.get("url", "")
            if not url:
                continue
            content = _crawl4ai_fetch(url)
            if content and not content.startswith("[fetch error"):
                out.append(f"\n### {url}\n{content[:3000]}")

        return "\n".join(out)

    agent_tools.append(Tool(
        name="web_search",
        func=_search_and_read,
        description=(
            "Search the web and read full content of top results. "
            "Returns search snippets AND full page text from the top URLs. "
            "Use multiple different queries to research a topic thoroughly."
        ),
    ))

    def _fetch_url(url: str) -> str:
        """Fetch and read the full text content of a URL."""
        content = _crawl4ai_fetch(url)
        return content if content else "[fetch_url: empty or blocked]"

    agent_tools.append(Tool(
        name="fetch_url",
        func=_fetch_url,
        description=(
            "Fetch and read the full text content of a specific URL. "
            "Use for URLs not covered by web_search. Input: a single URL string."
        ),
    ))

    medium_agent = build_medium_agent(
        model=medium_model,
        agent_tools=agent_tools,
        system_prompt=MEDIUM_SYSTEM_PROMPT,
    )
    complex_agent = build_complex_agent(
        model=complex_model,
        agent_tools=agent_tools,
        system_prompt=COMPLEX_SYSTEM_PROMPT.format(user_id="{user_id}"),
    )

    print(
        f"[agent] three-tier: router={ROUTER_MODEL} | medium={MEDIUM_MODEL} | complex={COMPLEX_MODEL}",
        flush=True,
    )
    print(f"[agent] agent tools: {[t.name for t in agent_tools]}", flush=True)

    yield

    medium_agent = None
    complex_agent = None
    router = None
    vram_manager = None
    mcp_client = None


app = FastAPI(lifespan=lifespan)


# ── request models ─────────────────────────────────────────────────────────────

class InboundMessage(BaseModel):
    text: str
    session_id: str          # e.g. "tg-346967270", "cli-alvis"
    channel: str             # "telegram" | "cli"
    user_id: str = ""        # human identity; defaults to session_id if empty
    metadata: dict = {}


class ChatRequest(BaseModel):
    """Legacy model — kept for test_pipeline.py compatibility."""
    message: str
    chat_id: str


# ── helpers ────────────────────────────────────────────────────────────────────

def _extract_final_text(result) -> str | None:
    msgs = result.get("messages", [])
    for m in reversed(msgs):
        if type(m).__name__ == "AIMessage" and getattr(m, "content", ""):
            return m.content
    if isinstance(result, dict) and result.get("output"):
        return result["output"]
    return None


def _log_messages(result):
    msgs = result.get("messages", [])
    for m in msgs:
        role = type(m).__name__
        content = getattr(m, "content", "")
        tool_calls = getattr(m, "tool_calls", [])
        if content:
            print(f"[agent]   {role}: {str(content)[:150]}", flush=True)
        for tc in tool_calls:
            print(f"[agent]   {role} → {tc['name']}({tc['args']})", flush=True)


# ── core task ──────────────────────────────────────────────────────────────────

async def run_agent_task(message: str, session_id: str, channel: str = "telegram"):
    print(f"[agent] queued: {message[:80]!r} chat={session_id}", flush=True)

    force_complex = False
    clean_message = message
    if message.startswith("/think "):
        force_complex = True
        clean_message = message[len("/think "):]
        print("[agent] /think prefix → force_complex=True", flush=True)

    async with _reply_semaphore:
        t0 = time.monotonic()
        history = _conversation_buffers.get(session_id, [])
        print(f"[agent] running: {clean_message[:80]!r}", flush=True)

        tier, light_reply = await router.route(clean_message, history, force_complex)
        print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)

        final_text = None
        try:
            if tier == "light":
                final_text = light_reply
                llm_elapsed = time.monotonic() - t0
                print(f"[agent] light path: answered by router", flush=True)

            elif tier == "medium":
                system_prompt = MEDIUM_SYSTEM_PROMPT
                result = await medium_agent.ainvoke({
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        *history,
                        {"role": "user", "content": clean_message},
                    ]
                })
                llm_elapsed = time.monotonic() - t0
                _log_messages(result)
                final_text = _extract_final_text(result)

            else:  # complex
                ok = await vram_manager.enter_complex_mode()
                if not ok:
                    print("[agent] complex→medium fallback (eviction timeout)", flush=True)
                    tier = "medium"
                    result = await medium_agent.ainvoke({
                        "messages": [
                            {"role": "system", "content": MEDIUM_SYSTEM_PROMPT},
                            *history,
                            {"role": "user", "content": clean_message},
                        ]
                    })
                else:
                    system_prompt = COMPLEX_SYSTEM_PROMPT.format(user_id=session_id)
                    result = await complex_agent.ainvoke({
                        "messages": [
                            {"role": "system", "content": system_prompt},
                            *history,
                            {"role": "user", "content": clean_message},
                        ]
                    })
                    asyncio.create_task(vram_manager.exit_complex_mode())

                llm_elapsed = time.monotonic() - t0
                _log_messages(result)
                final_text = _extract_final_text(result)

        except Exception as e:
            import traceback
            llm_elapsed = time.monotonic() - t0
            print(f"[agent] error after {llm_elapsed:.1f}s for chat {session_id}: {e}", flush=True)
            traceback.print_exc()

        # Deliver reply through the originating channel
        if final_text:
            t1 = time.monotonic()
            await channels.deliver(session_id, channel, final_text)
            send_elapsed = time.monotonic() - t1
            print(
                f"[agent] replied in {time.monotonic() - t0:.1f}s "
                f"(llm={llm_elapsed:.1f}s, send={send_elapsed:.1f}s) tier={tier}",
                flush=True,
            )
            print(f"[agent] reply_text: {final_text}", flush=True)
        else:
            print("[agent] warning: no text reply from agent", flush=True)

        # Update conversation buffer
        if final_text:
            buf = _conversation_buffers.get(session_id, [])
            buf.append({"role": "user", "content": clean_message})
            buf.append({"role": "assistant", "content": final_text})
            _conversation_buffers[session_id] = buf[-(MAX_HISTORY_TURNS * 2):]


# ── endpoints ──────────────────────────────────────────────────────────────────

@app.post("/message")
async def message(request: InboundMessage, background_tasks: BackgroundTasks):
    """Unified inbound endpoint for all channels."""
    if medium_agent is None:
        return JSONResponse(status_code=503, content={"error": "Agent not ready"})
    session_id = request.session_id
    channel = request.channel
    background_tasks.add_task(run_agent_task, request.text, session_id, channel)
    return JSONResponse(status_code=202, content={"status": "accepted"})


@app.post("/chat")
async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
    """Legacy endpoint — maps chat_id to tg-<chat_id> session for backward compatibility."""
    if medium_agent is None:
        return JSONResponse(status_code=503, content={"error": "Agent not ready"})
    session_id = f"tg-{request.chat_id}"
    background_tasks.add_task(run_agent_task, request.message, session_id, "telegram")
    return JSONResponse(status_code=202, content={"status": "accepted"})


@app.get("/reply/{session_id}")
async def reply_stream(session_id: str):
    """
    SSE endpoint — streams the reply for a session once available, then closes.
    Used by CLI client and wiki_research.py instead of log polling.
    """
    q = channels.pending_replies.setdefault(session_id, asyncio.Queue())

    async def event_generator():
        try:
            text = await asyncio.wait_for(q.get(), timeout=900)
            # Escape newlines so entire reply fits in one SSE data line
            yield f"data: {text.replace(chr(10), '\\n').replace(chr(13), '')}\n\n"
        except asyncio.TimeoutError:
            yield "data: [timeout]\n\n"

    return StreamingResponse(event_generator(), media_type="text/event-stream")


@app.get("/health")
async def health():
    return {"status": "ok", "agent_ready": medium_agent is not None}