Add Rich token streaming: server SSE + CLI live display + CLI container

Server (agent.py):
- _stream_queues: per-session asyncio.Queue for token chunks
- _push_stream_chunk() / _end_stream() helpers
- Medium tier: astream() with <think> block filtering — real token streaming
- Light tier: full reply pushed as single chunk then [DONE]
- Complex tier: full reply pushed after agent completes then [DONE]
- GET /stream/{session_id} SSE endpoint (data: <chunk>\n\n, data: [DONE]\n\n)
- medium_model promoted to module-level global for astream() access

CLI (cli.py):
- stream_reply(): reads /stream/ SSE, renders tokens live with Rich Live (transient)
- Final reply rendered as Markdown after stream completes
- os.getlogin() replaced with os.getenv("USER") for container compatibility

Dockerfile.cli + docker-compose cli service (profiles: tools):
- Run: docker compose --profile tools run --rm -it cli

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvis
2026-03-12 17:26:52 +00:00
parent edc9a96f7a
commit b04e8a0925
6 changed files with 151 additions and 38 deletions

61
cli.py
View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""
Adolf CLI — interactive REPL for the multi-channel gateway.
Adolf CLI — interactive REPL with Rich streaming display.
Usage:
python3 cli.py [--url http://localhost:8000] [--session cli-alvis]
python3 cli.py [--url http://deepagents:8000] [--session cli-alvis]
"""
import argparse
@@ -12,7 +12,13 @@ import os
import sys
import urllib.request
GATEWAY = "http://localhost:8000"
from rich.console import Console
from rich.live import Live
from rich.markdown import Markdown
from rich.text import Text
GATEWAY = "http://deepagents:8000"
console = Console()
def post_message(gateway: str, text: str, session_id: str) -> None:
@@ -20,7 +26,7 @@ def post_message(gateway: str, text: str, session_id: str) -> None:
"text": text,
"session_id": session_id,
"channel": "cli",
"user_id": os.getlogin(),
"user_id": os.getenv("USER", "user"),
}).encode()
req = urllib.request.Request(
f"{gateway}/message",
@@ -30,33 +36,49 @@ def post_message(gateway: str, text: str, session_id: str) -> None:
)
with urllib.request.urlopen(req, timeout=10) as r:
if r.status != 202:
print(f"[error] gateway returned {r.status}", file=sys.stderr)
console.print(f"[red][error] gateway returned {r.status}[/red]")
sys.exit(1)
def wait_for_reply(gateway: str, session_id: str, timeout: int = 400) -> str:
"""Open SSE stream and return first data event."""
def stream_reply(gateway: str, session_id: str, timeout: int = 400) -> str:
"""
Open the /stream/{session_id} SSE endpoint and display tokens live with
Rich. Returns the full assembled reply text.
"""
req = urllib.request.Request(
f"{gateway}/reply/{session_id}",
f"{gateway}/stream/{session_id}",
headers={"Accept": "text/event-stream"},
)
buffer = ""
with urllib.request.urlopen(req, timeout=timeout + 5) as r:
for raw_line in r:
line = raw_line.decode("utf-8").rstrip("\n")
if line.startswith("data:"):
return line[5:].strip().replace("\\n", "\n")
return ""
with Live(Text(""), console=console, refresh_per_second=20, transient=True) as live:
for raw_line in r:
line = raw_line.decode("utf-8").rstrip("\n")
if not line.startswith("data:"):
continue
chunk = line[5:].strip()
if chunk == "[DONE]":
break
chunk = chunk.replace("\\n", "\n")
buffer += chunk
live.update(Text(buffer))
# Render the complete reply as Markdown once streaming is done
console.print(Markdown(buffer))
return buffer
def main():
parser = argparse.ArgumentParser(description="Adolf CLI")
parser.add_argument("--url", default=GATEWAY, help="Gateway URL")
parser.add_argument("--session", default=f"cli-{os.getlogin()}", help="Session ID")
parser.add_argument("--session", default=f"cli-{os.getenv('USER', 'user')}",
help="Session ID")
parser.add_argument("--timeout", type=int, default=400, help="Reply timeout (seconds)")
args = parser.parse_args()
print(f"Adolf CLI (session={args.session}, gateway={args.url})")
print("Type your message and press Enter. Ctrl+C or Ctrl+D to exit.\n")
console.print(f"[bold]Adolf CLI[/bold] (session=[cyan]{args.session}[/cyan], "
f"gateway=[cyan]{args.url}[/cyan])")
console.print("Type your message and press Enter. Ctrl+C or Ctrl+D to exit.\n")
try:
while True:
@@ -68,12 +90,11 @@ def main():
continue
post_message(args.url, text, args.session)
print("...", end="", flush=True)
reply = wait_for_reply(args.url, args.session, timeout=args.timeout)
print(f"\r{reply}\n")
stream_reply(args.url, args.session, timeout=args.timeout)
console.print()
except KeyboardInterrupt:
print("\nbye")
console.print("\n[dim]bye[/dim]")
if __name__ == "__main__":