diff --git a/agent.py b/agent.py index 7c650a4..54018aa 100644 --- a/agent.py +++ b/agent.py @@ -2,7 +2,7 @@ import asyncio import json as _json_module import os import time -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager, nullcontext from pathlib import Path from fastapi import FastAPI, BackgroundTasks, Request @@ -440,7 +440,7 @@ async def _run_agent_pipeline( no_inference: if True, routing decision is still made but inference is skipped — yields "I don't know" immediately Caller is responsible for scheduling _store_memory after consuming all chunks. """ - async with _reply_semaphore: + async with (nullcontext() if no_inference else _reply_semaphore): t0 = time.monotonic() clean_message = message print(f"[agent] running: {clean_message[:80]!r}", flush=True)