diff --git a/agent.py b/agent.py
index b532c58..fed4af7 100644
--- a/agent.py
+++ b/agent.py
@@ -431,13 +431,13 @@ async def _run_agent_pipeline(
     history: list[dict],
     session_id: str,
     tier_override: str | None = None,
-    dry_run: bool = False,
+    no_inference: bool = False,
     tier_capture: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """Core pipeline: pre-flight → routing → inference. Yields text chunks.
 
     tier_override: "light" | "medium" | "complex" | None (auto-route)
-    dry_run: if True and tier=complex, log tier=complex but use medium model (avoids API cost)
+    no_inference: if True, the fast-tool short-circuit is bypassed and the routing decision is still made (logged and appended to tier_capture), but inference is skipped — yields "I don't know" and returns
     Caller is responsible for scheduling _store_memory after consuming all chunks.
     """
     async with _reply_semaphore:
@@ -471,7 +471,7 @@ async def _run_agent_pipeline(
 
         try:
             # Short-circuit: fast tool already has the answer
-            if fast_context and tier_override is None and not url_context:
+            if fast_context and tier_override is None and not url_context and not no_inference:
                 tier = "fast"
                 final_text = fast_context
                 llm_elapsed = time.monotonic() - t0
@@ -494,17 +494,14 @@ async def _run_agent_pipeline(
                         light_reply = None
                         print("[agent] URL in message → upgraded light→medium", flush=True)
 
-                # Dry-run: log as complex but infer with medium (no remote API call)
-                effective_tier = tier
-                if dry_run and tier == "complex":
-                    effective_tier = "medium"
-                    print(f"[agent] tier=complex (dry-run) → using medium model, message={clean_message[:60]!r}", flush=True)
-                else:
-                    print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
-                tier = effective_tier
+                print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
                 if tier_capture is not None:
                     tier_capture.append(tier)
 
+                if no_inference:
+                    yield "I don't know"
+                    return
+
                 if tier == "light":
                     final_text = light_reply
                     llm_elapsed = time.monotonic() - t0
@@ -594,7 +591,7 @@ async def run_agent_task(
     t0 = time.monotonic()
 
     meta = metadata or {}
-    dry_run = bool(meta.get("dry_run", False))
+    no_inference = bool(meta.get("no_inference", False))
     is_benchmark = bool(meta.get("benchmark", False))
 
     history = _conversation_buffers.get(session_id, [])
@@ -602,7 +599,7 @@ async def run_agent_task(
     actual_tier = "unknown"
     tier_capture: list = []
 
-    async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture):
+    async for chunk in _run_agent_pipeline(message, history, session_id, no_inference=no_inference, tier_capture=tier_capture):
         await _push_stream_chunk(session_id, chunk)
         if final_text is None:
             final_text = chunk