feat: rename dry_run to no_inference for all tiers #17

Merged
alvis merged 1 commit from worktree-agent-afc013ce into main 2026-03-24 07:27:04 +00:00

View File

@@ -431,13 +431,13 @@ async def _run_agent_pipeline(
history: list[dict], history: list[dict],
session_id: str, session_id: str,
tier_override: str | None = None, tier_override: str | None = None,
dry_run: bool = False, no_inference: bool = False,
tier_capture: list | None = None, tier_capture: list | None = None,
) -> AsyncGenerator[str, None]: ) -> AsyncGenerator[str, None]:
"""Core pipeline: pre-flight → routing → inference. Yields text chunks. """Core pipeline: pre-flight → routing → inference. Yields text chunks.
tier_override: "light" | "medium" | "complex" | None (auto-route) tier_override: "light" | "medium" | "complex" | None (auto-route)
dry_run: if True and tier=complex, log tier=complex but use medium model (avoids API cost) no_inference: if True, routing decision is still made but inference is skipped — yields "I don't know" immediately
Caller is responsible for scheduling _store_memory after consuming all chunks. Caller is responsible for scheduling _store_memory after consuming all chunks.
""" """
async with _reply_semaphore: async with _reply_semaphore:
@@ -471,7 +471,7 @@ async def _run_agent_pipeline(
try: try:
# Short-circuit: fast tool already has the answer # Short-circuit: fast tool already has the answer
if fast_context and tier_override is None and not url_context: if fast_context and tier_override is None and not url_context and not no_inference:
tier = "fast" tier = "fast"
final_text = fast_context final_text = fast_context
llm_elapsed = time.monotonic() - t0 llm_elapsed = time.monotonic() - t0
@@ -494,17 +494,14 @@ async def _run_agent_pipeline(
light_reply = None light_reply = None
print("[agent] URL in message → upgraded light→medium", flush=True) print("[agent] URL in message → upgraded light→medium", flush=True)
# Dry-run: log as complex but infer with medium (no remote API call)
effective_tier = tier
if dry_run and tier == "complex":
effective_tier = "medium"
print(f"[agent] tier=complex (dry-run) → using medium model, message={clean_message[:60]!r}", flush=True)
else:
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True) print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
tier = effective_tier
if tier_capture is not None: if tier_capture is not None:
tier_capture.append(tier) tier_capture.append(tier)
if no_inference:
yield "I don't know"
return
if tier == "light": if tier == "light":
final_text = light_reply final_text = light_reply
llm_elapsed = time.monotonic() - t0 llm_elapsed = time.monotonic() - t0
@@ -594,7 +591,7 @@ async def run_agent_task(
t0 = time.monotonic() t0 = time.monotonic()
meta = metadata or {} meta = metadata or {}
dry_run = bool(meta.get("dry_run", False)) no_inference = bool(meta.get("no_inference", False))
is_benchmark = bool(meta.get("benchmark", False)) is_benchmark = bool(meta.get("benchmark", False))
history = _conversation_buffers.get(session_id, []) history = _conversation_buffers.get(session_id, [])
@@ -602,7 +599,7 @@ async def run_agent_task(
actual_tier = "unknown" actual_tier = "unknown"
tier_capture: list = [] tier_capture: list = []
async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture): async for chunk in _run_agent_pipeline(message, history, session_id, no_inference=no_inference, tier_capture=tier_capture):
await _push_stream_chunk(session_id, chunk) await _push_stream_chunk(session_id, chunk)
if final_text is None: if final_text is None:
final_text = chunk final_text = chunk