Files
adolf/tests/unit/test_agent_helpers.py
Alvis 50097d6092 Embed Crawl4AI at all tiers, restore qwen3:4b medium, update docs
- Pre-routing URL fetch: any message with URLs gets content fetched
  async (httpx.AsyncClient) before routing via _fetch_urls_from_message()
- URL context and memories gathered concurrently with asyncio.gather
- Light tier upgraded to medium when URL content is present
- url_context injected into system prompt for medium and complex agents
- Complex agent retains web_search/fetch_url tools + receives pre-fetched content
- Medium model restored to qwen3:4b (was temporarily qwen2.5:1.5b)
- Unit tests added for _extract_urls
- ARCHITECTURE.md: added Tool Handling, Crawl4AI Integration, Memory Pipeline sections
- CLAUDE.md: updated request flow and Crawl4AI integration docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 15:49:34 +00:00

199 lines
7.1 KiB
Python

"""
Unit tests for agent.py helper functions:
- _strip_think(text)
- _extract_final_text(result)
- _extract_urls(text)
agent.py has heavy FastAPI/LangChain imports; conftest.py stubs them out so
these pure functions can be imported and tested in isolation.
"""
import pytest
# conftest.py has already installed all stubs into sys.modules.
# The FastAPI app is instantiated at module level in agent.py —
# with the mocked fastapi, that just creates a MagicMock() object
# and the route decorators are no-ops.
from agent import _strip_think, _extract_final_text, _extract_urls
# ── _strip_think ───────────────────────────────────────────────────────────────
class TestStripThink:
    """Expectations for ``_strip_think`` on text containing ``<think>`` blocks."""

    def test_removes_single_think_block(self):
        """A single leading think block is dropped, leaving the answer."""
        raw = "<think>internal reasoning</think>Final answer."
        cleaned = _strip_think(raw)
        assert cleaned == "Final answer."

    def test_removes_multiline_think_block(self):
        """A think block spanning several lines is dropped as a whole."""
        raw = "<think>\nLine one.\nLine two.\n</think>\nResult here."
        cleaned = _strip_think(raw)
        assert cleaned == "Result here."

    def test_no_think_block_unchanged(self):
        """Text without any think block comes back as-is."""
        plain = "This is a plain answer with no think block."
        assert _strip_think(plain) == plain

    def test_removes_multiple_think_blocks(self):
        """Every think block is dropped; the text between them is kept."""
        raw = "<think>step 1</think>middle<think>step 2</think>end"
        cleaned = _strip_think(raw)
        assert cleaned == "middleend"

    def test_strips_surrounding_whitespace(self):
        """Whitespace left around the remaining answer is trimmed."""
        raw = " <think>stuff</think> answer "
        cleaned = _strip_think(raw)
        assert cleaned == "answer"

    def test_empty_think_block(self):
        """An empty think block is dropped like any other."""
        raw = "<think></think>Hello."
        cleaned = _strip_think(raw)
        assert cleaned == "Hello."

    def test_empty_string(self):
        """Empty input yields empty output."""
        cleaned = _strip_think("")
        assert cleaned == ""

    def test_only_think_block_returns_empty(self):
        """Input made up solely of a think block reduces to an empty string."""
        raw = "<think>nothing useful</think>"
        cleaned = _strip_think(raw)
        assert cleaned == ""

    def test_think_block_with_nested_tags(self):
        """Other tags inside the think block do not stop it being dropped."""
        raw = "<think>I should use <b>bold</b> here</think>Done."
        cleaned = _strip_think(raw)
        assert cleaned == "Done."

    def test_preserves_markdown(self):
        """Markdown following the think block is returned untouched."""
        raw = "<think>plan</think>## Report\n\n- Point one\n- Point two"
        expected = "## Report\n\n- Point one\n- Point two"
        assert _strip_think(raw) == expected
# ── _extract_final_text ────────────────────────────────────────────────────────
class TestExtractFinalText:
    """Expectations for ``_extract_final_text`` on agent result dictionaries."""

    def _ai_msg(self, content: str, tool_calls=None):
        """Build a bare stand-in object whose class is named ``AIMessage``."""

        class AIMessage:
            pass

        msg = AIMessage()
        msg.content = content
        # A missing (or otherwise falsy) tool_calls argument becomes a new list.
        msg.tool_calls = tool_calls or []
        return msg

    def _human_msg(self, content: str):
        """Build a bare stand-in object whose class is named ``HumanMessage``."""

        class HumanMessage:
            pass

        msg = HumanMessage()
        msg.content = content
        return msg

    def test_returns_last_ai_message_content(self):
        """The content of the AI message at the end of the list is returned."""
        history = [
            self._human_msg("what is 2+2"),
            self._ai_msg("The answer is 4."),
        ]
        state = {"messages": history}
        assert _extract_final_text(state) == "The answer is 4."

    def test_returns_last_of_multiple_ai_messages(self):
        """With several AI messages, the most recent one is returned."""
        history = [
            self._ai_msg("First response."),
            self._human_msg("follow-up"),
            self._ai_msg("Final response."),
        ]
        state = {"messages": history}
        assert _extract_final_text(state) == "Final response."

    def test_skips_empty_ai_messages(self):
        """A trailing AI message with empty content is passed over."""
        history = [
            self._ai_msg("Real answer."),
            self._ai_msg(""),  # empty, so it should be skipped
        ]
        state = {"messages": history}
        assert _extract_final_text(state) == "Real answer."

    def test_strips_think_tags_from_ai_message(self):
        """Think blocks are removed from the AI message content."""
        history = [self._ai_msg("<think>reasoning here</think>Clean reply.")]
        state = {"messages": history}
        assert _extract_final_text(state) == "Clean reply."

    def test_falls_back_to_output_field(self):
        """With no messages, the ``output`` entry is returned instead."""
        state = {"messages": [], "output": "Fallback output."}
        assert _extract_final_text(state) == "Fallback output."

    def test_strips_think_from_output_field(self):
        """Think blocks are removed from the ``output`` fallback as well."""
        state = {"messages": [], "output": "<think>thoughts</think>Actual output."}
        assert _extract_final_text(state) == "Actual output."

    def test_returns_none_when_no_content(self):
        """An empty message list and no ``output`` key gives ``None``."""
        state = {"messages": []}
        assert _extract_final_text(state) is None

    def test_returns_none_when_no_messages_and_no_output(self):
        """An empty message list plus an empty ``output`` string gives ``None``."""
        state = {"messages": [], "output": ""}
        # The output entry is falsy, so None is expected.
        assert _extract_final_text(state) is None

    def test_skips_non_ai_messages(self):
        """A history holding only a human message gives ``None``."""
        history = [self._human_msg("user question")]
        state = {"messages": history}
        assert _extract_final_text(state) is None

    def test_handles_ai_message_with_tool_calls_but_no_content(self):
        """An AI message carrying only tool_calls and no content is skipped."""
        tool_only = self._ai_msg("", tool_calls=[{"name": "web_search", "args": {}}])
        state = {"messages": [tool_only]}
        assert _extract_final_text(state) is None

    def test_multiline_think_stripped_correctly(self):
        """A multi-line think block is removed, leaving the markdown report."""
        raw = "<think>\nLong\nreasoning\nblock\n</think>\n## Report\n\nSome content."
        state = {"messages": [self._ai_msg(raw)]}
        assert _extract_final_text(state) == "## Report\n\nSome content."
# ── _extract_urls ──────────────────────────────────────────────────────────────
class TestExtractUrls:
    """Expectations for ``_extract_urls`` on free-form message text."""

    def test_single_url(self):
        """One URL embedded in a sentence is returned as a one-element list."""
        found = _extract_urls("check this out https://example.com please")
        assert found == ["https://example.com"]

    def test_multiple_urls(self):
        """Several URLs are returned in the order they appear."""
        found = _extract_urls("see https://foo.com and https://bar.org/path?q=1")
        expected = ["https://foo.com", "https://bar.org/path?q=1"]
        assert found == expected

    def test_no_urls(self):
        """Text without any URL gives an empty list."""
        found = _extract_urls("no links here at all")
        assert found == []

    def test_http_and_https(self):
        """Both the http and https schemes are picked up."""
        found = _extract_urls("http://old.site and https://new.site")
        assert "http://old.site" in found
        assert "https://new.site" in found

    def test_url_at_start_of_message(self):
        """A URL opening the message is found."""
        found = _extract_urls("https://example.com is interesting")
        assert found == ["https://example.com"]

    def test_url_only(self):
        """A message consisting solely of a URL returns that URL."""
        found = _extract_urls("https://example.com/page")
        assert found == ["https://example.com/page"]

    def test_url_with_path_and_query(self):
        """Path segments and query parameters stay part of the URL."""
        target = "https://example.com/articles/123?ref=home&page=2"
        found = _extract_urls(target)
        assert found == [target]

    def test_empty_string(self):
        """Empty input gives an empty list."""
        found = _extract_urls("")
        assert found == []

    def test_does_not_include_surrounding_quotes(self):
        """Quote characters wrapping a URL are not part of the result."""
        # The URL sits inside double quotes; only the URL itself should come back.
        found = _extract_urls('visit "https://example.com" today')
        assert found == ["https://example.com"]