Embed Crawl4AI at all tiers, restore qwen3:4b medium, update docs
- Pre-routing URL fetch: any message with URLs gets content fetched async (httpx.AsyncClient) before routing via _fetch_urls_from_message()
- URL context and memories gathered concurrently with asyncio.gather
- Light tier upgraded to medium when URL content is present
- url_context injected into system prompt for medium and complex agents
- Complex agent retains web_search/fetch_url tools + receives pre-fetched content
- Medium model restored to qwen3:4b (was temporarily qwen2.5:1.5b)
- Unit tests added for _extract_urls
- ARCHITECTURE.md: added Tool Handling, Crawl4AI Integration, Memory Pipeline sections
- CLAUDE.md: updated request flow and Crawl4AI integration docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/integration/__init__.py
Normal file
0
tests/integration/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
@@ -13,7 +13,7 @@ import pytest
|
||||
# The FastAPI app is instantiated at module level in agent.py —
|
||||
# with the mocked fastapi, that just creates a MagicMock() object
|
||||
# and the route decorators are no-ops.
|
||||
from agent import _strip_think, _extract_final_text
|
||||
from agent import _strip_think, _extract_final_text, _extract_urls
|
||||
|
||||
|
||||
# ── _strip_think ───────────────────────────────────────────────────────────────
|
||||
@@ -159,3 +159,40 @@ class TestExtractFinalText:
|
||||
]
|
||||
}
|
||||
assert _extract_final_text(result) == "## Report\n\nSome content."
|
||||
|
||||
|
||||
# ── _extract_urls ──────────────────────────────────────────────────────────────
|
||||
|
||||
class TestExtractUrls:
    """Unit tests for the _extract_urls helper.

    Covers: single/multiple URL extraction, both http and https schemes,
    URL position within the message (start, middle, standalone), paths and
    query strings, empty input, and stripping of surrounding quote characters.
    """

    def test_single_url(self):
        """One URL embedded mid-sentence is extracted on its own."""
        extracted = _extract_urls("check this out https://example.com please")
        assert extracted == ["https://example.com"]

    def test_multiple_urls(self):
        """Several URLs are returned in the order they appear."""
        message = "see https://foo.com and https://bar.org/path?q=1"
        assert _extract_urls(message) == ["https://foo.com", "https://bar.org/path?q=1"]

    def test_no_urls(self):
        """Plain text without links yields an empty list."""
        assert _extract_urls("no links here at all") == []

    def test_http_and_https(self):
        """Both the http and https schemes are recognized."""
        extracted = _extract_urls("http://old.site and https://new.site")
        for expected in ("http://old.site", "https://new.site"):
            assert expected in extracted

    def test_url_at_start_of_message(self):
        """A URL leading the message is still captured."""
        extracted = _extract_urls("https://example.com is interesting")
        assert extracted == ["https://example.com"]

    def test_url_only(self):
        """A message that is nothing but a URL returns that URL."""
        assert _extract_urls("https://example.com/page") == ["https://example.com/page"]

    def test_url_with_path_and_query(self):
        """Path segments and query parameters are kept intact."""
        link = "https://example.com/articles/123?ref=home&page=2"
        assert _extract_urls(link) == [link]

    def test_empty_string(self):
        """Empty input produces an empty result."""
        assert _extract_urls("") == []

    def test_does_not_include_surrounding_quotes(self):
        """A URL wrapped in double quotes is extracted without the quote chars."""
        extracted = _extract_urls('visit "https://example.com" today')
        assert extracted == ["https://example.com"]
|
||||
|
||||
Reference in New Issue
Block a user