Embed Crawl4AI at all tiers, restore qwen3:4b medium, update docs

- Pre-routing URL fetch: any message with URLs gets content fetched
  async (httpx.AsyncClient) before routing via _fetch_urls_from_message()
- URL context and memories gathered concurrently with asyncio.gather
- Light tier upgraded to medium when URL content is present
- url_context injected into system prompt for medium and complex agents
- Complex agent retains web_search/fetch_url tools + receives pre-fetched content
- Medium model restored to qwen3:4b (was temporarily qwen2.5:1.5b)
- Unit tests added for _extract_urls
- ARCHITECTURE.md: added Tool Handling, Crawl4AI Integration, Memory Pipeline sections
- CLAUDE.md: updated request flow and Crawl4AI integration docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvis
2026-03-12 15:49:34 +00:00
parent f9618a9bbf
commit 50097d6092
8 changed files with 183 additions and 31 deletions

0
tests/__init__.py Normal file
View File

View File

0
tests/unit/__init__.py Normal file
View File

View File

@@ -13,7 +13,7 @@ import pytest
# The FastAPI app is instantiated at module level in agent.py —
# with the mocked fastapi, that just creates a MagicMock() object
# and the route decorators are no-ops.
from agent import _strip_think, _extract_final_text
from agent import _strip_think, _extract_final_text, _extract_urls
# ── _strip_think ───────────────────────────────────────────────────────────────
@@ -159,3 +159,40 @@ class TestExtractFinalText:
]
}
assert _extract_final_text(result) == "## Report\n\nSome content."
# ── _extract_urls ──────────────────────────────────────────────────────────────
class TestExtractUrls:
    """Behavioral tests for _extract_urls: pulling http(s) URLs out of free-form text."""

    def test_single_url(self):
        message = "check this out https://example.com please"
        assert _extract_urls(message) == ["https://example.com"]

    def test_multiple_urls(self):
        found = _extract_urls("see https://foo.com and https://bar.org/path?q=1")
        assert found == ["https://foo.com", "https://bar.org/path?q=1"]

    def test_no_urls(self):
        assert _extract_urls("no links here at all") == []

    def test_http_and_https(self):
        # Both schemes must be recognized, regardless of order of appearance.
        found = _extract_urls("http://old.site and https://new.site")
        assert "http://old.site" in found
        assert "https://new.site" in found

    def test_url_at_start_of_message(self):
        assert _extract_urls("https://example.com is interesting") == ["https://example.com"]

    def test_url_only(self):
        assert _extract_urls("https://example.com/page") == ["https://example.com/page"]

    def test_url_with_path_and_query(self):
        target = "https://example.com/articles/123?ref=home&page=2"
        assert _extract_urls(target) == [target]

    def test_empty_string(self):
        assert _extract_urls("") == []

    def test_does_not_include_surrounding_quotes(self):
        # A quoted URL must come back without the quote characters attached.
        found = _extract_urls('visit "https://example.com" today')
        assert found == ["https://example.com"]