Compare commits
8 Commits
47a1166be6
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| | 887d4b8d90 | | |
| | 4e6d3090c2 | | |
| | 5b09a99a7f | | |
| | 3fb90ae083 | | |
| | 4d37ac65b2 | | |
| | b7d5896076 | | |
| | fc53632c7b | | |
| | 9c2f27eed4 | | |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,7 +2,6 @@ __pycache__/
|
|||||||
*.pyc
|
*.pyc
|
||||||
logs/*.jsonl
|
logs/*.jsonl
|
||||||
adolf_tuning_data/voice_audio/
|
adolf_tuning_data/voice_audio/
|
||||||
benchmarks/benchmark.json
|
|
||||||
benchmarks/results_latest.json
|
benchmarks/results_latest.json
|
||||||
benchmarks/voice_results*.json
|
benchmarks/voice_results*.json
|
||||||
benchmarks/voice_audio/
|
benchmarks/voice_audio/
|
||||||
|
|||||||
55
agent.py
55
agent.py
@@ -2,7 +2,7 @@ import asyncio
|
|||||||
import json as _json_module
|
import json as _json_module
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager, nullcontext
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from fastapi import FastAPI, BackgroundTasks, Request
|
from fastapi import FastAPI, BackgroundTasks, Request
|
||||||
@@ -431,31 +431,35 @@ async def _run_agent_pipeline(
|
|||||||
history: list[dict],
|
history: list[dict],
|
||||||
session_id: str,
|
session_id: str,
|
||||||
tier_override: str | None = None,
|
tier_override: str | None = None,
|
||||||
dry_run: bool = False,
|
no_inference: bool = False,
|
||||||
tier_capture: list | None = None,
|
tier_capture: list | None = None,
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
"""Core pipeline: pre-flight → routing → inference. Yields text chunks.
|
"""Core pipeline: pre-flight → routing → inference. Yields text chunks.
|
||||||
|
|
||||||
tier_override: "light" | "medium" | "complex" | None (auto-route)
|
tier_override: "light" | "medium" | "complex" | None (auto-route)
|
||||||
dry_run: if True and tier=complex, log tier=complex but use medium model (avoids API cost)
|
no_inference: if True, the routing decision is still made but pre-flight I/O and inference are skipped — yields the placeholder "I don't know" as soon as the tier is known
|
||||||
Caller is responsible for scheduling _store_memory after consuming all chunks.
|
Caller is responsible for scheduling _store_memory after consuming all chunks.
|
||||||
"""
|
"""
|
||||||
async with _reply_semaphore:
|
async with (nullcontext() if no_inference else _reply_semaphore):
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
clean_message = message
|
clean_message = message
|
||||||
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
|
print(f"[agent] running: {clean_message[:80]!r}", flush=True)
|
||||||
|
|
||||||
# Fetch URL content, memories, and fast-tool context concurrently
|
# Fetch URL content, memories, and fast-tool context concurrently
|
||||||
url_context, memories, fast_context = await asyncio.gather(
|
# Skip preflight IO in no_inference mode — only routing decision needed
|
||||||
_fetch_urls_from_message(clean_message),
|
if no_inference:
|
||||||
_retrieve_memories(clean_message, session_id),
|
url_context = memories = fast_context = None
|
||||||
_fast_tool_runner.run_matching(clean_message),
|
else:
|
||||||
)
|
url_context, memories, fast_context = await asyncio.gather(
|
||||||
if url_context:
|
_fetch_urls_from_message(clean_message),
|
||||||
print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True)
|
_retrieve_memories(clean_message, session_id),
|
||||||
if fast_context:
|
_fast_tool_runner.run_matching(clean_message),
|
||||||
names = _fast_tool_runner.matching_names(clean_message)
|
)
|
||||||
print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True)
|
if url_context:
|
||||||
|
print(f"[agent] crawl4ai: {len(url_context)} chars fetched", flush=True)
|
||||||
|
if fast_context:
|
||||||
|
names = _fast_tool_runner.matching_names(clean_message)
|
||||||
|
print(f"[agent] fast_tools={names}: {len(fast_context)} chars injected", flush=True)
|
||||||
|
|
||||||
# Build enriched history
|
# Build enriched history
|
||||||
enriched_history = list(history)
|
enriched_history = list(history)
|
||||||
@@ -471,7 +475,7 @@ async def _run_agent_pipeline(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Short-circuit: fast tool already has the answer
|
# Short-circuit: fast tool already has the answer
|
||||||
if fast_context and tier_override is None and not url_context:
|
if fast_context and tier_override is None and not url_context and not no_inference:
|
||||||
tier = "fast"
|
tier = "fast"
|
||||||
final_text = fast_context
|
final_text = fast_context
|
||||||
llm_elapsed = time.monotonic() - t0
|
llm_elapsed = time.monotonic() - t0
|
||||||
@@ -485,26 +489,23 @@ async def _run_agent_pipeline(
|
|||||||
tier = tier_override
|
tier = tier_override
|
||||||
light_reply = None
|
light_reply = None
|
||||||
if tier_override == "light":
|
if tier_override == "light":
|
||||||
tier, light_reply = await router.route(clean_message, enriched_history)
|
tier, light_reply = await router.route(clean_message, enriched_history, no_inference=no_inference)
|
||||||
tier = "light"
|
tier = "light"
|
||||||
else:
|
else:
|
||||||
tier, light_reply = await router.route(clean_message, enriched_history)
|
tier, light_reply = await router.route(clean_message, enriched_history, no_inference=no_inference)
|
||||||
if url_context and tier == "light":
|
if url_context and tier == "light":
|
||||||
tier = "medium"
|
tier = "medium"
|
||||||
light_reply = None
|
light_reply = None
|
||||||
print("[agent] URL in message → upgraded light→medium", flush=True)
|
print("[agent] URL in message → upgraded light→medium", flush=True)
|
||||||
|
|
||||||
# Dry-run: log as complex but infer with medium (no remote API call)
|
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
||||||
effective_tier = tier
|
|
||||||
if dry_run and tier == "complex":
|
|
||||||
effective_tier = "medium"
|
|
||||||
print(f"[agent] tier=complex (dry-run) → using medium model, message={clean_message[:60]!r}", flush=True)
|
|
||||||
else:
|
|
||||||
print(f"[agent] tier={tier} message={clean_message[:60]!r}", flush=True)
|
|
||||||
tier = effective_tier
|
|
||||||
if tier_capture is not None:
|
if tier_capture is not None:
|
||||||
tier_capture.append(tier)
|
tier_capture.append(tier)
|
||||||
|
|
||||||
|
if no_inference:
|
||||||
|
yield "I don't know"
|
||||||
|
return
|
||||||
|
|
||||||
if tier == "light":
|
if tier == "light":
|
||||||
final_text = light_reply
|
final_text = light_reply
|
||||||
llm_elapsed = time.monotonic() - t0
|
llm_elapsed = time.monotonic() - t0
|
||||||
@@ -594,7 +595,7 @@ async def run_agent_task(
|
|||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
meta = metadata or {}
|
meta = metadata or {}
|
||||||
dry_run = bool(meta.get("dry_run", False))
|
no_inference = bool(meta.get("no_inference", False))
|
||||||
is_benchmark = bool(meta.get("benchmark", False))
|
is_benchmark = bool(meta.get("benchmark", False))
|
||||||
|
|
||||||
history = _conversation_buffers.get(session_id, [])
|
history = _conversation_buffers.get(session_id, [])
|
||||||
@@ -602,7 +603,7 @@ async def run_agent_task(
|
|||||||
actual_tier = "unknown"
|
actual_tier = "unknown"
|
||||||
tier_capture: list = []
|
tier_capture: list = []
|
||||||
|
|
||||||
async for chunk in _run_agent_pipeline(message, history, session_id, dry_run=dry_run, tier_capture=tier_capture):
|
async for chunk in _run_agent_pipeline(message, history, session_id, no_inference=no_inference, tier_capture=tier_capture):
|
||||||
await _push_stream_chunk(session_id, chunk)
|
await _push_stream_chunk(session_id, chunk)
|
||||||
if final_text is None:
|
if final_text is None:
|
||||||
final_text = chunk
|
final_text = chunk
|
||||||
|
|||||||
137
benchmarks/benchmark.json
Normal file
137
benchmarks/benchmark.json
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
{
|
||||||
|
"description": "Adolf routing benchmark — домашние сценарии, Alexa/Google-Home стиль, русский язык",
|
||||||
|
"tiers": {
|
||||||
|
"light": "Приветствия, прощания, подтверждения, простые разговорные фразы. Не требуют поиска или действий.",
|
||||||
|
"medium": "Управление домом, погода/пробки, таймеры, напоминания, покупки, личная память, быстрые вопросы.",
|
||||||
|
"complex": "Глубокое исследование, сравнение технологий, подробные руководства с несколькими источниками."
|
||||||
|
},
|
||||||
|
"queries": [
|
||||||
|
{"id": 1, "tier": "light", "category": "greetings", "query": "привет"},
|
||||||
|
{"id": 2, "tier": "light", "category": "greetings", "query": "пока"},
|
||||||
|
{"id": 3, "tier": "light", "category": "greetings", "query": "спасибо"},
|
||||||
|
{"id": 4, "tier": "light", "category": "greetings", "query": "привет, как дела?"},
|
||||||
|
{"id": 5, "tier": "light", "category": "greetings", "query": "окей"},
|
||||||
|
{"id": 6, "tier": "light", "category": "greetings", "query": "добрый вечер"},
|
||||||
|
{"id": 7, "tier": "light", "category": "greetings", "query": "доброе утро"},
|
||||||
|
{"id": 8, "tier": "light", "category": "greetings", "query": "добрый день"},
|
||||||
|
{"id": 9, "tier": "light", "category": "greetings", "query": "hi"},
|
||||||
|
{"id": 10, "tier": "light", "category": "greetings", "query": "thanks"},
|
||||||
|
{"id": 11, "tier": "light", "category": "greetings", "query": "отлично, спасибо"},
|
||||||
|
{"id": 12, "tier": "light", "category": "greetings", "query": "понятно"},
|
||||||
|
{"id": 13, "tier": "light", "category": "greetings", "query": "ясно"},
|
||||||
|
{"id": 14, "tier": "light", "category": "greetings", "query": "ладно"},
|
||||||
|
{"id": 15, "tier": "light", "category": "greetings", "query": "договорились"},
|
||||||
|
{"id": 16, "tier": "light", "category": "greetings", "query": "good morning"},
|
||||||
|
{"id": 17, "tier": "light", "category": "greetings", "query": "good night"},
|
||||||
|
{"id": 18, "tier": "light", "category": "greetings", "query": "всё понятно"},
|
||||||
|
{"id": 19, "tier": "light", "category": "greetings", "query": "да"},
|
||||||
|
{"id": 20, "tier": "light", "category": "greetings", "query": "нет"},
|
||||||
|
{"id": 21, "tier": "light", "category": "greetings", "query": "не нужно"},
|
||||||
|
{"id": 22, "tier": "light", "category": "greetings", "query": "отмена"},
|
||||||
|
{"id": 23, "tier": "light", "category": "greetings", "query": "стоп"},
|
||||||
|
{"id": 24, "tier": "light", "category": "greetings", "query": "подожди"},
|
||||||
|
{"id": 25, "tier": "light", "category": "greetings", "query": "повтори"},
|
||||||
|
{"id": 26, "tier": "light", "category": "greetings", "query": "ты тут?"},
|
||||||
|
{"id": 27, "tier": "light", "category": "greetings", "query": "слышишь меня?"},
|
||||||
|
{"id": 28, "tier": "light", "category": "greetings", "query": "всё ок"},
|
||||||
|
{"id": 29, "tier": "light", "category": "greetings", "query": "хорошо"},
|
||||||
|
{"id": 30, "tier": "light", "category": "greetings", "query": "пожалуйста"},
|
||||||
|
|
||||||
|
{"id": 31, "tier": "medium", "category": "weather_commute", "query": "какая сегодня погода в Балашихе"},
|
||||||
|
{"id": 32, "tier": "medium", "category": "weather_commute", "query": "пойдет ли сегодня дождь"},
|
||||||
|
{"id": 33, "tier": "medium", "category": "weather_commute", "query": "какая температура на улице сейчас"},
|
||||||
|
{"id": 34, "tier": "medium", "category": "weather_commute", "query": "будет ли снег сегодня"},
|
||||||
|
{"id": 35, "tier": "medium", "category": "weather_commute", "query": "погода на завтра"},
|
||||||
|
{"id": 36, "tier": "medium", "category": "weather_commute", "query": "сколько ехать до Москвы сейчас"},
|
||||||
|
{"id": 37, "tier": "medium", "category": "weather_commute", "query": "какие пробки на дороге до Москвы"},
|
||||||
|
{"id": 38, "tier": "medium", "category": "weather_commute", "query": "время в пути на работу"},
|
||||||
|
{"id": 39, "tier": "medium", "category": "weather_commute", "query": "есть ли пробки сейчас"},
|
||||||
|
{"id": 40, "tier": "medium", "category": "weather_commute", "query": "стоит ли брать зонтик"},
|
||||||
|
|
||||||
|
{"id": 41, "tier": "medium", "category": "smart_home_control", "query": "включи свет в гостиной"},
|
||||||
|
{"id": 42, "tier": "medium", "category": "smart_home_control", "query": "выключи свет на кухне"},
|
||||||
|
{"id": 43, "tier": "medium", "category": "smart_home_control", "query": "какая температура дома"},
|
||||||
|
{"id": 44, "tier": "medium", "category": "smart_home_control", "query": "установи температуру 22 градуса"},
|
||||||
|
{"id": 45, "tier": "medium", "category": "smart_home_control", "query": "включи свет в спальне на 50 процентов"},
|
||||||
|
{"id": 46, "tier": "medium", "category": "smart_home_control", "query": "выключи все лампочки"},
|
||||||
|
{"id": 47, "tier": "medium", "category": "smart_home_control", "query": "какие устройства сейчас включены"},
|
||||||
|
{"id": 48, "tier": "medium", "category": "smart_home_control", "query": "закрыты ли все окна"},
|
||||||
|
{"id": 49, "tier": "medium", "category": "smart_home_control", "query": "включи вентилятор в детской"},
|
||||||
|
{"id": 50, "tier": "medium", "category": "smart_home_control", "query": "есть ли кто-нибудь дома"},
|
||||||
|
{"id": 51, "tier": "medium", "category": "smart_home_control", "query": "включи ночной режим"},
|
||||||
|
{"id": 52, "tier": "medium", "category": "smart_home_control", "query": "какое потребление электричества сегодня"},
|
||||||
|
{"id": 53, "tier": "medium", "category": "smart_home_control", "query": "выключи телевизор"},
|
||||||
|
{"id": 54, "tier": "medium", "category": "smart_home_control", "query": "открой шторы в гостиной"},
|
||||||
|
{"id": 55, "tier": "medium", "category": "smart_home_control", "query": "установи будильник на 7 утра"},
|
||||||
|
{"id": 56, "tier": "medium", "category": "smart_home_control", "query": "включи кофемашину"},
|
||||||
|
{"id": 57, "tier": "medium", "category": "smart_home_control", "query": "выключи свет во всём доме"},
|
||||||
|
{"id": 58, "tier": "medium", "category": "smart_home_control", "query": "сколько у нас датчиков движения"},
|
||||||
|
{"id": 59, "tier": "medium", "category": "smart_home_control", "query": "состояние всех дверных замков"},
|
||||||
|
{"id": 60, "tier": "medium", "category": "smart_home_control", "query": "включи режим кино в гостиной"},
|
||||||
|
{"id": 61, "tier": "medium", "category": "smart_home_control", "query": "прибавь яркость в детской"},
|
||||||
|
{"id": 62, "tier": "medium", "category": "smart_home_control", "query": "закрой все шторы"},
|
||||||
|
{"id": 63, "tier": "medium", "category": "smart_home_control", "query": "кто последний открывал входную дверь"},
|
||||||
|
{"id": 64, "tier": "medium", "category": "smart_home_control", "query": "заблокируй входную дверь"},
|
||||||
|
{"id": 65, "tier": "medium", "category": "smart_home_control", "query": "покажи камеру у входа"},
|
||||||
|
|
||||||
|
{"id": 66, "tier": "medium", "category": "timers_reminders", "query": "поставь таймер на 10 минут"},
|
||||||
|
{"id": 67, "tier": "medium", "category": "timers_reminders", "query": "напомни мне позвонить врачу в 15:00"},
|
||||||
|
{"id": 68, "tier": "medium", "category": "timers_reminders", "query": "поставь будильник на завтра в 6:30"},
|
||||||
|
{"id": 69, "tier": "medium", "category": "timers_reminders", "query": "напомни выключить плиту через 20 минут"},
|
||||||
|
{"id": 70, "tier": "medium", "category": "timers_reminders", "query": "сколько времени осталось на таймере"},
|
||||||
|
|
||||||
|
{"id": 71, "tier": "medium", "category": "shopping_cooking", "query": "добавь молоко в список покупок"},
|
||||||
|
{"id": 72, "tier": "medium", "category": "shopping_cooking", "query": "что есть в списке покупок"},
|
||||||
|
{"id": 73, "tier": "medium", "category": "shopping_cooking", "query": "добавь хлеб и яйца в список покупок"},
|
||||||
|
{"id": 74, "tier": "medium", "category": "shopping_cooking", "query": "сколько граммов муки нужно для блинов на 4 человека"},
|
||||||
|
{"id": 75, "tier": "medium", "category": "shopping_cooking", "query": "какой рецепт борща ты знаешь"},
|
||||||
|
|
||||||
|
{"id": 76, "tier": "medium", "category": "personal_memory", "query": "как меня зовут"},
|
||||||
|
{"id": 77, "tier": "medium", "category": "personal_memory", "query": "где я живу"},
|
||||||
|
{"id": 78, "tier": "medium", "category": "personal_memory", "query": "что мы обсуждали в прошлый раз"},
|
||||||
|
{"id": 79, "tier": "medium", "category": "personal_memory", "query": "что ты знаешь о моем домашнем сервере"},
|
||||||
|
{"id": 80, "tier": "medium", "category": "personal_memory", "query": "напомни, какие сервисы я запускаю"},
|
||||||
|
{"id": 81, "tier": "medium", "category": "personal_memory", "query": "что я говорил о своей сети"},
|
||||||
|
{"id": 82, "tier": "medium", "category": "personal_memory", "query": "что я просил тебя запомнить"},
|
||||||
|
|
||||||
|
{"id": 83, "tier": "medium", "category": "quick_info", "query": "какой сейчас курс биткоина"},
|
||||||
|
{"id": 84, "tier": "medium", "category": "quick_info", "query": "курс доллара к рублю сейчас"},
|
||||||
|
{"id": 85, "tier": "medium", "category": "quick_info", "query": "есть ли проблемы у Cloudflare сегодня"},
|
||||||
|
{"id": 86, "tier": "medium", "category": "quick_info", "query": "какая последняя версия Docker"},
|
||||||
|
{"id": 87, "tier": "medium", "category": "quick_info", "query": "какие новые функции в Home Assistant 2024"},
|
||||||
|
{"id": 88, "tier": "medium", "category": "quick_info", "query": "как проверить использование диска в Linux"},
|
||||||
|
{"id": 89, "tier": "medium", "category": "quick_info", "query": "как перезапустить Docker контейнер"},
|
||||||
|
{"id": 90, "tier": "medium", "category": "quick_info", "query": "как посмотреть логи Docker контейнера"},
|
||||||
|
|
||||||
|
{"id": 91, "tier": "complex", "category": "infrastructure", "query": "исследуй и сравни Proxmox, Unraid и TrueNAS для домашней лаборатории"},
|
||||||
|
{"id": 92, "tier": "complex", "category": "infrastructure", "query": "напиши подробное руководство по безопасности домашнего сервера, подключенного к интернету"},
|
||||||
|
{"id": 93, "tier": "complex", "category": "infrastructure", "query": "исследуй все доступные дашборды для самохостинга и сравни их функции"},
|
||||||
|
{"id": 94, "tier": "complex", "category": "infrastructure", "query": "исследуй лучший стек мониторинга для самохостинга в 2024 году со всеми вариантами"},
|
||||||
|
{"id": 95, "tier": "complex", "category": "infrastructure", "query": "сравни все системы резервного копирования для Linux: Restic, Borg, Duplicati, Timeshift"},
|
||||||
|
{"id": 96, "tier": "complex", "category": "infrastructure", "query": "напиши полное руководство по настройке обратного прокси Caddy для домашнего сервера с SSL"},
|
||||||
|
{"id": 97, "tier": "complex", "category": "network", "query": "исследуй и сравни WireGuard, OpenVPN и Tailscale для домашней VPN с детальными плюсами и минусами"},
|
||||||
|
{"id": 98, "tier": "complex", "category": "network", "query": "исследуй лучшие практики сегментации домашней сети с VLAN и правилами файрвола"},
|
||||||
|
{"id": 99, "tier": "complex", "category": "network", "query": "изучи все самохостируемые DNS решения и их возможности"},
|
||||||
|
{"id": 100, "tier": "complex", "category": "network", "query": "исследуй лучшие самохостируемые системы мониторинга сети: Zabbix, Grafana, Prometheus, Netdata"},
|
||||||
|
{"id": 101, "tier": "complex", "category": "home_assistant", "query": "исследуй и сравни все платформы умного дома: Home Assistant, OpenHAB и Domoticz"},
|
||||||
|
{"id": 102, "tier": "complex", "category": "home_assistant", "query": "изучи лучшие Zigbee координаторы и их совместимость с Home Assistant в 2024 году"},
|
||||||
|
{"id": 103, "tier": "complex", "category": "home_assistant", "query": "напиши детальный отчет о поддержке протокола Matter и совместимых устройствах"},
|
||||||
|
{"id": 104, "tier": "complex", "category": "home_assistant", "query": "исследуй все способы интеграции умных ламп с Home Assistant: Zigbee, WiFi, Bluetooth"},
|
||||||
|
{"id": 105, "tier": "complex", "category": "home_assistant", "query": "найди и сравни все варианты датчиков движения для умного дома с оценками и ценами"},
|
||||||
|
{"id": 106, "tier": "complex", "category": "home_assistant", "query": "напиши подробное руководство по настройке автоматизаций в Home Assistant для умного освещения"},
|
||||||
|
{"id": 107, "tier": "complex", "category": "home_assistant", "query": "исследуй все варианты голосового управления умным домом на русском языке, включая локальные решения"},
|
||||||
|
{"id": 108, "tier": "complex", "category": "home_assistant", "query": "исследуй все протоколы умного дома и их плюсы и минусы: Zigbee, Z-Wave, WiFi, Thread, Bluetooth"},
|
||||||
|
{"id": 109, "tier": "complex", "category": "media_files", "query": "исследуй и сравни все самохостируемые решения для хранения фотографий с детальным сравнением функций"},
|
||||||
|
{"id": 110, "tier": "complex", "category": "media_files", "query": "изучи лучшие самохостируемые медиасерверы: Jellyfin, Plex и Emby — с характеристиками и отзывами"},
|
||||||
|
{"id": 111, "tier": "complex", "category": "media_files", "query": "сравни все самохостируемые облачные хранилища: Nextcloud, Seafile, Owncloud — производительность и функции"},
|
||||||
|
{"id": 112, "tier": "complex", "category": "research", "query": "исследуй последние достижения в локальном LLM инференсе и оборудовании для него"},
|
||||||
|
{"id": 113, "tier": "complex", "category": "research", "query": "изучи лучшие опенсорс альтернативы Google сервисов для приватного домашнего окружения"},
|
||||||
|
{"id": 114, "tier": "complex", "category": "research", "query": "изучи все варианты локального запуска языковых моделей на видеокарте 8 ГБ VRAM"},
|
||||||
|
{"id": 115, "tier": "complex", "category": "research", "query": "найди и сравни все фреймворки для создания локальных AI ассистентов с открытым исходным кодом"},
|
||||||
|
{"id": 116, "tier": "complex", "category": "research", "query": "изучи все доступные локальные ассистенты с голосовым управлением на русском языке"},
|
||||||
|
{"id": 117, "tier": "complex", "category": "infrastructure", "query": "изучи свежие CVE и уязвимости в популярном самохостируемом ПО: Gitea, Nextcloud, Jellyfin"},
|
||||||
|
{"id": 118, "tier": "complex", "category": "infrastructure", "query": "напиши детальное сравнение систем управления конфигурацией: Ansible, Salt, Puppet для домашнего окружения"},
|
||||||
|
{"id": 119, "tier": "complex", "category": "network", "query": "исследуй все самохостируемые решения для блокировки рекламы: Pi-hole, AdGuard Home, NextDNS"},
|
||||||
|
{"id": 120, "tier": "complex", "category": "research", "query": "напиши подробный отчет о технологиях синтеза речи с открытым исходным кодом на русском языке"}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -30,7 +30,7 @@ import httpx
|
|||||||
ADOLF_URL = "http://localhost:8000"
|
ADOLF_URL = "http://localhost:8000"
|
||||||
DATASET = Path(__file__).parent / "benchmark.json"
|
DATASET = Path(__file__).parent / "benchmark.json"
|
||||||
RESULTS = Path(__file__).parent / "routing_results_latest.json"
|
RESULTS = Path(__file__).parent / "routing_results_latest.json"
|
||||||
QUERY_TIMEOUT = 30 # seconds — routing is fast, no LLM wait
|
QUERY_TIMEOUT = 1 # 1s strict deadline — routing must decide within 1 second
|
||||||
|
|
||||||
|
|
||||||
# ── Log helpers ────────────────────────────────────────────────────────────────
|
# ── Log helpers ────────────────────────────────────────────────────────────────
|
||||||
@@ -139,9 +139,10 @@ async def run(queries: list[dict]) -> list[dict]:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass # timeout or connection issue — check logs anyway
|
pass # timeout or connection issue — check logs anyway
|
||||||
|
|
||||||
await asyncio.sleep(0.3)
|
|
||||||
logs_after = get_log_tail(300)
|
logs_after = get_log_tail(300)
|
||||||
actual = extract_tier_from_logs(logs_before, logs_after)
|
actual = extract_tier_from_logs(logs_before, logs_after)
|
||||||
|
if actual is None:
|
||||||
|
actual = "timeout"
|
||||||
|
|
||||||
elapsed = time.monotonic() - t0
|
elapsed = time.monotonic() - t0
|
||||||
match = actual == expected or (actual == "fast" and expected == "medium")
|
match = actual == expected or (actual == "fast" and expected == "medium")
|
||||||
@@ -149,7 +150,7 @@ async def run(queries: list[dict]) -> list[dict]:
|
|||||||
correct += 1
|
correct += 1
|
||||||
|
|
||||||
mark = "✓" if match else "✗"
|
mark = "✓" if match else "✗"
|
||||||
actual_str = actual or "?"
|
actual_str = actual
|
||||||
print(f"{actual_str:8} {mark:3} {elapsed:5.1f}s {category:22} {query_text[:40]}")
|
print(f"{actual_str:8} {mark:3} {elapsed:5.1f}s {category:22} {query_text[:40]}")
|
||||||
|
|
||||||
results.append({
|
results.append({
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ Usage:
|
|||||||
python3 run_voice_benchmark.py [options]
|
python3 run_voice_benchmark.py [options]
|
||||||
python3 run_voice_benchmark.py --tier light|medium|complex
|
python3 run_voice_benchmark.py --tier light|medium|complex
|
||||||
python3 run_voice_benchmark.py --ids 1,2,3
|
python3 run_voice_benchmark.py --ids 1,2,3
|
||||||
python3 run_voice_benchmark.py --dry-run # complex queries use medium model
|
python3 run_voice_benchmark.py --no-inference # skip LLM inference — routing only, all tiers
|
||||||
|
|
||||||
IMPORTANT: Always check GPU is free before running. Done automatically.
|
IMPORTANT: Always check GPU is free before running. Done automatically.
|
||||||
|
|
||||||
@@ -210,9 +210,9 @@ def get_log_tail(n: int = 60) -> str:
|
|||||||
|
|
||||||
def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
|
def extract_tier_from_logs(logs_before: str, logs_after: str) -> str | None:
|
||||||
before_lines = set(logs_before.splitlines())
|
before_lines = set(logs_before.splitlines())
|
||||||
new_lines = [l for l in logs_after.splitlines() if l not in before_lines]
|
new_lines = [line for line in logs_after.splitlines() if line not in before_lines]
|
||||||
for line in reversed(new_lines):
|
for line in new_lines:
|
||||||
m = re.search(r"tier=(\w+(?:\s*\(dry-run\))?)", line)
|
m = re.search(r"tier=(\w+(?:\s*\(no-inference\))?)", line)
|
||||||
if m:
|
if m:
|
||||||
return m.group(1).split()[0]
|
return m.group(1).split()[0]
|
||||||
return None
|
return None
|
||||||
@@ -222,14 +222,14 @@ async def post_to_adolf(
|
|||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
query_id: int,
|
query_id: int,
|
||||||
text: str,
|
text: str,
|
||||||
dry_run: bool = False,
|
no_inference: bool = False,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
payload = {
|
payload = {
|
||||||
"text": text,
|
"text": text,
|
||||||
"session_id": f"voice-bench-{query_id}",
|
"session_id": f"voice-bench-{query_id}",
|
||||||
"channel": "cli",
|
"channel": "cli",
|
||||||
"user_id": "benchmark",
|
"user_id": "benchmark",
|
||||||
"metadata": {"dry_run": dry_run, "benchmark": True, "voice": True},
|
"metadata": {"no_inference": no_inference, "benchmark": True, "voice": True},
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
r = await client.post(f"{ADOLF_URL}/message", json=payload, timeout=10)
|
r = await client.post(f"{ADOLF_URL}/message", json=payload, timeout=10)
|
||||||
@@ -259,7 +259,7 @@ def filter_queries(queries, tier, category, ids):
|
|||||||
|
|
||||||
# ── Main run ───────────────────────────────────────────────────────────────────
|
# ── Main run ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def run(queries: list[dict], dry_run: bool = False, save_audio: bool = False) -> None:
|
async def run(queries: list[dict], no_inference: bool = False, save_audio: bool = False) -> None:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
# Check Adolf
|
# Check Adolf
|
||||||
try:
|
try:
|
||||||
@@ -272,7 +272,7 @@ async def run(queries: list[dict], dry_run: bool = False, save_audio: bool = Fal
|
|||||||
total = len(queries)
|
total = len(queries)
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
dry_label = " [DRY-RUN]" if dry_run else ""
|
dry_label = " [NO-INFERENCE: routing only]" if no_inference else ""
|
||||||
print(f"Voice benchmark: {total} queries{dry_label}\n")
|
print(f"Voice benchmark: {total} queries{dry_label}\n")
|
||||||
print(f"{'ID':>3} {'EXP':8} {'ACT':8} {'OK':3} {'WER':5} {'TRANSCRIPT'}")
|
print(f"{'ID':>3} {'EXP':8} {'ACT':8} {'OK':3} {'WER':5} {'TRANSCRIPT'}")
|
||||||
print("─" * 100)
|
print("─" * 100)
|
||||||
@@ -312,11 +312,10 @@ async def run(queries: list[dict], dry_run: bool = False, save_audio: bool = Fal
|
|||||||
wer_count += 1
|
wer_count += 1
|
||||||
|
|
||||||
# Step 3: Send to Adolf
|
# Step 3: Send to Adolf
|
||||||
send_dry = dry_run and expected == "complex"
|
|
||||||
logs_before = get_log_tail(60)
|
logs_before = get_log_tail(60)
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
ok_post = await post_to_adolf(client, qid, transcript, dry_run=send_dry)
|
ok_post = await post_to_adolf(client, qid, transcript, no_inference=no_inference)
|
||||||
if not ok_post:
|
if not ok_post:
|
||||||
print(f"{'?':8} {'ERR':3} {wer:4.2f} {transcript[:50]}")
|
print(f"{'?':8} {'ERR':3} {wer:4.2f} {transcript[:50]}")
|
||||||
results.append({"id": qid, "expected": expected, "actual": None, "ok": False, "wer": wer, "transcript": transcript})
|
results.append({"id": qid, "expected": expected, "actual": None, "ok": False, "wer": wer, "transcript": transcript})
|
||||||
@@ -349,7 +348,7 @@ async def run(queries: list[dict], dry_run: bool = False, save_audio: bool = Fal
|
|||||||
"original": original,
|
"original": original,
|
||||||
"transcript": transcript,
|
"transcript": transcript,
|
||||||
"elapsed": round(elapsed, 1),
|
"elapsed": round(elapsed, 1),
|
||||||
"dry_run": send_dry,
|
"no_inference": no_inference,
|
||||||
})
|
})
|
||||||
|
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
@@ -374,7 +373,7 @@ async def run(queries: list[dict], dry_run: bool = False, save_audio: bool = Fal
|
|||||||
if wrong:
|
if wrong:
|
||||||
print(f"\nMisclassified after voice ({len(wrong)}):")
|
print(f"\nMisclassified after voice ({len(wrong)}):")
|
||||||
for r in wrong:
|
for r in wrong:
|
||||||
print(f" id={r['id']:3} expected={r.get('expected','?'):8} actual={r.get('actual','?'):8} transcript={r.get('transcript','')[:50]}")
|
print(f" id={r['id']:3} expected={r.get('expected') or '?':8} actual={r.get('actual') or '?':8} transcript={r.get('transcript','')[:50]}")
|
||||||
|
|
||||||
high_wer = [r for r in results if r.get("wer") and r["wer"] > 0.3]
|
high_wer = [r for r in results if r.get("wer") and r["wer"] > 0.3]
|
||||||
if high_wer:
|
if high_wer:
|
||||||
@@ -402,14 +401,14 @@ def main():
|
|||||||
parser.add_argument("--tier", choices=["light", "medium", "complex"])
|
parser.add_argument("--tier", choices=["light", "medium", "complex"])
|
||||||
parser.add_argument("--category")
|
parser.add_argument("--category")
|
||||||
parser.add_argument("--ids", help="Comma-separated IDs")
|
parser.add_argument("--ids", help="Comma-separated IDs")
|
||||||
parser.add_argument("--dry-run", action="store_true",
|
parser.add_argument("--no-inference", action="store_true",
|
||||||
help="Complex queries use medium model for inference (no API cost)")
|
help="Skip LLM inference for all tiers — routing decisions only (no GPU/API cost)")
|
||||||
parser.add_argument("--save-audio", action="store_true",
|
parser.add_argument("--save-audio", action="store_true",
|
||||||
help="Save synthesized WAV files to voice_audio/ directory")
|
help="Save synthesized WAV files to voice_audio/ directory")
|
||||||
parser.add_argument("--skip-gpu-check", action="store_true")
|
parser.add_argument("--skip-gpu-check", action="store_true")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if not preflight_checks(skip_gpu_check=args.skip_gpu_check):
|
if not preflight_checks(skip_gpu_check=args.skip_gpu_check or args.no_inference):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
queries = load_dataset()
|
queries = load_dataset()
|
||||||
@@ -419,7 +418,7 @@ def main():
|
|||||||
print("No queries match filters.")
|
print("No queries match filters.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
asyncio.run(run(queries, dry_run=args.dry_run, save_audio=args.save_audio))
|
asyncio.run(run(queries, no_inference=args.no_inference, save_audio=args.save_audio))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
11
router.py
11
router.py
@@ -52,6 +52,10 @@ _LIGHT_PATTERNS = re.compile(
|
|||||||
r"|окей|хорошо|отлично|понятно|ок|ладно|договорились|спс|благодарю"
|
r"|окей|хорошо|отлично|понятно|ок|ладно|договорились|спс|благодарю"
|
||||||
r"|пожалуйста|не за что|всё понятно|ясно"
|
r"|пожалуйста|не за что|всё понятно|ясно"
|
||||||
r"|как дела|как ты|как жизнь|всё хорошо|всё ок"
|
r"|как дела|как ты|как жизнь|всё хорошо|всё ок"
|
||||||
|
# Assistant control words / confirmations
|
||||||
|
r"|да|нет|стоп|отмена|отменить|подожди|повтори|повторить|не нужно|не надо"
|
||||||
|
r"|слышишь\s+меня|ты\s+тут|отлично[,!]?\s+спасибо"
|
||||||
|
r"|yes|no|stop|cancel|wait|repeat"
|
||||||
# Russian tech definitions — static knowledge (no tools needed)
|
# Russian tech definitions — static knowledge (no tools needed)
|
||||||
r"|что\s+такое\s+\S+"
|
r"|что\s+такое\s+\S+"
|
||||||
r"|что\s+означает\s+\S+"
|
r"|что\s+означает\s+\S+"
|
||||||
@@ -422,10 +426,11 @@ class Router:
|
|||||||
self,
|
self,
|
||||||
message: str,
|
message: str,
|
||||||
history: list[dict],
|
history: list[dict],
|
||||||
|
no_inference: bool = False,
|
||||||
) -> tuple[str, Optional[str]]:
|
) -> tuple[str, Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Returns (tier, reply_or_None).
|
Returns (tier, reply_or_None).
|
||||||
For light tier: also generates the reply inline.
|
For light tier: also generates the reply inline (unless no_inference=True).
|
||||||
For medium/complex: reply is None.
|
For medium/complex: reply is None.
|
||||||
"""
|
"""
|
||||||
if self._fast_tool_runner and self._fast_tool_runner.any_matches(message.strip()):
|
if self._fast_tool_runner and self._fast_tool_runner.any_matches(message.strip()):
|
||||||
@@ -435,6 +440,8 @@ class Router:
|
|||||||
|
|
||||||
if _LIGHT_PATTERNS.match(message.strip()):
|
if _LIGHT_PATTERNS.match(message.strip()):
|
||||||
print("[router] regex→light", flush=True)
|
print("[router] regex→light", flush=True)
|
||||||
|
if no_inference:
|
||||||
|
return "light", None
|
||||||
return await self._generate_light_reply(message, history)
|
return await self._generate_light_reply(message, history)
|
||||||
|
|
||||||
if _COMPLEX_PATTERNS.search(message.strip()):
|
if _COMPLEX_PATTERNS.search(message.strip()):
|
||||||
@@ -447,7 +454,7 @@ class Router:
|
|||||||
|
|
||||||
tier = await self._classify_by_embedding(message)
|
tier = await self._classify_by_embedding(message)
|
||||||
|
|
||||||
if tier != "light":
|
if tier != "light" or no_inference:
|
||||||
return tier, None
|
return tier, None
|
||||||
|
|
||||||
return await self._generate_light_reply(message, history)
|
return await self._generate_light_reply(message, history)
|
||||||
|
|||||||
Reference in New Issue
Block a user