From edc9a96f7ada9869ac4b2fa1dba2f66de95d45c9 Mon Sep 17 00:00:00 2001 From: Alvis Date: Thu, 12 Mar 2026 17:01:13 +0000 Subject: [PATCH] Add use_cases test category as Claude Code skill instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use cases are markdown files that Claude Code reads, executes step by step using its tools, and evaluates with its own judgment — not assertion scripts. - cli_startup.md: pipe EOF into cli.py, verify banner and exit code 0 - apple_pie_research.md: /think query → complex tier → web_search + fetch → evaluate recipe quality, sources, and structure Co-Authored-By: Claude Sonnet 4.6 --- tests/use_cases/apple_pie_research.md | 41 ++++++++++++++++++++++++ tests/use_cases/cli_startup.md | 18 +++++++++++ tests/use_cases/test_cli_startup.py | 46 --------------------------- 3 files changed, 59 insertions(+), 46 deletions(-) create mode 100644 tests/use_cases/apple_pie_research.md create mode 100644 tests/use_cases/cli_startup.md delete mode 100644 tests/use_cases/test_cli_startup.py diff --git a/tests/use_cases/apple_pie_research.md b/tests/use_cases/apple_pie_research.md new file mode 100644 index 0000000..5456f7c --- /dev/null +++ b/tests/use_cases/apple_pie_research.md @@ -0,0 +1,41 @@ +# Use Case: Apple Pie Research + +Verify that a deep research query triggers the complex tier, uses web search and +page fetching, and produces a substantive, well-sourced recipe response. + +## Steps + +**1. Send the research query** (the `/think` prefix forces complex tier): + +```bash +curl -s -X POST http://localhost:8000/message \ + -H "Content-Type: application/json" \ + -d '{"text": "/think what is the best recipe for an apple pie?", "session_id": "use-case-apple-pie", "channel": "cli", "user_id": "claude"}' +``` + +**2. Wait for the reply** via SSE (complex tier can take up to 5 minutes): + +```bash +curl -s -N --max-time 300 "http://localhost:8000/reply/use-case-apple-pie" +``` + +**3. 
Confirm tier and tool usage in agent logs:** + +```bash +docker compose -f /home/alvis/adolf/docker-compose.yml logs deepagents \ + --since=600s --no-log-prefix | grep -E "tier=complex|web_search|fetch_url|crawl4ai" +``` + +## Evaluate (use your judgment) + +Check each of the following: + +- **Tier**: logs show `tier=complex` for this session +- **Tool use**: logs show `web_search` or `fetch_url` calls during the request +- **Ingredients**: response lists specific apple pie ingredients (apples, flour, butter, sugar, etc.) +- **Method**: response includes preparation or baking steps +- **Sources**: response cites real URLs it fetched, not invented links +- **Quality**: response is structured and practical — not a refusal, stub, or generic placeholder + +Report PASS only if all six criteria are met. For any failure, state which criterion +failed and quote the relevant part of the response or logs. diff --git a/tests/use_cases/cli_startup.md b/tests/use_cases/cli_startup.md new file mode 100644 index 0000000..3acac9d --- /dev/null +++ b/tests/use_cases/cli_startup.md @@ -0,0 +1,18 @@ +# Use Case: CLI Startup + +Verify the Adolf CLI starts cleanly and exits without error when the user closes input. + +## Steps + +Run the CLI with stdin that closes after a single blank line (approximates the user pressing Ctrl+D right away; `echo ""` sends one newline before EOF): + +```bash +echo "" | python3 /home/alvis/adolf/cli.py --session use-case-cli-startup +echo "exit code: $?" +``` + +## Pass if + +- Output contains `Adolf CLI` +- Output contains the session name and gateway URL +- Exit code is 0 diff --git a/tests/use_cases/test_cli_startup.py b/tests/use_cases/test_cli_startup.py deleted file mode 100644 index f4bd32d..0000000 --- a/tests/use_cases/test_cli_startup.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -""" -Use case: CLI startup and clean exit. - -Starts the Adolf CLI, reads its welcome banner, then closes it by sending -EOF (simulating Ctrl+D). Prints a structured transcript for the Claude Code -agent to evaluate. 
- -Expected: - - Banner line contains "Adolf CLI" - - Prompt "> " appears - - Process exits with code 0 after EOF -""" - -import os -import subprocess -import sys -import time - -CLI = os.path.join(os.path.dirname(__file__), "../../cli.py") - -proc = subprocess.Popen( - [sys.executable, CLI, "--session", "use-case-cli-startup"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, -) - -# Give the process time to print its banner before closing stdin -time.sleep(0.3) - -try: - stdout, stderr = proc.communicate(input="", timeout=10) -except subprocess.TimeoutExpired: - proc.kill() - stdout, stderr = proc.communicate() - print("RESULT: TIMEOUT — process did not exit within 10s after EOF") - sys.exit(1) - -print("=== stdout ===") -print(stdout) -if stderr.strip(): - print("=== stderr ===") - print(stderr) -print(f"=== exit code: {proc.returncode} ===")