diff --git a/adolf/openmemory/server.py b/adolf/openmemory/server.py
new file mode 100644
index 0000000..fd85b37
--- /dev/null
+++ b/adolf/openmemory/server.py
@@ -0,0 +1,64 @@
+import os
+from mcp.server.fastmcp import FastMCP
+from mem0 import Memory
+
+OLLAMA_CPU_URL = os.getenv("OLLAMA_CPU_URL", "http://host.docker.internal:11435")
+QDRANT_HOST = os.getenv("QDRANT_HOST", "host.docker.internal")
+QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
+
+# mem0 config: CPU-hosted Ollama for LLM extraction and embeddings, Qdrant for vectors.
+config = {
+    "llm": {
+        "provider": "ollama",
+        "config": {
+            "model": "qwen2.5:1.5b",
+            "ollama_base_url": OLLAMA_CPU_URL,
+        },
+    },
+    "embedder": {
+        "provider": "ollama",
+        "config": {
+            "model": "nomic-embed-text",
+            "ollama_base_url": OLLAMA_CPU_URL,
+        },
+    },
+    "vector_store": {
+        "provider": "qdrant",
+        "config": {
+            "collection_name": "adolf_memories",
+            "embedding_model_dims": 768,
+            "host": QDRANT_HOST,
+            "port": QDRANT_PORT,
+        },
+    },
+}
+
+memory = Memory.from_config(config)
+
+# Expose memory operations as MCP tools over SSE.
+mcp = FastMCP("openmemory", host="0.0.0.0", port=8765)
+
+
+@mcp.tool()
+def add_memory(text: str, user_id: str = "default") -> str:
+    """Store a memory for a user."""
+    result = memory.add(text, user_id=user_id)
+    return str(result)
+
+
+@mcp.tool()
+def search_memory(query: str, user_id: str = "default") -> str:
+    """Search memories for a user using semantic similarity."""
+    results = memory.search(query, user_id=user_id)
+    return str(results)
+
+
+@mcp.tool()
+def get_all_memories(user_id: str = "default") -> str:
+    """Get all stored memories for a user."""
+    results = memory.get_all(user_id=user_id)
+    return str(results)
+
+
+if __name__ == "__main__":
+    mcp.run(transport="sse")
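For reference, here is how a client might exercise these tools once the server is running. This is a minimal sketch using the MCP Python SDK's SSE client; the `localhost` URL and `/sse` path are assumptions based on FastMCP's defaults for the host and port configured above:

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main() -> None:
    # FastMCP serves SSE at /sse by default; port 8765 matches the server config.
    async with sse_client("http://localhost:8765/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "add_memory",
                {"text": "User prefers concise answers", "user_id": "default"},
            )
            print(result)


asyncio.run(main())
```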
diff --git a/adolf/potential-directions.md b/adolf/potential-directions.md
new file mode 100644
index 0000000..b3ae04e
--- /dev/null
+++ b/adolf/potential-directions.md
@@ -0,0 +1,13 @@
+# Potential Directions
+
+## CPU Extraction Model Candidates (mem0 / openmemory)
+
+Candidates to replace `gemma3:1b`: its documented JSON/structured-output failures make it unreliable for mem0's extraction pipeline.
+
+| Rank | Model | Size | CPU speed | JSON reliability | Notes |
+|------|-------|------|-----------|------------------|-------|
+| 1 | `qwen2.5:1.5b` | ~934 MB | 25–40 tok/s | Excellent | Best fit: fast, reliable structured output; trained on 18T tokens |
+| 2 | `qwen2.5:3b` | ~1.9 GB | 15–25 tok/s | Excellent | Quality upgrade, same family |
+| 3 | `llama3.2:3b` | ~2 GB | 15–25 tok/s | Good | Highest IFEval score (77.4) in class |
+| 4 | `smollm2:1.7b` | ~1.1 GB | 25–35 tok/s | Moderate | Use temp=0; NuExtract-1.5-smol is a fine-tuned variant |
+| 5 | `phi4-mini` | ~2.5 GB | 10–17 tok/s | Good | Function calling support; borderline CPU speed |
diff --git a/adolf/test_pipeline.py b/adolf/test_pipeline.py
index b541c65..edacf6b 100644
--- a/adolf/test_pipeline.py
+++ b/adolf/test_pipeline.py
@@ -133,14 +133,14 @@ try:
     status, body = get(f"{OLLAMA_CPU}/api/tags")
     models = [m["name"] for m in json.loads(body).get("models", [])]
     has_embed = any("nomic-embed-text" in m for m in models)
-    has_gemma = any("gemma3:1b" in m for m in models)
+    has_qwen = any("qwen2.5:1.5b" in m for m in models)
     report("CPU Ollama reachable", True, f"models: {models}")
     report("nomic-embed-text present on CPU Ollama", has_embed)
-    report("gemma3:1b present on CPU Ollama", has_gemma)
+    report("qwen2.5:1.5b present on CPU Ollama", has_qwen)
 except Exception as e:
     report("CPU Ollama reachable", False, str(e))
     report("nomic-embed-text present on CPU Ollama", False, "skipped")
-    report("gemma3:1b present on CPU Ollama", False, "skipped")
+    report("qwen2.5:1.5b present on CPU Ollama", False, "skipped")

 # ── 4. Qdrant ─────────────────────────────────────────────────────────────────
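The table's "Excellent" JSON rating for `qwen2.5:1.5b` can be spot-checked against the CPU Ollama instance before the model is relied on inside mem0. A minimal sketch; the `localhost` URL and prompt are illustrative, and Ollama's `format: "json"` option constrains decoding to valid JSON:

```python
import json

import requests

# Assumes the CPU Ollama instance from server.py, reachable from the host.
OLLAMA_CPU = "http://localhost:11435"

resp = requests.post(
    f"{OLLAMA_CPU}/api/generate",
    json={
        "model": "qwen2.5:1.5b",
        "prompt": (
            "Extract facts from: 'Alice lives in Berlin and likes tea.' "
            'Reply as JSON: {"facts": ["..."]}'
        ),
        "format": "json",  # constrain output to valid JSON
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
facts = json.loads(resp.json()["response"])  # raises ValueError on malformed output
print(facts)
```

Running this a handful of times with varied prompts gives a rough reliability check before swapping the model into the mem0 config.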