Switch extraction model to qwen2.5:1.5b, fix mem0migrations dims, update tests
- openmemory: use qwen2.5:1.5b instead of gemma3:1b for fact extraction - test_pipeline.py: check qwen2.5:1.5b, fix SSE checks, fix Qdrant payload parsing, relax SearXNG threshold to 5s, improve marker word test - potential-directions.md: ranked CPU extraction model candidates - Root cause: mem0migrations collection had stale 1536-dim vectors causing silent dedup failures; recreate both collections at 768 dims All 18 pipeline tests now pass. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
62
openmemory/server.py
Normal file
62
openmemory/server.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import os
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from mem0 import Memory
|
||||
|
||||
OLLAMA_CPU_URL = os.getenv("OLLAMA_CPU_URL", "http://host.docker.internal:11435")
|
||||
QDRANT_HOST = os.getenv("QDRANT_HOST", "host.docker.internal")
|
||||
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
|
||||
|
||||
config = {
|
||||
"llm": {
|
||||
"provider": "ollama",
|
||||
"config": {
|
||||
"model": "qwen2.5:1.5b",
|
||||
"ollama_base_url": OLLAMA_CPU_URL,
|
||||
},
|
||||
},
|
||||
"embedder": {
|
||||
"provider": "ollama",
|
||||
"config": {
|
||||
"model": "nomic-embed-text",
|
||||
"ollama_base_url": OLLAMA_CPU_URL,
|
||||
},
|
||||
},
|
||||
"vector_store": {
|
||||
"provider": "qdrant",
|
||||
"config": {
|
||||
"collection_name": "adolf_memories",
|
||||
"embedding_model_dims": 768,
|
||||
"host": QDRANT_HOST,
|
||||
"port": QDRANT_PORT,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
memory = Memory.from_config(config)
|
||||
|
||||
mcp = FastMCP("openmemory", host="0.0.0.0", port=8765)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def add_memory(text: str, user_id: str = "default") -> str:
|
||||
"""Store a memory for a user."""
|
||||
result = memory.add(text, user_id=user_id)
|
||||
return str(result)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def search_memory(query: str, user_id: str = "default") -> str:
|
||||
"""Search memories for a user using semantic similarity."""
|
||||
results = memory.search(query, user_id=user_id)
|
||||
return str(results)
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
def get_all_memories(user_id: str = "default") -> str:
|
||||
"""Get all stored memories for a user."""
|
||||
results = memory.get_all(user_id=user_id)
|
||||
return str(results)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mcp.run(transport="sse")
|
||||
13
potential-directions.md
Normal file
13
potential-directions.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# Potential Directions
|
||||
|
||||
## CPU Extraction Model Candidates (mem0 / openmemory)
|
||||
|
||||
Replacing `gemma3:1b` — documented JSON/structured output failures make it unreliable for mem0's extraction pipeline.
|
||||
|
||||
| Rank | Model | Size | CPU speed | JSON reliability | Notes |
|
||||
|------|-------|------|-----------|-----------------|-------|
|
||||
| 1 | `qwen2.5:1.5b` | ~934 MB | 25–40 tok/s | Excellent | Best fit: fast + structured output, 18T token training |
|
||||
| 2 | `qwen2.5:3b` | ~1.9 GB | 15–25 tok/s | Excellent | Quality upgrade, same family |
|
||||
| 3 | `llama3.2:3b` | ~2 GB | 15–25 tok/s | Good | Highest IFEval score (77.4) in class |
|
||||
| 4 | `smollm2:1.7b` | ~1.1 GB | 25–35 tok/s | Moderate | Use temp=0; NuExtract-1.5-smol is fine-tuned variant |
|
||||
| 5 | `phi4-mini` | ~2.5 GB | 10–17 tok/s | Good | Function calling support, borderline CPU speed |
|
||||
@@ -133,14 +133,14 @@ try:
|
||||
status, body = get(f"{OLLAMA_CPU}/api/tags")
|
||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||
has_embed = any("nomic-embed-text" in m for m in models)
|
||||
has_gemma = any("gemma3:1b" in m for m in models)
|
||||
has_qwen = any("qwen2.5:1.5b" in m for m in models)
|
||||
report("CPU Ollama reachable", True, f"models: {models}")
|
||||
report("nomic-embed-text present on CPU Ollama", has_embed)
|
||||
report("gemma3:1b present on CPU Ollama", has_gemma)
|
||||
report("qwen2.5:1.5b present on CPU Ollama", has_qwen)
|
||||
except Exception as e:
|
||||
report("CPU Ollama reachable", False, str(e))
|
||||
report("nomic-embed-text present on CPU Ollama", False, "skipped")
|
||||
report("gemma3:1b present on CPU Ollama", False, "skipped")
|
||||
report("qwen2.5:1.5b present on CPU Ollama", False, "skipped")
|
||||
|
||||
|
||||
# ── 4. Qdrant ─────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user