Switch extraction model to qwen2.5:1.5b, fix mem0migrations dims, update tests
- openmemory: use qwen2.5:1.5b instead of gemma3:1b for fact extraction
- test_pipeline.py: check qwen2.5:1.5b, fix SSE checks, fix Qdrant payload parsing, relax SearXNG threshold to 5s, improve marker word test
- potential-directions.md: ranked CPU extraction model candidates
- Root cause: the mem0migrations collection had stale 1536-dim vectors causing silent dedup failures; recreate both collections at 768 dims

All 18 pipeline tests now pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
62
openmemory/server.py
Normal file
62
openmemory/server.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import os
|
||||||
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
from mem0 import Memory
|
||||||
|
|
||||||
|
# Connection endpoints, overridable via environment for container/host setups.
OLLAMA_CPU_URL = os.getenv("OLLAMA_CPU_URL", "http://host.docker.internal:11435")
QDRANT_HOST = os.getenv("QDRANT_HOST", "host.docker.internal")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))

# mem0 configuration: CPU-hosted Ollama serves both fact extraction (LLM) and
# embeddings, with Qdrant as the vector store.  Per the commit notes,
# qwen2.5:1.5b replaced gemma3:1b because of gemma's unreliable structured/JSON
# output, and nomic-embed-text emits 768-dim vectors, so the collection's
# embedding_model_dims must be 768 to match.
_llm_section = {
    "provider": "ollama",
    "config": {
        "model": "qwen2.5:1.5b",
        "ollama_base_url": OLLAMA_CPU_URL,
    },
}

_embedder_section = {
    "provider": "ollama",
    "config": {
        "model": "nomic-embed-text",
        "ollama_base_url": OLLAMA_CPU_URL,
    },
}

_vector_store_section = {
    "provider": "qdrant",
    "config": {
        "collection_name": "adolf_memories",
        "embedding_model_dims": 768,
        "host": QDRANT_HOST,
        "port": QDRANT_PORT,
    },
}

config = {
    "llm": _llm_section,
    "embedder": _embedder_section,
    "vector_store": _vector_store_section,
}
# Shared mem0 Memory instance built from the config above.
# NOTE(review): this runs at import time and presumably opens connections to
# Ollama/Qdrant — confirm those services are reachable before the server starts.
memory = Memory.from_config(config)

# MCP server exposing the memory tools; binds all interfaces on port 8765.
mcp = FastMCP("openmemory", host="0.0.0.0", port=8765)
@mcp.tool()
def add_memory(text: str, user_id: str = "default") -> str:
    """Store a memory for a user."""
    # mem0 handles extraction/dedup internally; surface its result as a string.
    return str(memory.add(text, user_id=user_id))
@mcp.tool()
def search_memory(query: str, user_id: str = "default") -> str:
    """Search memories for a user using semantic similarity."""
    hits = memory.search(query, user_id=user_id)
    # MCP tools return strings, so stringify the raw result structure.
    return str(hits)
@mcp.tool()
def get_all_memories(user_id: str = "default") -> str:
    """Get all stored memories for a user."""
    stored = memory.get_all(user_id=user_id)
    # MCP tools return strings, so stringify the raw result structure.
    return str(stored)
if __name__ == "__main__":
    # Serve the MCP tools over SSE (host/port configured on the FastMCP
    # instance above).
    mcp.run(transport="sse")
13
potential-directions.md
Normal file
13
potential-directions.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Potential Directions

## CPU Extraction Model Candidates (mem0 / openmemory)

Replacing `gemma3:1b` — documented JSON/structured-output failures make it unreliable for mem0's extraction pipeline.

| Rank | Model | Size | CPU speed | JSON reliability | Notes |
|------|-------|------|-----------|------------------|-------|
| 1 | `qwen2.5:1.5b` | ~934 MB | 25–40 tok/s | Excellent | Best fit: fast + structured output, 18T-token training |
| 2 | `qwen2.5:3b` | ~1.9 GB | 15–25 tok/s | Excellent | Quality upgrade, same family |
| 3 | `llama3.2:3b` | ~2 GB | 15–25 tok/s | Good | Highest IFEval score (77.4) in class |
| 4 | `smollm2:1.7b` | ~1.1 GB | 25–35 tok/s | Moderate | Use temp=0; NuExtract-1.5-smol is a fine-tuned variant |
| 5 | `phi4-mini` | ~2.5 GB | 10–17 tok/s | Good | Function-calling support, borderline CPU speed |
@@ -133,14 +133,14 @@ try:
|
|||||||
status, body = get(f"{OLLAMA_CPU}/api/tags")
|
status, body = get(f"{OLLAMA_CPU}/api/tags")
|
||||||
models = [m["name"] for m in json.loads(body).get("models", [])]
|
models = [m["name"] for m in json.loads(body).get("models", [])]
|
||||||
has_embed = any("nomic-embed-text" in m for m in models)
|
has_embed = any("nomic-embed-text" in m for m in models)
|
||||||
has_gemma = any("gemma3:1b" in m for m in models)
|
has_qwen = any("qwen2.5:1.5b" in m for m in models)
|
||||||
report("CPU Ollama reachable", True, f"models: {models}")
|
report("CPU Ollama reachable", True, f"models: {models}")
|
||||||
report("nomic-embed-text present on CPU Ollama", has_embed)
|
report("nomic-embed-text present on CPU Ollama", has_embed)
|
||||||
report("gemma3:1b present on CPU Ollama", has_gemma)
|
report("qwen2.5:1.5b present on CPU Ollama", has_qwen)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
report("CPU Ollama reachable", False, str(e))
|
report("CPU Ollama reachable", False, str(e))
|
||||||
report("nomic-embed-text present on CPU Ollama", False, "skipped")
|
report("nomic-embed-text present on CPU Ollama", False, "skipped")
|
||||||
report("gemma3:1b present on CPU Ollama", False, "skipped")
|
report("qwen2.5:1.5b present on CPU Ollama", False, "skipped")
|
||||||
|
|
||||||
|
|
||||||
# ── 4. Qdrant ─────────────────────────────────────────────────────────────────
|
# ── 4. Qdrant ─────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user