wiki search people tested pipeline
This commit is contained in:
6
adolf/openmemory/Dockerfile
Normal file
6
adolf/openmemory/Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
# OpenMemory MCP server image — minimal Python runtime.
FROM python:3.12-slim

WORKDIR /app

# Copy requirements first so the pip layer is cached by Docker
# and skipped when only server.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY server.py .

CMD ["python", "server.py"]
6
adolf/openmemory/requirements.txt
Normal file
6
adolf/openmemory/requirements.txt
Normal file
@@ -0,0 +1,6 @@
mem0ai
ollama
fastapi
uvicorn
mcp[cli]
qdrant-client
@@ -1,24 +1,109 @@
|
||||
import json
|
||||
import os
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from mem0 import Memory
|
||||
# --- External service endpoints --------------------------------------------
# Extraction LLM — GPU Ollama (qwen3:4b, same model as the medium agent).
# Runs after the reply when the GPU is idle; a spin-wait in agent.py prevents
# contention with the chat model.
OLLAMA_GPU_URL = os.getenv("OLLAMA_GPU_URL", "http://host.docker.internal:11436")

# Embedding — CPU Ollama (nomic-embed-text, ~137 MB RAM).
# Used for both search (50-150 ms, acceptable) and store-time embedding.
OLLAMA_CPU_URL = os.getenv("OLLAMA_CPU_URL", "http://host.docker.internal:11435")

# Qdrant vector store location (defaults target the host from inside Docker).
QDRANT_HOST = os.getenv("QDRANT_HOST", "host.docker.internal")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))

# Custom fact-extraction prompt, passed to mem0 via
# "custom_fact_extraction_prompt". The leading /no_think directive disables
# qwen3 thinking tokens so the model emits clean JSON only.
EXTRACTION_PROMPT = """/no_think
You are a memory extraction assistant. Extract factual statements from a conversation that are worth remembering long-term.

Extract facts from BOTH user AND assistant messages, including:
- User details, preferences, and personal information
- User's plans, goals, and intentions
- The assistant's name or persona (if set by the user or stated by the assistant)
- Any commitments or agreements made
- Key facts stated as true

Return ONLY valid JSON in this exact format:
{"facts": ["fact 1", "fact 2"]}

If there are no facts worth storing, return: {"facts": []}

IMPORTANT rules:
- Extract the EXACT concrete values mentioned. Never say "not known" or "unspecified".
- If the user states their name, job, pet, city, allergy, or preference — store the exact value.
- A single message may contain multiple facts — extract ALL of them.
- Do NOT extract vague summaries. Extract specific facts with real values.

Examples:

Input: "User: I live in Berlin\nAssistant: Got it, you're in Berlin!"
Output: {"facts": ["User lives in Berlin"]}

Input: "User: My name is Alice and I live in Tokyo\nAssistant: Nice to meet you Alice!"
Output: {"facts": ["User's name is Alice", "User lives in Tokyo"]}

Input: "User: I work as a software engineer at a startup\nAssistant: Cool!"
Output: {"facts": ["User works as a software engineer at a startup"]}

Input: "User: I have a cat named Whiskers\nAssistant: Whiskers is a cute name!"
Output: {"facts": ["User has a cat named Whiskers"]}

Input: "User: I'm allergic to nuts\nAssistant: I'll remember that."
Output: {"facts": ["User is allergic to nuts"]}

Input: "User: remember that your name is Adolf\nAssistant: My name is Adolf!"
Output: {"facts": ["Assistant's name is Adolf"]}

Input: "User: what time is it?\nAssistant: I don't have access to real-time data."
Output: {"facts": []}

Input: "User: I prefer dark mode\nAssistant: Noted, I'll keep that in mind."
Output: {"facts": ["User prefers dark mode"]}

Now extract facts from this conversation:"""

# Update/dedup decision prompt — overrides mem0's default. Small local models
# struggle with the default multi-step reasoning; this version is more
# explicit: list existing, list new, decide ADD/UPDATE/DELETE/NONE per item.
UPDATE_PROMPT = """/no_think
You manage a memory store. Given EXISTING memories and NEW facts:
- For each EXISTING memory: output NONE (no change) or UPDATE (if a new fact replaces it) or DELETE.
- For each NEW fact: output ADD if it is not already covered by existing memories. Output NONE if it is already covered.
- IMPORTANT: You MUST include ALL new facts in your output — either as ADD or NONE.
- Output ONLY valid JSON, no explanation.

Example A — new fact is genuinely new:
Existing: [{"id": "0", "text": "User lives in Berlin"}]
New facts: ["User is allergic to nuts"]
Output: {"memory": [{"id": "0", "text": "User lives in Berlin", "event": "NONE"}, {"id": "1", "text": "User is allergic to nuts", "event": "ADD"}]}

Example B — new fact updates an existing one:
Existing: [{"id": "0", "text": "User lives in Berlin"}]
New facts: ["User lives in Paris"]
Output: {"memory": [{"id": "0", "text": "User lives in Paris", "event": "UPDATE", "old_memory": "User lives in Berlin"}]}

Example C — new fact already covered:
Existing: [{"id": "0", "text": "User is allergic to nuts"}]
New facts: ["User has a nut allergy"]
Output: {"memory": [{"id": "0", "text": "User is allergic to nuts", "event": "NONE"}]}"""
config = {
|
||||
"llm": {
|
||||
"provider": "ollama",
|
||||
"config": {
|
||||
"model": "qwen2.5:1.5b",
|
||||
"ollama_base_url": OLLAMA_CPU_URL,
|
||||
"model": "qwen3:4b",
|
||||
"ollama_base_url": OLLAMA_GPU_URL,
|
||||
"temperature": 0.1, # consistent JSON output
|
||||
},
|
||||
},
|
||||
"embedder": {
|
||||
"provider": "ollama",
|
||||
"config": {
|
||||
"model": "nomic-embed-text",
|
||||
"ollama_base_url": OLLAMA_CPU_URL,
|
||||
"ollama_base_url": OLLAMA_CPU_URL, # CPU: 50-150ms per query, no GPU needed
|
||||
},
|
||||
},
|
||||
"vector_store": {
|
||||
@@ -30,6 +115,8 @@ config = {
|
||||
"port": QDRANT_PORT,
|
||||
},
|
||||
},
|
||||
"custom_fact_extraction_prompt": EXTRACTION_PROMPT,
|
||||
"custom_update_memory_prompt": UPDATE_PROMPT,
|
||||
}
|
||||
|
||||
memory = Memory.from_config(config)
|
||||
@@ -41,21 +128,27 @@ mcp = FastMCP("openmemory", host="0.0.0.0", port=8765)
|
||||
def add_memory(text: str, user_id: str = "default") -> str:
    """Store a memory for a user.

    Args:
        text: The raw text to run through mem0's extraction/update pipeline.
        user_id: Owner of the memory; defaults to the shared "default" user.

    Returns:
        A JSON string describing what mem0 added/updated.
    """
    result = memory.add(text, user_id=user_id)
    # Return clean JSON instead of a Python repr so MCP clients can parse it;
    # default=str stringifies any non-serializable values (e.g. datetimes).
    return json.dumps(result, default=str)
|
||||
|
||||
@mcp.tool()
def search_memory(query: str, user_id: str = "default") -> str:
    """Search memories for a user using semantic similarity.

    Args:
        query: Natural-language search text (embedded via nomic-embed-text).
        user_id: Owner of the memories to search.

    Returns:
        A JSON string with the matching memories.
    """
    # threshold=0.3 prunes at the vector-store level; the stricter 0.5 cut
    # below keeps only clearly relevant hits out of the agent's context.
    results = memory.search(query, user_id=user_id, limit=10, threshold=0.3)
    if isinstance(results, dict) and "results" in results:
        results["results"] = [r for r in results["results"] if r.get("score", 0) >= 0.5]
    # Clean JSON instead of Python repr so MCP clients can parse it.
    return json.dumps(results, default=str)
|
||||
|
||||
@mcp.tool()
def get_all_memories(user_id: str = "default", limit: int = 50) -> str:
    """Get stored memories for a user (up to limit).

    Args:
        user_id: Owner of the memories.
        limit: Maximum number of memories to return; capped to avoid
            flooding the agent's context window.

    Returns:
        A JSON string with the stored memories.
    """
    results = memory.get_all(user_id=user_id, limit=limit)
    # Clean JSON instead of Python repr so MCP clients can parse it.
    return json.dumps(results, default=str)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user