From 8cb860797e101ab6ef8ed72c307cc3b5e08787a2 Mon Sep 17 00:00:00 2001
From: Clawdbot <clawdbot@apilab.us>
Date: Fri, 6 Feb 2026 15:07:07 +1100
Subject: [PATCH] Initial scaffold for knowledge-mcp

---
 README.md        | 40 ++++++++++++++++++++++
 requirements.txt |  6 ++++
 server.py        | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 133 insertions(+)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 server.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cbca63a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+# knowledge-mcp
+
+A Model Context Protocol (MCP) server that provides scoped RAG workspaces ("Notebooks") backed by **Qdrant** and **TEI**.
+
+## Overview
+
+This server enables an agent to:
+1.  Create named "Notebooks" (Qdrant Collections).
+2.  Ingest documents (PDF, Markdown, Text) into specific notebooks.
+3.  Query specific notebooks using vector search (RAG).
+4.  Synthesize findings across a notebook.
+
+Designed to replicate the **NotebookLM** experience: clean, focused, bounded context.
+
+## Stack
+*   **Language:** Python 3.11+
+*   **Framework:** `mcp` SDK
+*   **Vector DB:** Qdrant
+*   **Embeddings:** Text Embeddings Inference (TEI) - `BAAI/bge-base-en-v1.5`
+
+## Tools
+
+### `notebook.create`
+Creates a new isolated workspace (Qdrant Collection).
+- `name`: string (e.g., "project-alpha")
+
+### `notebook.add_source`
+Ingests a document into the notebook.
+- `notebook`: string
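+- `url`: string (URL or local path); note that the `add_source` tool in `server.py` currently accepts raw `text` (plus an optional `source_name`) rather than a URL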
+
+### `notebook.query`
+Performs a semantic (vector) search against the notebook and returns the best-matching chunks, each with its similarity score, as context for RAG generation.
+- `notebook`: string
+- `query`: string
+
+## Configuration
+Environment variables:
+- `QDRANT_URL`: URL to Qdrant (default: `http://qdrant.openshift-gitops.svc:6333`)
+- `TEI_URL`: URL to TEI (default: `http://text-embeddings.tei.svc.cluster.local:8080`)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e844665
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+mcp
+httpx
+qdrant-client
+beautifulsoup4
+pypdf
+python-dotenv
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..86536ff
--- /dev/null
+++ b/server.py
@@ -0,0 +1,87 @@
+import os
+import httpx
+from mcp.server.fastmcp import FastMCP
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+import uuid
+import logging
+
+# Configuration
+QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant.openshift-gitops.svc:6333")
+TEI_URL = os.getenv("TEI_URL", "http://text-embeddings.tei.svc.cluster.local:8080")
+EMBEDDING_DIM = 768  # BAAI/bge-base-en-v1.5
+
+# Initialize
+mcp = FastMCP("knowledge-mcp")
+qdrant = QdrantClient(url=QDRANT_URL)
+
+def get_embedding(text: str) -> list[float]:
+    """Get embedding from TEI."""
+    url = f"{TEI_URL}/embed" # Adjust based on TEI version, often /v1/embeddings or /embed
+    # Trying standard TEI /embed endpoint for raw lists
+    try:
+        response = httpx.post(url, json={"inputs": text}, timeout=10.0)
+        response.raise_for_status()
+        return response.json()[0]
+    except Exception as e:
+        # Fallback to OpenAI compatible endpoint if needed
+        logging.error(f"Embedding failed: {e}")
+        raise
+
+@mcp.tool()
+def create_notebook(name: str) -> str:
+    """Create a new RAG notebook (Qdrant collection)."""
+    clean_name = name.lower().replace(" ", "-")
+    
+    # Check if exists
+    if qdrant.collection_exists(clean_name):
+        return f"Notebook '{clean_name}' already exists."
+
+    qdrant.create_collection(
+        collection_name=clean_name,
+        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
+    )
+    return f"Notebook '{clean_name}' created successfully."
+
+@mcp.tool()
+def add_source(notebook: str, text: str, source_name: str = "manual") -> str:
+    """Add text content to a notebook. Ingests, chunks, and indexes."""
+    if not qdrant.collection_exists(notebook):
+        return f"Error: Notebook '{notebook}' does not exist."
+
+    # Very basic chunking for now
+    chunks = [text[i:i+500] for i in range(0, len(text), 500)]
+    points = []
+
+    for chunk in chunks:
+        vector = get_embedding(chunk)
+        points.append(PointStruct(
+            id=str(uuid.uuid4()),
+            vector=vector,
+            payload={"source": source_name, "text": chunk}
+        ))
+
+    qdrant.upsert(collection_name=notebook, points=points)
+    return f"Added {len(points)} chunks from '{source_name}' to '{notebook}'."
+
+@mcp.tool()
+def query_notebook(notebook: str, query: str, limit: int = 5) -> str:
+    """Query the notebook for relevant context."""
+    if not qdrant.collection_exists(notebook):
+        return f"Error: Notebook '{notebook}' does not exist."
+
+    vector = get_embedding(query)
+    hits = qdrant.search(
+        collection_name=notebook,
+        query_vector=vector,
+        limit=limit
+    )
+
+    results = []
+    for hit in hits:
+        results.append(f"--- (Score: {hit.score:.2f}) ---\n{hit.payload.get('text', '')}\n")
+
+    return "\n".join(results)
+
+if __name__ == "__main__":
+    mcp.run()  # start the knowledge-mcp server when executed as a script