Initial scaffold for knowledge-mcp

2026-02-06 15:07:07 +11:00
commit 8cb860797e
3 changed files with 133 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,40 @@
 # knowledge-mcp
 A Model Context Protocol (MCP) server that provides scoped RAG workspaces ("Notebooks") backed by **Qdrant** and **TEI**.
 ## Overview
 This server enables an agent to:
 1.  Create named "Notebooks" (Qdrant Collections).
 2.  Ingest documents (PDF, Markdown, Text) into specific notebooks.
 3.  Query specific notebooks using vector search (RAG).
 4.  Synthesize findings across a notebook.
 Designed to replicate the **NotebookLM** experience: clean, focused, bounded context.
 ## Stack
 *   **Language:** Python 3.11+
 *   **Framework:** `mcp` SDK
 *   **Vector DB:** Qdrant
 *   **Embeddings:** Text Embeddings Inference (TEI) - `BAAI/bge-base-en-v1.5`
 ## Tools
 ### `notebook.create`
 Creates a new isolated workspace (Qdrant Collection).
 - `name`: string (e.g., "project-alpha")
 ### `notebook.add_source`
 Ingests a document into the notebook.
 - `notebook`: string
 - `url`: string (URL or local path)
 ### `notebook.query`
 Performs a semantic search/RAG generation against the notebook.
 - `notebook`: string
 - `query`: string
 ## Configuration
 Env vars:
 - `QDRANT_URL`: URL to Qdrant (e.g., `http://qdrant.openshift-gitops.svc:6333`)
 - `TEI_URL`: URL to TEI (e.g., `http://text-embeddings.tei.svc.cluster.local:8080`)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,6 @@
 mcp
 httpx
 qdrant-client
 beautifulsoup4
 pypdf
 python-dotenv
--- a/server.py
+++ b/server.py
@@ -0,0 +1,87 @@
 import os
 import httpx
 from mcp.server.fastmcp import FastMCP
 from qdrant_client import QdrantClient
 from qdrant_client.models import Distance, VectorParams, PointStruct
 import uuid
 import logging
 # Configuration
 # Service endpoints come from the environment; the literals below are the
 # defaults used when the corresponding variable is unset.
 QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant.openshift-gitops.svc:6333")
 TEI_URL = os.getenv("TEI_URL", "http://text-embeddings.tei.svc.cluster.local:8080")
 # Vector size used when a collection is created; it must match the length of
 # the vectors returned by the embedding model named in the trailing comment.
 EMBEDDING_DIM = 768  # BAAI/bge-base-en-v1.5
 # Initialize
 # Both objects are created at import time and shared by the tool functions.
 mcp = FastMCP("knowledge-mcp")
 qdrant = QdrantClient(url=QDRANT_URL)
 def get_embedding(text: str) -> list[float]:
    """Return the embedding vector for ``text`` from the TEI service.

    Posts ``{"inputs": text}`` to the ``/embed`` endpoint under ``TEI_URL``
    with a 10 second timeout and returns the first element of the JSON
    response.

    Args:
        text: The string to embed.

    Returns:
        The first element of the decoded JSON response body.

    Raises:
        Exception: Any failure (transport error, non-2xx status, unexpected
            response body) is logged with its traceback and re-raised
            unchanged.
    """
    # Tolerate a trailing slash in TEI_URL so the path never becomes "//embed".
    url = f"{TEI_URL.rstrip('/')}/embed"
    try:
        response = httpx.post(url, json={"inputs": text}, timeout=10.0)
        response.raise_for_status()
        return response.json()[0]
    except Exception as e:
        # No alternative endpoint is attempted: record the failure (with the
        # traceback) and let the caller decide what to do.
        logging.exception("Embedding failed: %s", e)
        raise
@mcp.tool()
 def create_notebook(name: str) -> str:
    """Create a new RAG notebook (Qdrant collection)."""
    clean_name = name.lower().replace(" ", "-")
    # Check if exists
    if qdrant.collection_exists(clean_name):
        return f"Notebook '{clean_name}' already exists."
    qdrant.create_collection(
        collection_name=clean_name,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )
    return f"Notebook '{clean_name}' created successfully."
@mcp.tool()
 def add_source(notebook: str, text: str, source_name: str = "manual") -> str:
    """Add text content to a notebook. Ingests, chunks, and indexes."""
    if not qdrant.collection_exists(notebook):
        return f"Error: Notebook '{notebook}' does not exist."
    # Very basic chunking for now
    chunks = [text[i:i+500] for i in range(0, len(text), 500)]
    points = []
    for chunk in chunks:
        vector = get_embedding(chunk)
        points.append(PointStruct(
            id=str(uuid.uuid4()),
            vector=vector,
            payload={"source": source_name, "text": chunk}
        ))
    qdrant.upsert(collection_name=notebook, points=points)
    return f"Added {len(points)} chunks from '{source_name}' to '{notebook}'."
@mcp.tool()
 def query_notebook(notebook: str, query: str, limit: int = 5) -> str:
    """Query the notebook for relevant context."""
    if not qdrant.collection_exists(notebook):
        return f"Error: Notebook '{notebook}' does not exist."
    vector = get_embedding(query)
    hits = qdrant.search(
        collection_name=notebook,
        query_vector=vector,
        limit=limit
    )
    results = []
    for hit in hits:
        results.append(f"--- (Score: {hit.score:.2f}) ---\n{hit.payload.get('text', '')}\n")
    return "\n".join(results)
 # Start the MCP server only when this file is executed as a script.
 if __name__ == "__main__":
    mcp.run()