Initial scaffold for knowledge-mcp
This commit is contained in:
40
README.md
Normal file
40
README.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# knowledge-mcp
|
||||||
|
|
||||||
|
A Model Context Protocol (MCP) server that provides scoped RAG workspaces ("Notebooks") backed by **Qdrant** and **TEI**.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This server enables an agent to:
|
||||||
|
1. Create named "Notebooks" (Qdrant Collections).
|
||||||
|
2. Ingest documents (PDF, Markdown, Text) into specific notebooks.
|
||||||
|
3. Query specific notebooks using vector search (RAG).
|
||||||
|
4. Synthesize findings across a notebook.
|
||||||
|
|
||||||
|
Designed to replicate the **NotebookLM** experience: clean, focused, bounded context.
|
||||||
|
|
||||||
|
## Stack
|
||||||
|
* **Language:** Python 3.11+
|
||||||
|
* **Framework:** `mcp` SDK
|
||||||
|
* **Vector DB:** Qdrant
|
||||||
|
* **Embeddings:** Text Embeddings Inference (TEI) - `BAAI/bge-base-en-v1.5`
|
||||||
|
|
||||||
|
## Tools
|
||||||
|
|
||||||
|
### `notebook.create`
|
||||||
|
Creates a new isolated workspace (Qdrant Collection).
|
||||||
|
- `name`: string (e.g., "project-alpha")
|
||||||
|
|
||||||
|
### `notebook.add_source`
|
||||||
|
Ingests a document into the notebook.
|
||||||
|
- `notebook`: string
|
||||||
|
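- `url`: string (URL or local path). Note: the current `add_source` scaffold in `server.py` accepts raw `text` plus an optional `source_name` rather than a URL.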
|
||||||
|
|
||||||
|
### `notebook.query`
|
||||||
|
Performs a semantic search against the notebook and returns the best-matching chunks with their similarity scores, for use as RAG context.
|
||||||
|
- `notebook`: string
|
||||||
|
- `query`: string
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
Env vars:
|
||||||
|
- `QDRANT_URL`: URL to Qdrant (e.g., `http://qdrant.openshift-gitops.svc:6333`)
|
||||||
|
- `TEI_URL`: URL to TEI (e.g., `http://text-embeddings.tei.svc.cluster.local:8080`)
|
||||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
mcp
|
||||||
|
httpx
|
||||||
|
qdrant-client
|
||||||
|
beautifulsoup4
|
||||||
|
pypdf
|
||||||
|
python-dotenv
|
||||||
87
server.py
Normal file
87
server.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import os
|
||||||
|
import httpx
|
||||||
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
|
from qdrant_client.models import Distance, VectorParams, PointStruct
|
||||||
|
import uuid
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Configuration
# Both service endpoints are read from the environment; the literals below
# are used only when the corresponding variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant.openshift-gitops.svc:6333")
TEI_URL = os.environ.get("TEI_URL", "http://text-embeddings.tei.svc.cluster.local:8080")

# Vector size used when creating collections (BAAI/bge-base-en-v1.5).
EMBEDDING_DIM = 768

# Initialize
# Module-level singletons shared by every tool defined in this file.
mcp = FastMCP("knowledge-mcp")
qdrant = QdrantClient(url=QDRANT_URL)
|
||||||
|
|
||||||
|
def get_embedding(text: str) -> list[float]:
    """Return the embedding for ``text`` from the TEI service.

    Posts ``{"inputs": text}`` to the ``/embed`` route under ``TEI_URL`` and
    returns the first element of the JSON response.

    Args:
        text: The string to embed.

    Returns:
        The first element of the JSON array returned by TEI; presumably a
        list of ``EMBEDDING_DIM`` floats (confirm against the deployed model).

    Raises:
        Exception: Any failure while sending the request, a non-2xx status,
            or a response that cannot be decoded/indexed is logged and then
            re-raised unchanged.
    """
    # Drop a trailing slash so a TEI_URL such as "http://host:8080/" does not
    # produce a "//embed" path.
    url = f"{TEI_URL.rstrip('/')}/embed"
    try:
        response = httpx.post(url, json={"inputs": text}, timeout=10.0)
        response.raise_for_status()
        return response.json()[0]
    except Exception as e:
        # No fallback endpoint is attempted: record the failure together with
        # its traceback and let the caller see the original exception.
        logging.exception("Embedding failed: %s", e)
        raise
|
||||||
|
|
||||||
|
@mcp.tool()
def create_notebook(name: str) -> str:
    """Create a new RAG notebook (Qdrant collection)."""
    # NOTE(review): docstring kept verbatim; the tool decorator presumably
    # exposes it as the tool description - confirm before rewording.

    # Collection names are stored lower-cased with spaces turned into hyphens.
    collection = name.lower().replace(" ", "-")

    if not qdrant.collection_exists(collection):
        # Cosine-distance vectors sized for the configured embedding model.
        vector_params = VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE)
        qdrant.create_collection(
            collection_name=collection,
            vectors_config=vector_params,
        )
        return f"Notebook '{collection}' created successfully."

    # Nothing to do: report the existing collection instead of failing.
    return f"Notebook '{collection}' already exists."
|
||||||
|
|
||||||
|
@mcp.tool()
def add_source(notebook: str, text: str, source_name: str = "manual") -> str:
    """Add text content to a notebook. Ingests, chunks, and indexes.

    Args:
        notebook: Notebook (Qdrant collection) name. If no collection has
            exactly this name, the lower-cased, hyphen-for-space form that
            create_notebook stores is tried as well.
        text: Raw text to ingest; it is split into fixed 500-character chunks.
        source_name: Label stored in each chunk's payload under "source".

    Returns:
        A status message with the number of chunks added, or a message
        starting with "Error:" when the notebook is missing or ``text`` is
        empty.

    Raises:
        Exception: Errors from get_embedding or the Qdrant client propagate.
    """
    collection = notebook
    if not qdrant.collection_exists(collection):
        # create_notebook stores "My Notes" as "my-notes"; accept the
        # un-normalized spelling here too instead of reporting it missing.
        collection = notebook.lower().replace(" ", "-")
        if collection == notebook or not qdrant.collection_exists(collection):
            return f"Error: Notebook '{notebook}' does not exist."

    if not text:
        # Nothing to chunk; do not send an empty points list to Qdrant.
        return f"Error: No text provided for '{source_name}'."

    # Very basic chunking for now: fixed-width slices, no overlap.
    chunk_size = 500
    chunks = (text[i:i + chunk_size] for i in range(0, len(text), chunk_size))

    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=get_embedding(chunk),  # one embedding request per chunk
            payload={"source": source_name, "text": chunk},
        )
        for chunk in chunks
    ]

    qdrant.upsert(collection_name=collection, points=points)
    return f"Added {len(points)} chunks from '{source_name}' to '{collection}'."
|
||||||
|
|
||||||
|
@mcp.tool()
def query_notebook(notebook: str, query: str, limit: int = 5) -> str:
    """Query the notebook for relevant context.

    Args:
        notebook: Notebook (Qdrant collection) name. If no collection has
            exactly this name, the lower-cased, hyphen-for-space form that
            create_notebook stores is tried as well.
        query: Natural-language query; it is embedded with get_embedding.
        limit: Maximum number of hits to return.

    Returns:
        One "--- (Score: x.xx) ---" section per hit followed by the chunk
        text, joined by newlines (an empty string when there are no hits),
        or a message starting with "Error:" when the notebook is missing.

    Raises:
        Exception: Errors from get_embedding or the Qdrant client propagate.
    """
    collection = notebook
    if not qdrant.collection_exists(collection):
        # create_notebook stores "My Notes" as "my-notes"; accept the
        # un-normalized spelling here too instead of reporting it missing.
        collection = notebook.lower().replace(" ", "-")
        if collection == notebook or not qdrant.collection_exists(collection):
            return f"Error: Notebook '{notebook}' does not exist."

    vector = get_embedding(query)

    if hasattr(qdrant, "query_points"):
        # Preferred API on current qdrant-client releases; `search` is
        # deprecated there in favour of `query_points`.
        hits = qdrant.query_points(
            collection_name=collection,
            query=vector,
            limit=limit,
        ).points
    else:
        # Older clients only provide `search`.
        hits = qdrant.search(
            collection_name=collection,
            query_vector=vector,
            limit=limit,
        )

    results = []
    for hit in hits:
        # A point stored without a payload yields None here; treat it as
        # having no text rather than failing on `.get`.
        payload = hit.payload or {}
        results.append(f"--- (Score: {hit.score:.2f}) ---\n{payload.get('text', '')}\n")

    return "\n".join(results)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start the MCP server only when this file is executed as a script, so
    # importing the module does not launch it.
    mcp.run()
|
||||||
Reference in New Issue
Block a user