Initial TEI deployment for Clawdbot memory search

2026-01-30 02:55:56 +00:00
commit 85764de991
3 changed files with 139 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,54 @@
+# TEI - Text Embeddings Inference
+
+Lightweight embedding service for Clawdbot memory search.
+
+## Overview
+
+Runs [HuggingFace Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference) (TEI) on CPU to provide OpenAI-compatible embeddings for semantic memory search.
+
+- **Model:** `sentence-transformers/all-MiniLM-L6-v2` (~90MB, 384 dimensions)
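+- **Endpoint:** `http://text-embeddings:8080/v1/embeddings` from inside the `tei` namespace; from other namespaces use `http://text-embeddings.tei.svc.cluster.local:8080/v1/embeddings`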
+- **No GPU required**
+
+## Deploy
+
+```bash
+oc new-project tei
+oc apply -f deployment.yaml
+```
+
+## Clawdbot Config
+
+Add to `~/.clawdbot/clawdbot.json`:
+
+```json5
+{
+  "agents": {
+    "defaults": {
+      "memorySearch": {
+        "enabled": true,
+        "provider": "openai",
+        "model": "sentence-transformers/all-MiniLM-L6-v2",
+        "remote": {
+          "baseUrl": "http://text-embeddings.tei.svc.cluster.local:8080/v1/",
+          "apiKey": "not-needed"
+        }
+      }
+    }
+  }
+}
+```
+
+## Resources
+
+- **Memory:** 512Mi request, 1Gi limit
+- **CPU:** 250m request, 1000m limit
+- **Storage:** 1Gi PVC for model cache
+
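+## Test
+
+The Service is only reachable inside the cluster, so run this from a pod in the `tei` namespace (from another namespace, use the `text-embeddings.tei.svc.cluster.local` host name instead):
+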
+```bash
+curl -X POST http://text-embeddings:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"input": "Hello world", "model": "sentence-transformers/all-MiniLM-L6-v2"}'
+```
--- a/deployment.yaml
+++ b/deployment.yaml
@@ -0,0 +1,78 @@
+---
+# Claim for the 1Gi volume that the text-embeddings Deployment mounts
+# as its model cache.
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: tei-model-cache
+spec:
+  resources:
+    requests:
+      storage: 1Gi
+  # Single-node read/write access.
+  accessModes:
+    - ReadWriteOnce
+---
+# Deployment: a single-replica Text Embeddings Inference (CPU image) server
+# that serves sentence-transformers/all-MiniLM-L6-v2 on port 8080.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: text-embeddings
+  labels:
+    app: text-embeddings
+spec:
+  replicas: 1
+  # The pod mounts the ReadWriteOnce claim tei-model-cache. Under the default
+  # RollingUpdate strategy the replacement pod is created before the old one
+  # is stopped, and it can hang waiting for the volume if it is scheduled to
+  # a different node. Recreate stops the old pod first.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: text-embeddings
+  template:
+    metadata:
+      labels:
+        app: text-embeddings
+    spec:
+      containers:
+        - name: tei
+          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+          args:
+            - --model-id
+            - sentence-transformers/all-MiniLM-L6-v2
+            - --port
+            # Quoted: container args must be strings.
+            - "8080"
+          ports:
+            - containerPort: 8080
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "250m"
+            limits:
+              memory: "1Gi"
+              cpu: "1000m"
+          volumeMounts:
+            # The model-cache volume (PVC tei-model-cache) is mounted at /data.
+            - name: model-cache
+              mountPath: /data
+          env:
+            # NOTE(review): presumably meant to keep the Hugging Face cache on
+            # the mounted volume. TEI also exposes its own cache setting
+            # (HUGGINGFACE_HUB_CACHE); confirm which one the image honors.
+            - name: HF_HOME
+              value: /data
+          # Both probes issue GET /health on port 8080.
+          # Readiness: first check after 30s, then every 10s.
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 30
+            periodSeconds: 10
+          # Liveness: first check after 60s, then every 30s.
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 60
+            periodSeconds: 30
+      volumes:
+        - name: model-cache
+          persistentVolumeClaim:
+            claimName: tei-model-cache
+---
+# Service in front of the pods labelled app=text-embeddings. No `type` is
+# set, so it is a cluster-internal (ClusterIP) Service.
+apiVersion: v1
+kind: Service
+metadata:
+  name: text-embeddings
+spec:
+  ports:
+    # Service port 8080 forwards to container port 8080.
+    - targetPort: 8080
+      port: 8080
+  selector:
+    app: text-embeddings
--- a/kustomization.yaml
+++ b/kustomization.yaml
@@ -0,0 +1,7 @@
+---
+# Kustomize entry point: includes deployment.yaml and places every
+# resource in the tei namespace.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - deployment.yaml
+
+namespace: tei