---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: tei-model-cache
  namespace: tei
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 4Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: text-embeddings
  namespace: tei
  labels:
    app: text-embeddings
spec:
  replicas: 1
  selector:
    matchLabels:
      app: text-embeddings
  template:
    metadata:
      labels:
        app: text-embeddings
    spec:
      containers:
        - name: tei
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
          args:
            - --model-id
            - BAAI/bge-base-en-v1.5
            - --port
            - "8080"
            - --auto-truncate
          ports:
            - containerPort: 8080
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          volumeMounts:
            - name: model-cache
              mountPath: /data
          env:
            - name: HF_HOME
              value: /data
            - name: HF_ENDPOINT
              value: https://huggingface.co
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 30
      volumes:
        - name: model-cache
          persistentVolumeClaim:
            claimName: tei-model-cache
---
apiVersion: v1
kind: Service
metadata:
  name: text-embeddings
  namespace: tei
spec:
  selector:
    app: text-embeddings
  ports:
    - port: 8080
      targetPort: 8080
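
# Illustrative verification sketch (not part of the manifests above): once the
# pod reports ready, the text-embeddings-inference /embed endpoint can be
# exercised through a port-forward. The service name, namespace, and port are
# taken from the Service defined above; the request body follows the TEI API,
# and the sample text is a placeholder.
#
#   kubectl -n tei port-forward svc/text-embeddings 8080:8080
#   curl 127.0.0.1:8080/embed \
#     -H 'Content-Type: application/json' \
#     -d '{"inputs": "sample sentence to embed"}'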