# tei/deployment.yaml

---
# Storage claim mounted by the text-embeddings Deployment as its
# "model-cache" volume.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: tei
  name: tei-model-cache
spec:
  # No storageClassName is given here; the claim relies on whatever class
  # the cluster assigns by default.
  resources:
    requests:
      storage: 4Gi
  accessModes: [ReadWriteOnce]
---
# Single-replica Deployment of the text-embeddings-inference CPU image,
# serving sentence-transformers/all-MiniLM-L6-v2 on port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: text-embeddings
  namespace: tei
  labels:
    app: text-embeddings
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce claim. The default RollingUpdate strategy
  # starts the replacement pod before the old one is stopped; if the new pod
  # is scheduled onto a different node the volume cannot be attached and the
  # rollout hangs. Recreate stops the old pod first, trading a short outage
  # during rollouts for a rollout that always completes.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: text-embeddings
  template:
    metadata:
      labels:
        app: text-embeddings
    spec:
      containers:
        - name: tei
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          args:
            - --model-id
            - sentence-transformers/all-MiniLM-L6-v2
            - --port
            # Quoted: container args must be strings, not integers.
            - "8080"
          ports:
            - containerPort: 8080
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          volumeMounts:
            # Backed by the tei-model-cache claim declared under `volumes`.
            - name: model-cache
              mountPath: /data
          env:
            # Same path as the volume mount above.
            # NOTE(review): presumably this keeps downloaded model files on
            # the persistent volume; confirm this image honors HF_HOME for
            # its cache location.
            - name: HF_HOME
              value: /data
            - name: HF_ENDPOINT
              value: https://huggingface.co
          # Allows a cold start up to 30 x 10s = 300s to answer /health
          # before the liveness probe below may restart the container.
          # Readiness and liveness checks are held off until this succeeds.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 10
            failureThreshold: 30
          # Controls whether the pod receives traffic from the Service.
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          # Restarts the container if /health stops responding.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 30
      volumes:
        - name: model-cache
          persistentVolumeClaim:
            claimName: tei-model-cache
---
# Exposes pods labelled app=text-embeddings on port 8080 inside the cluster.
kind: Service
apiVersion: v1
metadata:
  namespace: tei
  name: text-embeddings
spec:
  # `type` and `protocol` below are the Kubernetes defaults, written out
  # explicitly; the resulting Service is the same as when they are omitted.
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
  selector:
    app: text-embeddings