# Default values for ollama-helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Number of replicas
replicaCount: 1

# Knative configuration
knative:
  # -- Enable Knative integration
  enabled: false
  # -- Knative service container concurrency
  containerConcurrency: 0
  # -- Knative service timeout seconds
  timeoutSeconds: 300
  # -- Knative service response start timeout seconds
  responseStartTimeoutSeconds: 300
  # -- Knative service idle timeout seconds
  idleTimeoutSeconds: 300

# Docker image
image:
  # -- Docker image registry
  repository: docker.io/ollama/ollama
  # -- Docker pull policy
  pullPolicy: IfNotPresent
  # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
  tag: ""

# -- Docker registry secret names as an array
imagePullSecrets: []
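# Illustrative example (the secret name "my-registry-secret" is hypothetical and must
# already exist in the release namespace, e.g. created with `kubectl create secret docker-registry`):
# imagePullSecrets:
#   - name: my-registry-secret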
# -- String to partially override template (will maintain the release name)
nameOverride: ""

# -- String to fully override template
fullnameOverride: ""

# -- String to fully override namespace
namespaceOverride: ""

# Ollama parameters
ollama:
  gpu:
    # -- Enable GPU integration
    enabled: true

    # -- GPU type: 'nvidia' or 'amd'
    # If 'ollama.gpu.enabled' is true, the default value is 'nvidia'.
    # If set to 'amd', the 'rocm' suffix is added to the image tag unless 'image.tag' is overridden,
    # because the AMD and CPU/CUDA variants are published as different images.
    type: "amd"

    # -- Specify the number of GPUs
    # If you use the MIG section below, this parameter is ignored
    number: 1

    # -- Only for NVIDIA cards; change to e.g. 'nvidia.com/mig-1g.10gb' to use a MIG slice
    #nvidiaResource: "nvidia.com/gpu"
    # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
    # If you want to use more than one NVIDIA MIG profile, use the following syntax
    # (then nvidiaResource is ignored and only the configuration in the MIG section below is used)
    mig:
      # -- Enable multiple MIG devices
      # If enabled, you have to specify the MIG devices
      # If enabled is set to false, this section is ignored
      enabled: false

      # -- Specify the MIG devices and the corresponding number
      devices: {}
      # 1g.10gb: 1
      # 3g.40gb: 1

  models:
    #clean: true
    pull:
      #- deepseek-r1:1.5b
      - smollm2:1.7b
    run:
      #- deepseek-r1:1.5b
      - smollm2:1.7b
    insecure: false

  # -- Override ollama-data volume mount path, default: "/root/.ollama"
  mountPath: ""

# Service account
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true
  # -- Automatically mount a ServiceAccount's API credentials?
  automount: true
  # -- Annotations to add to the service account
  annotations: {}
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# -- Map of annotations to add to the pods
podAnnotations: {}

# -- Map of labels to add to the pods
podLabels: {}

# -- Pod Security Context
podSecurityContext: {}
  # fsGroup: 2000
  # runAsUser: 0
  # runAsGroup: 0 # Primary group
  # supplementalGroups:
  #   - 39
  #   - 998

# -- Container Security Context
securityContext:
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
  privileged: true

# -- Specify runtime class
runtimeClassName: ""

# Configure Service
service:
  # -- Service type
  type: ClusterIP
  # -- Service port
  port: 11434
  # -- Service node port when service type is 'NodePort'
  nodePort: 31434
  # -- Load Balancer IP address
  loadBalancerIP:
  # -- Annotations to add to the service
  annotations: {}

# Configure the ingress resource that allows you to access the Ollama installation
ingress:
  # -- Enable ingress controller resource
  enabled: true
  # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
  className: "openshift-default"
  # -- Additional annotations for the Ingress resource.
  annotations:
    # kubernetes.io/ingress.class: traefik
    kubernetes.io/ingress.class: openshift-default
    kubernetes.io/tls-acme: "true"
    cert-manager.io/cluster-issuer: "letsencrypt-dns01-cloudflare"
    haproxy.router.openshift.io/timeout: 600s
  # The list of hostnames to be covered with this ingress record.
  hosts:
    - host: ollama.apilab.us
      paths:
        - path: /
          pathType: Prefix
  # -- The tls configuration for hostnames to be covered with this ingress record.
  tls:
    - secretName: ollama-tls
      hosts:
        - ollama.apilab.us

# Configure resource requests and limits
# ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources:
  # -- Pod requests
  requests:
    # Memory request
    memory: 4096Mi
    # CPU request
    cpu: 2000m
  # -- Pod limits
  limits:
    # Memory limit
    memory: 8192Mi
    # CPU limit
    cpu: 4000m
    amd.com/gpu: 1
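    # Illustrative alternative for NVIDIA nodes (assumes the NVIDIA device plugin is
    # installed); use this resource name instead of the amd.com/gpu limit above:
    # nvidia.com/gpu: 1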
# Configure extra options for liveness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
livenessProbe:
  # -- Enable livenessProbe
  enabled: true
  # -- Request path for livenessProbe
  path: /
  # -- Initial delay seconds for livenessProbe
  initialDelaySeconds: 60
  # -- Period seconds for livenessProbe
  periodSeconds: 10
  # -- Timeout seconds for livenessProbe
  timeoutSeconds: 5
  # -- Failure threshold for livenessProbe
  failureThreshold: 6
  # -- Success threshold for livenessProbe
  successThreshold: 1

# Configure extra options for readiness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
readinessProbe:
  # -- Enable readinessProbe
  enabled: true
  # -- Request path for readinessProbe
  path: /
  # -- Initial delay seconds for readinessProbe
  initialDelaySeconds: 30
  # -- Period seconds for readinessProbe
  periodSeconds: 5
  # -- Timeout seconds for readinessProbe
  timeoutSeconds: 3
  # -- Failure threshold for readinessProbe
  failureThreshold: 6
  # -- Success threshold for readinessProbe
  successThreshold: 1

# Configure autoscaling
autoscaling:
  # -- Enable autoscaling
  enabled: false
  # -- Number of minimum replicas
  minReplicas: 1
  # -- Number of maximum replicas
  maxReplicas: 100
  # -- Target CPU utilization percentage for scaling
  targetCPUUtilizationPercentage: 80
  # -- targetMemoryUtilizationPercentage: 80

# -- Additional volumes on the output Deployment definition.
volumes:
  # - name: foo
  #   secret:
  #     secretName: mysecret
  #     optional: false
  # - name: host-volumes
  #   hostPath:
  #     path: /opt/amdgpu/share/libdrm
  # - name: kfd
  #   hostPath:
  #     path: /dev/kfd
  - name: dri
    hostPath:
      path: /dev/dri
      type: Directory

# -- Additional volumeMounts on the output Deployment definition.
volumeMounts:
  # - name: foo
  #   mountPath: "/etc/foo"
  #   readOnly: true
  # - name: host-volumes
  #   mountPath: /opt/amdgpu/share/libdrm
  - name: dri
    mountPath: /dev/dri
  # - name: kfd
  #   mountPath: /dev/kfd

# -- Additional arguments on the output Deployment definition.
extraArgs: []

# -- Additional environment variables on the output Deployment definition.
# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
extraEnv:
  - name: OLLAMA_DEBUG
    value: "0"
  - name: GIN_MODE
    value: "release"
  - name: HSA_OVERRIDE_GFX_VERSION
    value: "11.0.2"
  - name: HIP_VISIBLE_DEVICES
    value: "0"
  - name: GPU_DEVICE_ORDINAL
    value: "0"

# -- Additional environment variables from external sources (like ConfigMap)
extraEnvFrom: []
  # - configMapRef:
  #     name: my-env-configmap

# Enable persistence using Persistent Volume Claims
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
persistentVolume:
  # -- Enable persistence using PVC
  enabled: true

  # -- Ollama server data Persistent Volume access modes
  # Must match those of existing PV or dynamic provisioner
  # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
  accessModes:
    - ReadWriteOnce

  # -- Ollama server data Persistent Volume annotations
  annotations: {}

  # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
  # created + ready PVC here. If set, this Chart will not create the default PVC.
  # Requires persistentVolume.enabled: true
  # existingClaim: "ollama"

  # -- Ollama server data Persistent Volume size
  size: 30Gi

  # -- Ollama server data Persistent Volume Storage Class
  # If defined, storageClassName: <storageClass>
  # If set to "-", storageClassName: "", which disables dynamic provisioning
  # If undefined (the default) or set to null, no storageClassName spec is
  # set, choosing the default provisioner. (gp2 on AWS, standard on
  # GKE, AWS & OpenStack)
  storageClass: "local-nvme-retain"

  # -- Ollama server data Persistent Volume Binding Mode
  # If defined, volumeMode: <volumeMode>
  # If empty (the default) or set to null, no volumeBindingMode spec is
  # set, choosing the default mode.
  volumeMode: ""

  # -- Subdirectory of Ollama server data Persistent Volume to mount
  # Useful if the volume's root directory is not empty
  #subPath: "ollama-data"

  # -- Pre-existing PV to attach this claim to
  # Useful if a CSI auto-provisions a PV for you and you want to always
  # reference the PV moving forward
  # volumeName: "pvc-9583b3c6-7bbd-403c-abac-6fe728dfb8c4"

# -- Node labels for pod assignment.
#nodeSelector:
#  topology.kubernetes.io/zone: lab-sno

# -- Tolerations for pod assignment
tolerations: []

# -- Affinity for pod assignment
affinity: {}

# -- Lifecycle for pod assignment (overrides the ollama.models startup pull/run)
lifecycle: {}
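# Illustrative override (a sketch only; the model name is a placeholder): replaces the
# chart's generated postStart hook that pulls/runs ollama.models with a single pull.
# lifecycle:
#   postStart:
#     exec:
#       command: ["/bin/sh", "-c", "ollama pull smollm2:1.7b"]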
# How to replace existing pods
updateStrategy:
  # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
  type: "Recreate"

# -- Topology Spread Constraints for pod assignment
topologySpreadConstraints: {}

# -- Init containers to add to the pod
initContainers: []
# - name: startup-tool
#   image: alpine:3
#   command: [sh, -c]
#   args:
#     - echo init

# -- Use the host's ipc namespace.
hostIPC: false

# -- Use the host's pid namespace.
hostPID: false

# -- Use the host's network namespace.
hostNetwork: false