# Default values for ollama-helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Number of replicas
replicaCount: 0

# Knative configuration
knative:
  # -- Enable Knative integration
  enabled: false
  # -- Knative service container concurrency
  containerConcurrency: 0
  # -- Knative service timeout seconds
  timeoutSeconds: 300
  # -- Knative service response start timeout seconds
  responseStartTimeoutSeconds: 300
  # -- Knative service idle timeout seconds
  idleTimeoutSeconds: 300
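
# Illustrative (not the defaults above): once Knative Serving is available in
# the cluster, a scale-to-zero style setup might look like:
# knative:
#   enabled: true
#   containerConcurrency: 4
#   timeoutSeconds: 600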

# Docker image
image:
  # -- Docker image registry
  repository: docker.io/ollama/ollama
  # -- Docker pull policy
  pullPolicy: IfNotPresent
  # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
  tag: ""

# -- Docker registry secret names as an array
imagePullSecrets: []

# -- String to partially override template (will maintain the release name)
nameOverride: ""

# -- String to fully override template
fullnameOverride: ""

# -- String to fully override namespace
namespaceOverride: ""

# Ollama parameters
ollama:
  gpu:
    # -- Enable GPU integration
    enabled: true
    # -- GPU type: 'nvidia' or 'amd'
    # If 'ollama.gpu.enabled' is true, the default value is 'nvidia'.
    # If set to 'amd', a 'rocm' suffix is added to the image tag unless 'image.tag'
    # is overridden, because AMD and CPU/CUDA are published as different images.
    type: "amd"
    # -- Specify the number of GPUs
    # If the MIG section below is used, this parameter is ignored
    number: 1
    # -- Only for NVIDIA cards; change to e.g. 'nvidia.com/mig-1g.10gb' to use a MIG slice
    # nvidiaResource: "nvidia.com/gpu"
    # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
    # To use more than one NVIDIA MIG profile, use the MIG section below
    # (nvidiaResource is then ignored and only the MIG configuration is used)
    mig:
      # -- Enable multiple MIG devices
      # If enabled, the MIG devices must be specified below;
      # if disabled, this section is ignored
      enabled: false
      # -- Specify the MIG devices and the corresponding number of each
      devices: {}
      # 1g.10gb: 1
      # 3g.40gb: 1
  models:
    # clean: true
    pull:
      # - deepseek-r1:1.5b
      - smollm2:1.7b
    run:
      # - deepseek-r1:1.5b
      - smollm2:1.7b
  # -- Add the insecure flag when pulling models at container startup
  insecure: false
  # -- Override ollama-data volume mount path, default: "/root/.ollama"
  mountPath: ""

# Service account
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true
  # -- Automatically mount a ServiceAccount's API credentials?
  automount: true
  # -- Annotations to add to the service account
  annotations: {}
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# -- Map of annotations to add to the pods
podAnnotations: {}

# -- Map of labels to add to the pods
podLabels: {}

# -- Pod Security Context
podSecurityContext: {}
  # fsGroup: 2000
  # runAsUser: 0
  # runAsGroup: 0 # primary group
  # supplementalGroups:
  #   - 39
  #   - 998

# -- Container Security Context
securityContext:
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
  # Privileged mode is used here so the container can access the GPU device
  # nodes mounted from the host below
  privileged: true

# -- Specify runtime class
runtimeClassName: ""

# Configure Service
service:
  # -- Service type
  type: ClusterIP
  # -- Service port
  port: 11434
  # -- Service node port when service type is 'NodePort'
  nodePort: 31434
  # -- Load Balancer IP address
  loadBalancerIP:
  # -- Annotations to add to the service
  annotations: {}

# Configure the ingress resource that allows you to access the Ollama installation
ingress:
  # -- Enable ingress controller resource
  enabled: true
  # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
  className: "openshift-default"
  # -- Additional annotations for the Ingress resource.
  annotations:
    # kubernetes.io/ingress.class: traefik
    kubernetes.io/ingress.class: openshift-default
    kubernetes.io/tls-acme: "true"
    cert-manager.io/cluster-issuer: "letsencrypt-dns01-cloudflare"
    haproxy.router.openshift.io/timeout: 600s
  # -- The list of hostnames to be covered by this ingress record.
  hosts:
    - host: ollama.apilab.us
      paths:
        - path: /
          pathType: Prefix
  # -- The TLS configuration for hostnames to be covered by this ingress record.
  tls:
    - secretName: ollama-tls
      hosts:
        - ollama.apilab.us
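
  # Once DNS and the certificate are in place, the API should be reachable at
  # the host above, e.g.:
  #   curl https://ollama.apilab.us/api/tags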

# Configure resource requests and limits
# ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources:
  # -- Pod requests
  requests:
    # Memory request
    memory: 4096Mi
    # CPU request
    cpu: 2000m
  # -- Pod limits
  limits:
    # Memory limit
    memory: 8192Mi
    # CPU limit
    cpu: 4000m
    # AMD GPU limit
    amd.com/gpu: 1

# Configure extra options for liveness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
livenessProbe:
  # -- Enable livenessProbe
  enabled: true
  # -- Request path for livenessProbe
  path: /
  # -- Initial delay seconds for livenessProbe
  initialDelaySeconds: 60
  # -- Period seconds for livenessProbe
  periodSeconds: 10
  # -- Timeout seconds for livenessProbe
  timeoutSeconds: 5
  # -- Failure threshold for livenessProbe
  failureThreshold: 6
  # -- Success threshold for livenessProbe
  successThreshold: 1

# Configure extra options for readiness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
readinessProbe:
  # -- Enable readinessProbe
  enabled: true
  # -- Request path for readinessProbe
  path: /
  # -- Initial delay seconds for readinessProbe
  initialDelaySeconds: 30
  # -- Period seconds for readinessProbe
  periodSeconds: 5
  # -- Timeout seconds for readinessProbe
  timeoutSeconds: 3
  # -- Failure threshold for readinessProbe
  failureThreshold: 6
  # -- Success threshold for readinessProbe
  successThreshold: 1
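
# Both probes issue a plain HTTP GET against / on the service port; the Ollama
# server answers that path with "Ollama is running", so no dedicated health
# endpoint is needed.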

# Configure autoscaling
autoscaling:
  # -- Enable autoscaling
  enabled: false
  # -- Number of minimum replicas
  minReplicas: 1
  # -- Number of maximum replicas
  maxReplicas: 100
  # -- CPU utilization percentage to target per replica
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# -- Additional volumes on the output Deployment definition.
volumes:
  # - name: foo
  #   secret:
  #     secretName: mysecret
  #     optional: false
  # - name: host-volumes
  #   hostPath:
  #     path: /opt/amdgpu/share/libdrm
  # - name: kfd
  #   hostPath:
  #     path: /dev/kfd
  - name: dri
    hostPath:
      path: /dev/dri
      type: Directory

# -- Additional volumeMounts on the output Deployment definition.
volumeMounts:
  # - name: foo
  #   mountPath: "/etc/foo"
  #   readOnly: true
  # - name: host-volumes
  #   mountPath: /opt/amdgpu/share/libdrm
  - name: dri
    mountPath: /dev/dri
  # - name: kfd
  #   mountPath: /dev/kfd
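
# Note: /dev/dri exposes the AMD GPU render nodes; ROCm compute typically also
# needs /dev/kfd (left commented above), so re-enable the kfd volume and mount
# if the runtime cannot see the GPU.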

# -- Additional arguments on the output Deployment definition.
extraArgs: []

# -- Additional environment variables on the output Deployment definition.
# For extra OLLAMA env vars, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
extraEnv:
  - name: OLLAMA_DEBUG
    value: "0"
  - name: GIN_MODE
    value: "release"
  - name: HSA_OVERRIDE_GFX_VERSION
    value: "11.0.2"
  - name: HIP_VISIBLE_DEVICES
    value: "0"
  - name: GPU_DEVICE_ORDINAL
    value: "0"

# -- Additional environment variables from external sources (like ConfigMap)
extraEnvFrom: []
# - configMapRef:
#     name: my-env-configmap

# Enable persistence using Persistent Volume Claims
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
persistentVolume:
  # -- Enable persistence using PVC
  enabled: true
  # -- Ollama server data Persistent Volume access modes
  # Must match those of existing PV or dynamic provisioner
  # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
  accessModes:
    - ReadWriteOnce
  # -- Ollama server data Persistent Volume annotations
  annotations: {}
  # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
  # created + ready PVC here. If set, this chart will not create the default PVC.
  # Requires persistentVolume.enabled: true
  # existingClaim: "ollama"
  # -- Ollama server data Persistent Volume size
  size: 30Gi
  # -- Ollama server data Persistent Volume Storage Class
  # If defined, storageClassName: <storageClass>
  # If set to "-", storageClassName: "", which disables dynamic provisioning
  # If undefined (the default) or set to null, no storageClassName spec is
  # set, choosing the default provisioner (gp2 on AWS, standard on
  # GKE, AWS & OpenStack)
  storageClass: "local-nvme-retain"
  # -- Ollama server data Persistent Volume mode
  # If defined, volumeMode: <volumeMode> ('Filesystem' or 'Block')
  # If empty (the default) or set to null, no volumeMode spec is
  # set, choosing the default mode
  volumeMode: ""
  # -- Subdirectory of Ollama server data Persistent Volume to mount
  # Useful if the volume's root directory is not empty
  # subPath: "ollama-data"
  # -- Pre-existing PV to attach this claim to
  # Useful if a CSI driver auto-provisions a PV for you and you want to always
  # reference that PV moving forward
  # volumeName: "pvc-9583b3c6-7bbd-403c-abac-6fe728dfb8c4"

# -- Node labels for pod assignment
# nodeSelector:
#   topology.kubernetes.io/zone: lab-sno

# -- Tolerations for pod assignment
tolerations: []
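# Illustrative example for tainted GPU nodes (key and effect depend on how
# the nodes are tainted in your cluster):
# tolerations:
#   - key: "amd.com/gpu"
#     operator: "Exists"
#     effect: "NoSchedule"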

# -- Affinity for pod assignment
affinity: {}

# -- Container lifecycle hooks (override the ollama.models startup pull/run)
lifecycle: {}

# How to replace existing pods
updateStrategy:
  # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
  type: "Recreate"

# -- Topology Spread Constraints for pod assignment
topologySpreadConstraints: {}

# -- Init containers to add to the pod
initContainers: []
# - name: startup-tool
#   image: alpine:3
#   command: [sh, -c]
#   args:
#     - echo init

# -- Use the host's IPC namespace
hostIPC: false

# -- Use the host's PID namespace
hostPID: false

# -- Use the host's network namespace
hostNetwork: false
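
# A typical install/upgrade with this values file might look like
# (illustrative release, chart path, and namespace names):
#   helm upgrade --install ollama ./ollama -n ollama -f ollama/values.yaml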