commit 83bafc5e35678079acc9cbabf789a1151b171859 Author: Conan Scott Date: Mon Dec 29 20:26:48 2025 +1100 first commit diff --git a/Chart.yaml b/Chart.yaml new file mode 100644 index 0000000..6f8f1bb --- /dev/null +++ b/Chart.yaml @@ -0,0 +1,28 @@ +annotations: + artifacthub.io/category: ai-machine-learning + artifacthub.io/changes: | + - kind: changed + description: upgrade app version to 0.5.7 + links: + - name: Ollama release v0.5.7 + url: https://github.com/ollama/ollama/releases/tag/v0.5.7 +apiVersion: v2 +appVersion: 0.5.7 +description: Get up and running with large language models locally. +home: https://ollama.ai/ +icon: https://ollama.ai/public/ollama.png +keywords: +- ai +- llm +- llama +- mistral +kubeVersion: ^1.16.0-0 +maintainers: +- email: contact@otwld.com + name: OTWLD +name: ollama +sources: +- https://github.com/ollama/ollama +- https://github.com/otwld/ollama-helm +type: application +version: 1.4.0 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..01a4c80 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 OTWLD + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b6e744d --- /dev/null +++ b/README.md @@ -0,0 +1,295 @@ +![otwld ollama helm chart banner](./banner.png) + +![GitHub License](https://img.shields.io/github/license/otwld/ollama-helm) +[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/ollama-helm)](https://artifacthub.io/packages/helm/ollama-helm/ollama) +[![Build Status](https://drone.otwld.com/api/badges/otwld/ollama-helm/status.svg)](https://drone.otwld.com/otwld/ollama-helm) +[![Discord](https://img.shields.io/badge/Discord-OTWLD-blue?logo=discord&logoColor=white)](https://discord.gg/U24mpqTynB) + +[Ollama](https://ollama.ai/), get up and running with large language models, locally. + +This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama). + +## Requirements + +- Kubernetes: `>= 1.16.0-0` for **CPU only** + +- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD) + +*Not all GPUs are currently supported with ollama (especially with AMD)* + +## Deploying Ollama chart + +To install the `ollama` chart in the `ollama` namespace: + +```console +helm repo add ollama-helm https://otwld.github.io/ollama-helm/ +helm repo update +helm install ollama ollama-helm/ollama --namespace ollama +``` + +## Upgrading Ollama chart + +First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no +backwards incompatible changes. + +Make adjustments to your values as needed, then run `helm upgrade`: + +```console +# -- This pulls the latest version of the ollama chart from the repo. 
+helm repo update +helm upgrade ollama ollama-helm/ollama --namespace ollama --values values.yaml +``` + +## Uninstalling Ollama chart + +To uninstall/delete the `ollama` deployment in the `ollama` namespace: + +```console +helm delete ollama --namespace ollama +``` + +Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete` +parameters and flags. + +## Interact with Ollama + +- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)** +- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md) +- Interact with official client libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client) + and [ollama-python](https://github.com/ollama/ollama-python#custom-client) +- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md) + and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md) + +## Examples + +- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU** + +### Basic values.yaml example with GPU and two models pulled at startup + +``` +ollama: + gpu: + # -- Enable GPU integration + enabled: true + + # -- GPU type: 'nvidia' or 'amd' + type: 'nvidia' + + # -- Specify the number of GPU to 1 + number: 1 + + # -- List of models to pull at container startup + models: + pull: + - mistral + - llama2 +``` + +--- + +### Basic values.yaml example with Ingress + +``` +ollama: + models: + pull: + - llama2 + +ingress: + enabled: true + hosts: + - host: ollama.domain.lan + paths: + - path: / + pathType: Prefix +``` + +- *API is now reachable at `ollama.domain.lan`* + +## Upgrading from 0.X.X to 1.X.X + +Version 1.X.X introduces the ability to load models in memory at startup; as a result, the layout of the values has changed. 
+ +Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading: + +```yaml +ollama: + models: + - mistral + - llama2 +``` + +To: + +```yaml +ollama: + models: + pull: + - mistral + - llama2 +``` + +## Helm Values + +- See [values.yaml](values.yaml) to see the Chart's default values. + +| Key | Type | Default | Description | +|--------------------------------------------|--------|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | Affinity for pod assignment | +| autoscaling.enabled | bool | `false` | Enable autoscaling | +| autoscaling.maxReplicas | int | `100` | Number of maximum replicas | +| autoscaling.minReplicas | int | `1` | Number of minimum replicas | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica | +| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. | +| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go | +| extraEnvFrom | list | `[]` | Additionl environment variables from external sources (like ConfigMap) | +| fullnameOverride | string | `""` | String to fully override template | +| hostIPC | bool | `false` | Use the host’s ipc namespace. | +| hostNetwork | bool | `false` | Use the host's network namespace. 
| +| hostPID | bool | `false` | Use the host’s pid namespace | +| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy | +| image.repository | string | `"ollama/ollama"` | Docker image registry | +| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. | +| imagePullSecrets | list | `[]` | Docker registry secret names as an array | +| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. | +| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) | +| ingress.enabled | bool | `false` | Enable ingress controller resource | +| ingress.hosts[0].host | string | `"ollama.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | | +| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. | +| initContainers | list | `[]` | Init containers to add to the pod | +| knative.containerConcurrency | int | `0` | Knative service container concurrency | +| knative.enabled | bool | `false` | Enable Knative integration | +| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds | +| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds | +| knative.timeoutSeconds | int | `300` | Knative service timeout seconds | +| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pulling) | +| livenessProbe.enabled | bool | `true` | Enable livenessProbe | +| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe | +| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe | +| livenessProbe.path | string | `"/"` | Request path for livenessProbe | +| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe | +| livenessProbe.successThreshold | 
int | `1` | Success threshold for livenessProbe | +| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe | +| nameOverride | string | `""` | String to partially override template (will maintain the release name) | +| namespaceOverride | string | `""` | String to override the namespace | +| nodeSelector | object | `{}` | Node labels for pod assignment. | +| ollama.gpu.enabled | bool | `false` | Enable GPU integration | +| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number | +| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices. If enabled, you will have to specify the mig devices. If enabled is set to false, this section is ignored. | +| ollama.gpu.number | int | `1` | Specify the number of GPUs. If you use the MIG section below, this parameter is ignored. | +| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice | +| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled' is set, the default value is 'nvidia'. If set to 'amd', a 'rocm' suffix is added to the image tag when 'image.tag' is not overridden, because the AMD and CPU/CUDA images are different. | +| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup | +| ollama.models.pull | list | `[]` | List of models to pull at container startup. The more you add, the longer the container will take to start if models are not present. Example: pull: - llama2 - mistral | +| ollama.models.run | list | `[]` | List of models to load in memory at container startup. Example: run: - llama2 - mistral | +| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" | +| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes. Must match those of existing PV or dynamic provisioner. Ref: 
http://kubernetes.io/docs/user-guide/persistent-volumes/ | +| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations | +| persistentVolume.enabled | bool | `false` | Enable persistence using PVC | +| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires persistentVolume.enabled: true | +| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size | +| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class. If defined, storageClassName is set to this value. If set to "-", storageClassName: "", which disables dynamic provisioning. If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) | +| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount. Useful if the volume's root directory is not empty | +| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume mode. If defined, volumeMode is set to this value. If empty (the default) or set to null, no volumeMode spec is set, choosing the default mode. 
| +| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward | +| podAnnotations | object | `{}` | Map of annotations to add to the pods | +| podLabels | object | `{}` | Map of labels to add to the pods | +| podSecurityContext | object | `{}` | Pod Security Context | +| readinessProbe.enabled | bool | `true` | Enable readinessProbe | +| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe | +| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe | +| readinessProbe.path | string | `"/"` | Request path for readinessProbe | +| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe | +| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe | +| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe | +| replicaCount | int | `1` | Number of replicas | +| resources.limits | object | `{}` | Pod limit | +| resources.requests | object | `{}` | Pod requests | +| runtimeClassName | string | `""` | Specify runtime class | +| securityContext | object | `{}` | Container Security Context | +| service.annotations | object | `{}` | Annotations to add to the service | +| service.loadBalancerIP | string | `nil` | Load Balancer IP address | +| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' | +| service.port | int | `11434` | Service port | +| service.type | string | `"ClusterIP"` | Service type | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | +| tolerations | list | `[]` | Tolerations for pod assignment | +| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment | +| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate | +| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | +| volumes | list | `[]` | Additional volumes on the output Deployment definition. | + +---------------------------------------------- + +## Core team + + + + + + +
+ Jean Baptiste Detroyes
     Jean Baptiste Detroyes     
+
+ + +
+
+ Nathan Tréhout
     Nathan Tréhout     
+
+ + + +
+
+ +## Support + +- For questions, suggestions, and discussion about Ollama please refer to + the [Ollama issue page](https://github.com/ollama/ollama/issues) +- For questions, suggestions, and discussion about this chart please + visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join + our [OTWLD Discord](https://discord.gg/U24mpqTynB) diff --git a/artifacthub-repo.yml b/artifacthub-repo.yml new file mode 100644 index 0000000..63d1dd1 --- /dev/null +++ b/artifacthub-repo.yml @@ -0,0 +1,15 @@ +# Artifact Hub repository metadata file +# +# Some settings like the verified publisher flag or the ignored packages won't +# be applied until the next time the repository is processed. Please keep in +# mind that the repository won't be processed if it has not changed since the +# last time it was processed. Depending on the repository kind, this is checked +# in a different way. For Helm http based repositories, we consider it has +# changed if the `index.yaml` file changes. For git based repositories, it does +# when the hash of the last commit in the branch you set up changes. This does +# NOT apply to ownership claim operations, which are processed immediately. +# +repositoryID: 2ccfd0bd-c123-483e-96f6-eb446fa850ac +owners: + - name: Outworld + email: tech@outworld.fr \ No newline at end of file diff --git a/banner.png b/banner.png new file mode 100644 index 0000000..72e6f57 Binary files /dev/null and b/banner.png differ diff --git a/templates/NOTES.txt b/templates/NOTES.txt new file mode 100644 index 0000000..4f4889d --- /dev/null +++ b/templates/NOTES.txt @@ -0,0 +1,25 @@ +1. Get the application URL by running these commands: +{{- if .Values.knative.enabled }} + export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . 
}} -o jsonpath={.status.url}) + echo "Visit $KSERVICE_URL to use your application" +{{- else if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . 
}},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl new file mode 100644 index 0000000..3a4c4b0 --- /dev/null +++ b/templates/_helpers.tpl @@ -0,0 +1,91 @@ +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "ollama.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "ollama.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ollama.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ollama.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ollama.labels" -}} +helm.sh/chart: {{ include "ollama.chart" . 
}} +{{ include "ollama.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ollama.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ollama.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ollama.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the pull model list +*/}} +{{- define "ollama.modelPullList" -}} +{{- with .Values.ollama.models.pull -}} +{{- . | uniq | join " " -}} +{{- end -}} +{{- end -}} + +{{/* +Create the run model list +*/}} +{{- define "ollama.modelRunList" -}} +{{- with .Values.ollama.models.run -}} +{{- . | uniq | join " " -}} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/templates/deployment.yaml b/templates/deployment.yaml new file mode 100644 index 0000000..c4ff392 --- /dev/null +++ b/templates/deployment.yaml @@ -0,0 +1,213 @@ +--- +{{- if not .Values.knative.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ollama.fullname" . }} + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + {{- if or .Values.updateStrategy.type .Values.updateStrategy.rollingUpdate }} + strategy: {{ .Values.updateStrategy | toYaml | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- include "ollama.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + labels: + {{- include "ollama.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- if .Values.hostIPC }} + hostIPC: {{ .Values.hostIPC }} + {{- end }} + {{- if .Values.hostPID }} + hostPID: {{ .Values.hostPID }} + {{- end }} + {{- if .Values.hostNetwork }} + hostNetwork: {{ .Values.hostNetwork }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "ollama.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if .Values.runtimeClassName }} + runtimeClassName: {{ .Values.runtimeClassName | quote }} + {{- end }} + {{- with .Values.initContainers }} + initContainers: + {{- tpl (toYaml . ) $ | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + env: + {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}} + - name: PATH + value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + {{- end}} + {{- with .Values.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + {{- with .Values.extraEnvFrom }} + {{- toYaml . | nindent 12 }} + {{- end }} + args: + {{- with .Values.extraArgs }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.resources }} + resources: + {{- $limits := default dict .Values.resources.limits }} + {{- if .Values.ollama.gpu.enabled }} + # If gpu is enabled, it can either be a NVIDIA card or a AMD card + {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }} + # NVIDIA is assumed by default if no value is set and GPU is enabled + # NVIDIA cards can have mig enabled (i.e., the card is sliced into parts + # Therefore, the first case is no migs enabled + {{- if or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled ) }} + {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} + {{- $limits = merge $limits $gpuLimit }} + # Second case is mig is enabled + {{- else if or (.Values.ollama.gpu.mig.enabled) }} + # Initialize empty dictionary + {{- $migDevices := dict -}} + # Loop over the entries in the mig devices + {{- range $key, $value := .Values.ollama.gpu.mig.devices }} + {{- $migKey := printf "nvidia.com/mig-%s" $key -}} + {{- $migDevices = merge $migDevices (dict $migKey $value) -}} + {{- end }} + {{- $limits = merge $limits $migDevices}} + {{- end }} + {{- end }} + {{- if eq .Values.ollama.gpu.type "amd" }} + {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }} + {{- $limits = merge $limits $gpuLimit }} + {{- end }} + {{- end }} + {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }} + {{- toYaml $ressources | nindent 12 }} + {{- end}} + volumeMounts: + - name: ollama-data + mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }} + {{- if .Values.persistentVolume.subPath }} + subPath: {{ .Values.persistentVolume.subPath }} + {{- end }} + {{- with .Values.volumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + {{- with .Values.lifecycle}} + lifecycle: + {{- toYaml . | nindent 12 }} + {{- else }} + {{- if or .Values.ollama.models.pull .Values.ollama.models.run }} + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - | + while ! /bin/ollama ps > /dev/null 2>&1; do + sleep 5 + done + {{- if .Values.ollama.models.pull }} + echo "{{ include "ollama.modelPullList" . }}" | xargs -n1 /bin/ollama pull {{ternary "--insecure" "" .Values.ollama.insecure | toString }} + {{- end }} + {{- if .Values.ollama.models.run }} + echo "{{ include "ollama.modelRunList" . }}" | xargs -n1 /bin/ollama run + {{- end }} + {{- end }} + {{- end }} + volumes: + - name: ollama-data + {{- if .Values.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }} + {{- else }} + emptyDir: { } + {{- end }} + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.ollama.gpu.enabled .Values.tolerations }} + tolerations: + {{- if and .Values.ollama.gpu.enabled (and + ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) + ( or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled)) + ) }} + - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}" + operator: Exists + effect: NoSchedule + {{- else if and .Values.ollama.gpu.enabled (and + ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) + (( .Values.ollama.gpu.mig.enabled)) + ) }} + {{- range $key, $value := .Values.ollama.gpu.mig.devices }} + - key: nvidia.com/mig-{{ $key }} + operator: Exists + effect: NoSchedule + {{- end }} + {{- end }} + {{- with .Values.tolerations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/templates/hpa.yaml b/templates/hpa.yaml new file mode 100644 index 0000000..b249330 --- /dev/null +++ b/templates/hpa.yaml @@ -0,0 +1,34 @@ +--- +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "ollama.fullname" . }} + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "ollama.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/templates/ingress.yaml b/templates/ingress.yaml new file mode 100644 index 0000000..859fc3f --- /dev/null +++ b/templates/ingress.yaml @@ -0,0 +1,63 @@ +--- +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "ollama.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/templates/knative/service.yaml b/templates/knative/service.yaml new file mode 100644 index 0000000..cad2429 --- /dev/null +++ b/templates/knative/service.yaml @@ -0,0 +1,156 @@ +--- +{{- if .Values.knative.enabled }} +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: {{ include "ollama.fullname" . }} + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} +spec: + template: + spec: + containerConcurrency: {{ .Values.knative.containerConcurrency }} + timeoutSeconds: {{ .Values.knative.timeoutSeconds }} + responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }} + idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "ollama.serviceAccountName" . 
}} + {{- if .Values.runtimeClassName }} + runtimeClassName: {{ .Values.runtimeClassName | quote }} + {{- end }} + {{- with .Values.initContainers }} + initContainers: + {{- tpl (toYaml . ) $ | nindent 8 }} + {{- end }} + containers: + - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + ports: + - containerPort: {{ .Values.service.port }} + env: + {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}} + - name: PATH + value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + {{- end}} + {{- with .Values.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + {{- with .Values.extraEnvFrom }} + {{- toYaml . | nindent 12 }} + {{- end }} + args: + {{- with .Values.extraArgs }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.resources }} + resources: + {{- $limits := default dict .Values.resources.limits }} + {{- if .Values.ollama.gpu.enabled }} + {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }} + {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} + {{- $limits = merge $limits $gpuLimit }} + {{- end }} + {{- if eq .Values.ollama.gpu.type "amd" }} + {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }} + {{- $limits = merge $limits $gpuLimit }} + {{- end }} + {{- end }} + {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }} + {{- toYaml $ressources | nindent 12 }} + {{- end}} + volumeMounts: + - name: ollama-data + mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }} + {{- if .Values.persistentVolume.subPath }} + subPath: {{ .Values.persistentVolume.subPath }} + {{- end }} + {{- with .Values.volumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + {{- with .Values.lifecycle}} + lifecycle: + {{- toYaml . | nindent 12 }} + {{- else }} + {{- if or .Values.ollama.models.pull .Values.ollama.models.run }} + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - | + while ! /bin/ollama ps > /dev/null 2>&1; do + sleep 5 + done + {{- if .Values.ollama.models.pull }} + echo "{{ include "ollama.modelPullList" . }}" | xargs -n1 /bin/ollama pull {{ternary "--insecure" "" .Values.ollama.insecure | toString }} + {{- end }} + {{- if .Values.ollama.models.run }} + echo "{{ include "ollama.modelRunList" . }}" | xargs -n1 /bin/ollama run + {{- end }} + {{- end }} + {{- end }} + volumes: + - name: ollama-data + {{- if .Values.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }} + {{- else }} + emptyDir: { } + {{- end }} + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.ollama.gpu.enabled .Values.tolerations }} + tolerations: + {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }} + - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}" + operator: Exists + effect: NoSchedule + {{- end }} + {{- with .Values.tolerations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/templates/pvc.yaml b/templates/pvc.yaml new file mode 100644 index 0000000..6708cb9 --- /dev/null +++ b/templates/pvc.yaml @@ -0,0 +1,35 @@ +--- +{{- if .Values.persistentVolume.enabled -}} +{{- if not .Values.persistentVolume.existingClaim -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + {{- if .Values.persistentVolume.annotations }} + annotations: +{{ toYaml .Values.persistentVolume.annotations | indent 4 }} + {{- end }} + labels: + {{- include "ollama.labels" . | nindent 4 }} + name: {{ template "ollama.fullname" . }} + namespace: {{ include "ollama.namespace" . 
}} +spec: + accessModes: +{{ toYaml .Values.persistentVolume.accessModes | indent 4 }} +{{- if .Values.persistentVolume.storageClass }} +{{- if (eq "-" .Values.persistentVolume.storageClass) }} + storageClassName: "" +{{- else }} + storageClassName: "{{ .Values.persistentVolume.storageClass }}" +{{- end }} +{{- end }} +{{- if .Values.persistentVolume.volumeMode }} + volumeMode: "{{ .Values.persistentVolume.volumeMode }}" +{{- end }} +{{- if .Values.persistentVolume.volumeName }} + volumeName: "{{ .Values.persistentVolume.volumeName }}" +{{- end }} + resources: + requests: + storage: "{{ .Values.persistentVolume.size }}" +{{- end -}} +{{- end -}} diff --git a/templates/service.yaml b/templates/service.yaml new file mode 100644 index 0000000..6540fac --- /dev/null +++ b/templates/service.yaml @@ -0,0 +1,29 @@ +--- +{{- if not .Values.knative.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ollama.fullname" . }} + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + {{- if contains "NodePort" .Values.service.type }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP | quote }} +{{- end }} + selector: + {{- include "ollama.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/templates/serviceaccount.yaml b/templates/serviceaccount.yaml new file mode 100644 index 0000000..25ac575 --- /dev/null +++ b/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if .Values.serviceAccount.create -}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "ollama.serviceAccountName" . }} + namespace: {{ include "ollama.namespace" . 
}} + labels: + {{- include "ollama.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/templates/tests/test-connection.yaml b/templates/tests/test-connection.yaml new file mode 100644 index 0000000..5ccb42c --- /dev/null +++ b/templates/tests/test-connection.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "ollama.fullname" . }}-test-connection" + namespace: {{ include "ollama.namespace" . }} + labels: + {{- include "ollama.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "ollama.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/values-test.yaml b/values-test.yaml new file mode 100644 index 0000000..916686a --- /dev/null +++ b/values-test.yaml @@ -0,0 +1,311 @@ +# Default values for ollama-helm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of replicas +replicaCount: 1 + +# Knative configuration +knative: + # -- Enable Knative integration + enabled: false + # -- Knative service container concurrency + containerConcurrency: 0 + # -- Knative service timeout seconds + timeoutSeconds: 300 + # -- Knative service response start timeout seconds + responseStartTimeoutSeconds: 300 + # -- Knative service idle timeout seconds + idleTimeoutSeconds: 300 + +# Docker image +image: + # -- Docker image registry + repository: ollama/ollama + + # -- Docker pull policy + pullPolicy: IfNotPresent + + # -- Docker image tag, overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +# -- Docker registry secret names as an array +imagePullSecrets: [] + +# -- String to partially override template (will maintain the release name) +nameOverride: "" + +# -- String to fully override template +fullnameOverride: "" + +# Ollama parameters +ollama: + gpu: + # -- Enable GPU integration + enabled: false + + # -- GPU type: 'nvidia' or 'amd' + # If 'ollama.gpu.enabled', default value is nvidia + # If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not overridden + # This is because AMD and CPU/CUDA use different images + type: 'nvidia' + + # -- Specify the number of GPU + # If you use MIG section below then this parameter is ignored + number: 1 + + # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice + nvidiaResource: "nvidia.com/gpu" + # nvidiaResource: "nvidia.com/mig-1g.10gb" # example + # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used) + + mig: + # -- Enable multiple mig devices + # If enabled you will have to specify the mig devices + # If enabled is set to false this section is ignored + enabled: false + + # -- Specify the mig devices and the corresponding number + devices: {} + # 1g.10gb: 1 + # 3g.40gb: 1 + + models: + pull: # -- List of models to pull at container startup (templates read ollama.models.pull / ollama.models.run) + - llama2 + + # -- Add insecure flag for pulling at container startup + insecure: false + + # -- Override ollama-data volume mount path, default: "/root/.ollama" + mountPath: "" + +# Service account +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +serviceAccount: + # -- Specifies whether a service account should be created + create: true + + # -- Automatically mount a ServiceAccount's API credentials? + automount: true + + # -- Annotations to add to the service account + annotations: {} + + # -- The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +# -- Map of annotations to add to the pods +podAnnotations: {} + +# -- Map of labels to add to the pods +podLabels: {} + +# -- Pod Security Context +podSecurityContext: {} + # fsGroup: 2000 + +# -- Container Security Context +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# -- Specify runtime class +runtimeClassName: "" + +# Configure Service +service: + + # -- Service type + type: ClusterIP + + # -- Service port + port: 11434 + + # -- Service node port when service type is 'NodePort' + nodePort: 31434 + + # -- Load Balancer IP address + loadBalancerIP: + + # -- Annotations to add to the service + annotations: {} + +# Configure extra options for liveness probe +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +livenessProbe: + # -- Enable livenessProbe + enabled: true + + # -- Request path for livenessProbe + path: / + + # -- Initial delay seconds for livenessProbe + initialDelaySeconds: 60 + + # -- Period seconds for livenessProbe + periodSeconds: 10 + + # -- Timeout seconds for livenessProbe + timeoutSeconds: 5 + + # -- Failure threshold for livenessProbe + failureThreshold: 6 + + # -- Success threshold for livenessProbe + successThreshold: 1 + +# Configure extra options for readiness probe +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +readinessProbe: + # -- Enable readinessProbe + enabled: true + + # -- Request path for readinessProbe + path: / + + # -- Initial delay seconds for readinessProbe + initialDelaySeconds: 30 + + # -- Period seconds for readinessProbe + periodSeconds: 5 + + # -- Timeout seconds for readinessProbe + timeoutSeconds: 3 + + # -- Failure threshold for readinessProbe + failureThreshold: 6 + + # -- Success threshold 
for readinessProbe + successThreshold: 1 + +# Configure autoscaling +autoscaling: + # -- Enable autoscaling + enabled: false + + # -- Number of minimum replicas + minReplicas: 1 + + # -- Number of maximum replicas + maxReplicas: 100 + + # -- CPU usage to target replica + targetCPUUtilizationPercentage: 80 + + # -- targetMemoryUtilizationPercentage: 80 + +# -- Additional volumes on the output Deployment definition. +volumes: [] +# -- - name: foo +# secret: +# secretName: mysecret +# optional: false + +# -- Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# -- - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +# -- Additional arguments on the output Deployment definition. +extraArgs: [] + +# -- Additional environment variables on the output Deployment definition. +# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go +extraEnv: [] +# - name: OLLAMA_DEBUG +# value: "1" + +# -- Additional environment variables from external sources (like ConfigMap) +extraEnvFrom: [] +# - configMapRef: +# name: my-env-configmap + +# Enable persistence using Persistent Volume Claims +# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ +persistentVolume: + # -- Enable persistence using PVC + enabled: false + + # -- Ollama server data Persistent Volume access modes + # Must match those of existing PV or dynamic provisioner + # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + accessModes: + - ReadWriteOnce + + # -- Ollama server data Persistent Volume annotations + annotations: {} + + # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the + # created + ready PVC here. If set, this Chart will not create the default PVC.
+ # Requires persistentVolume.enabled: true + existingClaim: "" + + # -- Ollama server data Persistent Volume size + size: 30Gi + + # -- Ollama server data Persistent Volume Storage Class + # If defined, storageClassName: + # If set to "-", storageClassName: "", which disables dynamic provisioning + # If undefined (the default) or set to null, no storageClassName spec is + # set, choosing the default provisioner. (gp2 on AWS, standard on + # GKE, AWS & OpenStack) + storageClass: "" + + # -- Ollama server data Persistent Volume Mode + # If defined, volumeMode: + # If empty (the default) or set to null, no volumeMode spec is + # set, choosing the default mode. + volumeMode: "" + + # -- Subdirectory of Ollama server data Persistent Volume to mount + # Useful if the volume's root directory is not empty + subPath: "" + + # -- Pre-existing PV to attach this claim to + # Useful if a CSI auto-provisions a PV for you and you want to always + # reference the PV moving forward + volumeName: "" + +# -- Node labels for pod assignment. +nodeSelector: {} + +# -- Tolerations for pod assignment +tolerations: [] + +# -- Affinity for pod assignment +affinity: {} + +# -- Lifecycle for pod assignment (override ollama.models startup pull/run) +lifecycle: {} + +# How to replace existing pods +updateStrategy: + # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate + type: "Recreate" + +# -- Topology Spread Constraints for pod assignment +topologySpreadConstraints: {} + +# -- Init containers to add to the pod +initContainers: [] +# - name: startup-tool +# image: alpine:3 +# command: [sh, -c] +# args: +# - echo init + +# -- Use the host’s ipc namespace. +hostIPC: false + +# -- Use the host’s pid namespace +hostPID: false + +# -- Use the host's network namespace.
+hostNetwork: false \ No newline at end of file diff --git a/values.yaml b/values.yaml new file mode 100644 index 0000000..baac43f --- /dev/null +++ b/values.yaml @@ -0,0 +1,401 @@ +# Default values for ollama-helm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of replicas +replicaCount: 1 + +# Knative configuration +knative: + # -- Enable Knative integration + enabled: false + # -- Knative service container concurrency + containerConcurrency: 0 + # -- Knative service timeout seconds + timeoutSeconds: 300 + # -- Knative service response start timeout seconds + responseStartTimeoutSeconds: 300 + # -- Knative service idle timeout seconds + idleTimeoutSeconds: 300 + +# Docker image +image: + # -- Docker image registry + repository: docker.io/ollama/ollama + + # -- Docker pull policy + pullPolicy: IfNotPresent + + # -- Docker image tag, overrides the image tag whose default is the chart appVersion. + tag: "" + +# -- Docker registry secret names as an array +imagePullSecrets: [] + +# -- String to partially override template (will maintain the release name) +nameOverride: "" + +# -- String to fully override template +fullnameOverride: "" + +# -- String to fully override namespace +namespaceOverride: "" + +# Ollama parameters +ollama: + gpu: + # -- Enable GPU integration + enabled: true + + # -- GPU type: 'nvidia' or 'amd' + # If 'ollama.gpu.enabled', default value is nvidia + # If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not overridden + # This is because AMD and CPU/CUDA use different images + type: "amd" + + # -- Specify the number of GPU + # If you use MIG section below then this parameter is ignored + number: 1 + + # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice + #nvidiaResource: "nvidia.com/gpu" + # nvidiaResource: "nvidia.com/mig-1g.10gb" # example + # If you want to use more than one NVIDIA MIG you can use the following syntax (then
nvidiaResource is ignored and only the configuration in the following MIG section is used) + + mig: + # -- Enable multiple mig devices + # If enabled you will have to specify the mig devices + # If enabled is set to false this section is ignored + enabled: false + + # -- Specify the mig devices and the corresponding number + devices: {} + # 1g.10gb: 1 + # 3g.40gb: 1 + + models: + #clean: true + pull: + #- deepseek-r1:1.5b + - smollm2:1.7b + run: + #- deepseek-r1:1.5b + - smollm2:1.7b + insecure: false + + # -- Override ollama-data volume mount path, default: "/root/.ollama" + mountPath: "" + +# Service account +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +serviceAccount: + # -- Specifies whether a service account should be created + create: true + + # -- Automatically mount a ServiceAccount's API credentials? + automount: true + + # -- Annotations to add to the service account + annotations: {} + + # -- The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +# -- Map of annotations to add to the pods +podAnnotations: {} + +# -- Map of labels to add to the pods +podLabels: {} + +# -- Pod Security Context +podSecurityContext: + # fsGroup: 2000 + # runAsUser: 0 + #runAsGroup: 0 # Primary group + #supplementalGroups: + # - 39 + # - 998 + +# -- Container Security Context +securityContext: + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + privileged: true + +# -- Specify runtime class +runtimeClassName: "" + +# Configure Service +service: + # -- Service type + type: ClusterIP + + # -- Service port + port: 11434 + + # -- Service node port when service type is 'NodePort' + nodePort: 31434 + + # -- Load Balancer IP address + loadBalancerIP: + + # -- Annotations to add to the service + annotations: {} + +# Configure the ingress resource that allows you to access the +ingress: + # -- Enable ingress controller resource + enabled: true + + # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) + className: "openshift-default" + + # -- Additional annotations for the Ingress resource. + annotations: + # kubernetes.io/ingress.class: traefik + kubernetes.io/ingress.class: openshift-default + kubernetes.io/tls-acme: "true" + cert-manager.io/cluster-issuer: "letsencrypt-dns01-cloudflare" + haproxy.router.openshift.io/timeout: 600s + + # The list of hostnames to be covered with this ingress record. + hosts: + - host: ollama.apilab.us + paths: + - path: / + pathType: Prefix + - host: ollama.apilab.us + paths: + - path: / + pathType: Prefix + + # -- The tls configuration for hostnames to be covered with this ingress record. 
+ tls: + - secretName: ollama-tls + hosts: + - ollama.apilab.us + +# Configure resource requests and limits +# ref: http://kubernetes.io/docs/user-guide/compute-resources/ +resources: + # -- Pod requests + requests: + # Memory request + memory: 4096Mi + + # CPU request + cpu: 2000m + + # -- Pod limit + limits: + # Memory limit + memory: 8192Mi + + # CPU limit + cpu: 4000m + amd.com/gpu: 1 + +# Configure extra options for liveness probe +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +livenessProbe: + # -- Enable livenessProbe + enabled: true + + # -- Request path for livenessProbe + path: / + + # -- Initial delay seconds for livenessProbe + initialDelaySeconds: 60 + + # -- Period seconds for livenessProbe + periodSeconds: 10 + + # -- Timeout seconds for livenessProbe + timeoutSeconds: 5 + + # -- Failure threshold for livenessProbe + failureThreshold: 6 + + # -- Success threshold for livenessProbe + successThreshold: 1 + +# Configure extra options for readiness probe +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +readinessProbe: + # -- Enable readinessProbe + enabled: true + + # -- Request path for readinessProbe + path: / + + # -- Initial delay seconds for readinessProbe + initialDelaySeconds: 30 + + # -- Period seconds for readinessProbe + periodSeconds: 5 + + # -- Timeout seconds for readinessProbe + timeoutSeconds: 3 + + # -- Failure threshold for readinessProbe + failureThreshold: 6 + + # -- Success threshold for readinessProbe + successThreshold: 1 + +# Configure autoscaling +autoscaling: + # -- Enable autoscaling + enabled: false + + # -- Number of minimum replicas + minReplicas: 1 + + # -- Number of maximum replicas + maxReplicas: 100 + + # -- CPU usage to target replica + targetCPUUtilizationPercentage: 80 + + # -- targetMemoryUtilizationPercentage: 80 + +# -- Additional volumes on the output Deployment definition. 
+volumes: + # -- - name: foo + # secret: + # secretName: mysecret + # optional: false + + - name: host-volumes + # hostPath: + # path: /opt/amdgpu/share/libdrm + # - name: kfd + # hostPath: + # path: /dev/kfd + - name: dri + hostPath: + path: /dev/dri + type: Directory + +# -- Additional volumeMounts on the output Deployment definition. +volumeMounts: + # -- - name: foo + # mountPath: "/etc/foo" + # readOnly: true + # - name: host-volumes + # mountPath: /opt/amdgpu/share/libdrm + - name: dri + mountPath: /dev/dri +# - name: kfd +# mountPath: /dev/kfd + +# -- Additional arguments on the output Deployment definition. +extraArgs: [] + +# -- Additional environment variables on the output Deployment definition. +# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go +extraEnv: + - name: OLLAMA_DEBUG + value: "0" + - name: GIN_MODE + value: "release" + - name: HSA_OVERRIDE_GFX_VERSION + value: "11.0.2" + - name: HIP_VISIBLE_DEVICES + value: "0" + - name: GPU_DEVICE_ORDINAL + value: "0" + +# -- Additional environment variables from external sources (like ConfigMap) +extraEnvFrom: [] +# - configMapRef: +# name: my-env-configmap + +# Enable persistence using Persistent Volume Claims +# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ +persistentVolume: + # -- Enable persistence using PVC + enabled: true + + # -- Ollama server data Persistent Volume access modes + # Must match those of existing PV or dynamic provisioner + # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + accessModes: + - ReadWriteOnce + + # -- Ollama server data Persistent Volume annotations + annotations: {} + + # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the + # created + ready PVC here. If set, this Chart will not create the default PVC.
+ # Requires persistentVolume.enabled: true + # existingClaim: "ollama" + + # -- Ollama server data Persistent Volume size + size: 30Gi + + # -- Ollama server data Persistent Volume Storage Class + # If defined, storageClassName: + # If set to "-", storageClassName: "", which disables dynamic provisioning + # If undefined (the default) or set to null, no storageClassName spec is + # set, choosing the default provisioner. (gp2 on AWS, standard on + # GKE, AWS & OpenStack) + storageClass: "local-nvme-retain" + + # -- Ollama server data Persistent Volume Mode + # If defined, volumeMode: + # If empty (the default) or set to null, no volumeMode spec is + # set, choosing the default mode. + volumeMode: "" + + # -- Subdirectory of Ollama server data Persistent Volume to mount + # Useful if the volume's root directory is not empty + #subPath: "ollama-data" + + # -- Pre-existing PV to attach this claim to + # Useful if a CSI auto-provisions a PV for you and you want to always + # reference the PV moving forward + # volumeName: "pvc-9583b3c6-7bbd-403c-abac-6fe728dfb8c4" + +# -- Node labels for pod assignment. +#nodeSelector: +# topology.kubernetes.io/zone: lab-sno + +# -- Tolerations for pod assignment +tolerations: [] + +# -- Affinity for pod assignment +affinity: {} + +# -- Lifecycle for pod assignment (override ollama.models startup pull/run) +lifecycle: {} + +# How to replace existing pods +updateStrategy: + # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate + type: "Recreate" + +# -- Topology Spread Constraints for pod assignment +topologySpreadConstraints: {} + +# -- Init containers to add to the pod +initContainers: [] +# - name: startup-tool +# image: alpine:3 +# command: [sh, -c] +# args: +# - echo init + +# -- Use the host’s ipc namespace. +hostIPC: false + +# -- Use the host’s pid namespace +hostPID: false + +# -- Use the host's network namespace. +hostNetwork: false