Files
gpu-operator-charts/templates/pre-upgrade-hook.yaml
2025-12-16 17:56:13 +11:00

229 lines
7.8 KiB
YAML

---
# Identity used by the pod of the pre-upgrade-check Job defined in this file.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: pre-upgrade-check-sa
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    helm.sh/hook: "pre-upgrade,pre-rollback"
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    helm.sh/hook-delete-policy: "before-hook-creation, hook-succeeded"
    # Hooks are executed in ascending weight order.
    helm.sh/hook-weight: "0"
---
# Read-only access to DeviceConfig custom resources (API group amd.com).
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pre-upgrade-check-cluster-role
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    helm.sh/hook: "pre-upgrade,pre-rollback"
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    helm.sh/hook-delete-policy: "before-hook-creation, hook-succeeded"
    # Hooks are executed in ascending weight order.
    helm.sh/hook-weight: "0"
rules:
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [list, get]
---
# Grants pre-upgrade-check-cluster-role to the pre-upgrade-check-sa
# ServiceAccount in the release namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pre-upgrade-check-cluster-role-binding
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    "helm.sh/hook": pre-upgrade,pre-rollback
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded
    # Hooks are executed in ascending weight order; weight 1 places this
    # binding after the weight-0 ServiceAccount and ClusterRole it refers to.
    "helm.sh/hook-weight": "1"
subjects:
  - kind: ServiceAccount
    name: pre-upgrade-check-sa
    # Quoted so that a namespace such as "123" or "true" is still rendered
    # as a YAML string rather than a number or boolean.
    namespace: {{ .Release.Namespace | quote }}
roleRef:
  kind: ClusterRole
  name: pre-upgrade-check-cluster-role
  apiGroup: rbac.authorization.k8s.io
---
# Pre-upgrade gate: a one-shot Job that inspects every DeviceConfig in the
# release namespace and fails (blocking the Helm operation) when any node
# module status is in one of the in-progress upgrade states listed below.
apiVersion: batch/v1
kind: Job
metadata:
  name: pre-upgrade-check
  # Quoted so that a namespace such as "123" or "true" is still rendered
  # as a YAML string rather than a number or boolean.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
  annotations:
    # Run as a Helm hook before an upgrade or a rollback.
    "helm.sh/hook": pre-upgrade,pre-rollback
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm itself does not delete a failed Job.
    "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded
    # Hooks are executed in ascending weight order; weight 2 places the Job
    # after the ServiceAccount, ClusterRole and ClusterRoleBinding it needs.
    "helm.sh/hook-weight": "2"
spec:
  backoffLimit: 0  # single attempt: never create a replacement pod
  ttlSecondsAfterFinished: 60  # finished Job is kept for 1 min, then deleted
  template:
    spec:
      serviceAccountName: pre-upgrade-check-sa
      containers:
        - name: pre-upgrade-check
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag }}
          # Same pull policy as the upgrade-crd Job, which uses the same image.
          imagePullPolicy: {{ .Values.controllerManager.manager.imagePullPolicy }}
          command:
            - /bin/sh
            - -c
            - |
              # Ignore the lack of CRDs, probably haven't actually been installed yet
              # this provides idempotency when "things" don't understand the difference between
              # install and upgrade. E.g. Argo turns pre-upgrade hook into its PreSync hook
              installed=$(kubectl api-resources -owide | grep -i amd.com | grep -i deviceconfig)
              # Quoted: the matched line contains several words, which would
              # otherwise be passed to test(1) as separate arguments.
              if [ -z "${installed}" ]; then
                exit 0
              fi
              # List all DeviceConfig CRs
              deviceconfigs=$(kubectl get deviceconfigs -n {{ .Release.Namespace }} -o json)
              echo "DeviceConfigs JSON:"
              echo "$deviceconfigs" | jq .
              # Check if any UpgradeState is in the blocked states.
              # jq -e derives its exit status from the LAST output value only,
              # so the per-item results are folded into one boolean with
              # any(generator; condition) to cover every DeviceConfig.
              blocked_states='["Upgrade-Not-Started", "Upgrade-Started", "Install-In-Progress", "Upgrade-In-Progress"]'
              if echo "$deviceconfigs" | jq --argjson blocked_states "$blocked_states" -e '
                any(.items[];
                    .status.nodeModuleStatus // {} |
                    to_entries |
                    any(.value.status as $state | ($blocked_states | index($state))))' > /dev/null; then
                echo "Upgrade blocked: Some DeviceConfigs are in a disallowed UpgradeState."
                exit 1
              else
                echo "All DeviceConfigs are in an allowed state. Proceeding with upgrade."
                exit 0
              fi
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      restartPolicy: Never
{{- if .Values.upgradeCRD }}
---
# Identity used by the pod of the upgrade-crd Job; rendered only when
# .Values.upgradeCRD is set.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: upgrade-crd-hook-sa
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    helm.sh/hook: "pre-upgrade,pre-rollback"
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    helm.sh/hook-delete-policy: "before-hook-creation, hook-succeeded"
    # Hooks are executed in ascending weight order.
    helm.sh/hook-weight: "1"
---
# Permissions on CustomResourceDefinitions for the upgrade-crd hook.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: upgrade-crd-hook-cluster-role
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    helm.sh/hook: "pre-upgrade,pre-rollback"
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    helm.sh/hook-delete-policy: "before-hook-creation, hook-succeeded"
    # Hooks are executed in ascending weight order.
    helm.sh/hook-weight: "1"
rules:
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [create, get, list, watch, patch, update]
---
# Grants upgrade-crd-hook-cluster-role to the upgrade-crd-hook-sa
# ServiceAccount in the release namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: upgrade-crd-hook-cluster-role-binding
  annotations:
    # Created as a Helm hook ahead of an upgrade or a rollback.
    "helm.sh/hook": pre-upgrade,pre-rollback
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm leaves it in place when the hook fails.
    "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded
    # Hooks are executed in ascending weight order; weight 2 places this
    # binding after the weight-1 ServiceAccount and ClusterRole it refers to.
    "helm.sh/hook-weight": "2"
subjects:
  - kind: ServiceAccount
    name: upgrade-crd-hook-sa
    # Quoted so that a namespace such as "123" or "true" is still rendered
    # as a YAML string rather than a number or boolean.
    namespace: {{ .Release.Namespace | quote }}
roleRef:
  kind: ClusterRole
  name: upgrade-crd-hook-cluster-role
  apiGroup: rbac.authorization.k8s.io
---
# CRD refresh Job: applies the CRD manifests found under
# /opt/helm-charts-crds-k8s in the container image before an upgrade or
# rollback. Which manifests are applied depends on the
# node-feature-discovery.enabled and kmm.enabled values.
apiVersion: batch/v1
kind: Job
metadata:
  name: upgrade-crd
  # Quoted so that a namespace such as "123" or "true" is still rendered
  # as a YAML string rather than a number or boolean.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
  annotations:
    # Run as a Helm hook before an upgrade or a rollback.
    "helm.sh/hook": pre-upgrade,pre-rollback
    # Deleted before the next hook run and after a successful run; there is
    # no hook-failed policy, so Helm itself does not delete a failed Job.
    "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded
    # Hooks are executed in ascending weight order; weight 3 is the highest
    # weight used in this file, so this Job runs last.
    "helm.sh/hook-weight": "3"
spec:
  template:
    metadata:
      name: upgrade-crd
    spec:
      serviceAccountName: upgrade-crd-hook-sa
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: upgrade-crd
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag }}
          imagePullPolicy: {{ .Values.controllerManager.manager.imagePullPolicy }}
          command:
            - /bin/sh
            - -c
            - |
              # Stop at the first failed apply. Without this the script's exit
              # status is that of the last command only, so an earlier failure
              # would be reported as success.
              set -e
              kubectl apply -f /opt/helm-charts-crds-k8s/deviceconfig-crd.yaml
              {{- if index .Values "node-feature-discovery" "enabled" }}
              kubectl apply -f /opt/helm-charts-crds-k8s/nfd-api-crds.yaml
              {{- end }}
              {{- if .Values.kmm.enabled }}
              kubectl apply -f /opt/helm-charts-crds-k8s/module-crd.yaml
              kubectl apply -f /opt/helm-charts-crds-k8s/nodemodulesconfig-crd.yaml
              {{- end }}
      # A failed container is restarted in place so the applies are retried.
      restartPolicy: OnFailure
{{- end }}
# Run helm upgrade with --no-hooks to bypass the pre-upgrade hook