first commit

This commit is contained in:
2025-12-16 17:56:13 +11:00
commit 2da0e4f030
70 changed files with 11317 additions and 0 deletions

62
templates/_helpers.tpl Normal file
View File

@@ -0,0 +1,62 @@
{{/*
Chart name used in names and labels.
Takes .Values.nameOverride when it is set, otherwise .Chart.Name, cuts the
result to 63 characters and drops a trailing "-".
*/}}
{{- define "helm-charts-k8s.name" -}}
{{- trimSuffix "-" (trunc 63 (default .Chart.Name .Values.nameOverride)) }}
{{- end }}
{{/*
Fully qualified app name, limited to 63 characters (some Kubernetes name
fields are restricted to that length by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
The chart name is .Values.nameOverride when set, else .Chart.Name.
*/}}
{{- define "helm-charts-k8s.fullname" -}}
{{- $release := .Release.Name }}
{{- $chartName := default .Chart.Name .Values.nameOverride }}
{{- if .Values.fullnameOverride }}
{{- trimSuffix "-" (trunc 63 .Values.fullnameOverride) }}
{{- else if contains $chartName $release }}
{{- trimSuffix "-" (trunc 63 $release) }}
{{- else }}
{{- trimSuffix "-" (trunc 63 (printf "%s-%s" $release $chartName)) }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" value for the helm.sh/chart label.
"+" is replaced with "_", then the value is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "helm-charts-k8s.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- trimSuffix "-" (trunc 63 (replace "+" "_" $nameVersion)) }}
{{- end }}
{{/*
Common labels: chart label, the selector labels, the app version (only when
.Chart.AppVersion is set) and the managing service.
*/}}
{{- define "helm-charts-k8s.labels" -}}
helm.sh/chart: {{ include "helm-charts-k8s.chart" . }}
{{ include "helm-charts-k8s.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the chart name and the release name.
*/}}
{{- define "helm-charts-k8s.selectorLabels" -}}
{{- $appName := include "helm-charts-k8s.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the service account to use.
.Values.serviceAccount.name wins when it is set. Otherwise the fallback is the
chart fullname when .Values.serviceAccount.create is true, and "default" when
it is not.
*/}}
{{- define "helm-charts-k8s.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "helm-charts-k8s.fullname" . }}
{{- end }}
{{- default $fallback .Values.serviceAccount.name }}
{{- end }}

View File

@@ -0,0 +1,74 @@
# ClusterRole "<fullname>-config-manager": access to events, nodes, daemonsets,
# pods and KMM nodemodulesconfigs.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-config-manager
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "get", "list", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    verbs: ["get", "list", "watch", "delete", "create", "update"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch", "delete", "create", "update"]
  - apiGroups: ["kmm.sigs.x-k8s.io"]
    resources: ["nodemodulesconfigs", "nodemodulesconfigs/status"]
    verbs: ["delete", "get"]
---
# Binds the "<fullname>-config-manager" ClusterRole to the
# amd-gpu-operator-config-manager service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-config-manager
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-config-manager
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-config-manager'

View File

@@ -0,0 +1,393 @@
{{- /*
Default DeviceConfig custom resource.
Rendered on install when .Values.crds.defaultCR.install is set, or on upgrade
when .Values.crds.defaultCR.upgrade is set, and only if .Values.deviceConfig.spec
exists. Each spec field is emitted only when the corresponding value is present;
boolean/numeric fields are guarded with hasKey so that false/0 are still rendered.
*/}}
{{- if or (and .Release.IsInstall .Values.crds.defaultCR.install) (and .Release.IsUpgrade .Values.crds.defaultCR.upgrade) }}
{{- if and (hasKey .Values "deviceConfig") (hasKey .Values.deviceConfig "spec") }}
{{- $spec := .Values.deviceConfig.spec }}
apiVersion: amd.com/v1alpha1
kind: DeviceConfig
metadata:
  name: default
  # Cleanup of the default CR is done by the pre-delete hook; the "keep"
  # policy stops helm from trying to delete the default DeviceConfig a second time.
  annotations:
    "helm.sh/resource-policy": keep
spec:
  {{- with $spec.selector }}
  selector:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  {{- with $spec.driver }}
  driver:
    {{- if hasKey . "enable" }}
    enable: {{ .enable }}
    {{- end }}
    {{- if hasKey . "blacklist" }}
    blacklist: {{ .blacklist }}
    {{- end }}
    {{- with .driverType }}
    driverType: {{ . }}
    {{- end }}
    {{- with .vfioConfig }}
    vfioConfig:
      {{- with .deviceIDs }}
      deviceIDs:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- with .kernelModuleConfig }}
    kernelModuleConfig:
      {{- with .loadArgs }}
      loadArgs:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .unloadArgs }}
      unloadArgs:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .parameters }}
      parameters:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- with .image }}
    image: {{ . }}
    {{- end }}
    {{- with .imageRegistrySecret }}
    imageRegistrySecret:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .imageRegistryTLS }}
    imageRegistryTLS:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .version }}
    version: {{ . | quote }}
    {{- end }}
    {{- with .imageSign }}
    imageSign:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .imageBuild }}
    imageBuild:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .tolerations }}
    tolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .upgradePolicy }}
    upgradePolicy:
      {{- toYaml . | nindent 6 }}
    {{- end }}
  {{- end }}
  {{- with $spec.commonConfig }}
  commonConfig:
    {{- with .initContainerImage }}
    initContainerImage: {{ . }}
    {{- end }}
    {{- with .utilsContainer }}
    utilsContainer:
      {{- with .image }}
      image: {{ . }}
      {{- end }}
      {{- with .imagePullPolicy }}
      imagePullPolicy: {{ . }}
      {{- end }}
      {{- with .imageRegistrySecret }}
      imageRegistrySecret:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    {{- end }}
  {{- end }}
  {{- with $spec.devicePlugin }}
  devicePlugin:
    {{- with .devicePluginImage }}
    devicePluginImage: {{ . }}
    {{- end }}
    {{- with .devicePluginImagePullPolicy }}
    devicePluginImagePullPolicy: {{ . }}
    {{- end }}
    {{- with .devicePluginTolerations }}
    devicePluginTolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .devicePluginArguments }}
    devicePluginArguments:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- if hasKey . "enableNodeLabeller" }}
    enableNodeLabeller: {{ .enableNodeLabeller }}
    {{- end }}
    {{- with .nodeLabellerImage }}
    nodeLabellerImage: {{ . }}
    {{- end }}
    {{- with .nodeLabellerImagePullPolicy }}
    nodeLabellerImagePullPolicy: {{ . }}
    {{- end }}
    {{- with .nodeLabellerTolerations }}
    nodeLabellerTolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .nodeLabellerArguments }}
    nodeLabellerArguments:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .imageRegistrySecret }}
    imageRegistrySecret:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .upgradePolicy }}
    upgradePolicy:
      {{- toYaml . | nindent 6 }}
    {{- end }}
  {{- end }}
  {{- with $spec.metricsExporter }}
  metricsExporter:
    {{- if hasKey . "enable" }}
    enable: {{ .enable }}
    {{- end }}
    {{- with .serviceType }}
    serviceType: {{ . }}
    {{- end }}
    {{- if hasKey . "port" }}
    port: {{ .port }}
    {{- end }}
    {{- if hasKey . "nodePort" }}
    nodePort: {{ .nodePort }}
    {{- end }}
    {{- with .image }}
    image: {{ . }}
    {{- end }}
    {{- with .imagePullPolicy }}
    imagePullPolicy: {{ . }}
    {{- end }}
    {{- with .config }}
    config:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .tolerations }}
    tolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .imageRegistrySecret }}
    imageRegistrySecret:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .selector }}
    selector:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .upgradePolicy }}
    upgradePolicy:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .rbacConfig }}
    rbacConfig:
      {{- if hasKey . "enable" }}
      enable: {{ .enable }}
      {{- end }}
      {{- with .image }}
      image: {{ . }}
      {{- end }}
      {{- if hasKey . "disableHttps" }}
      disableHttps: {{ .disableHttps }}
      {{- end }}
      {{- with .secret }}
      secret:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .clientCAConfigMap }}
      clientCAConfigMap:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .staticAuthorization }}
      staticAuthorization:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- with .prometheus }}
    prometheus:
      {{- with .serviceMonitor }}
      serviceMonitor:
        {{- if hasKey . "enable" }}
        enable: {{ .enable }}
        {{- end }}
        {{- if hasKey . "interval" }}
        interval: {{ .interval }}
        {{- end }}
        {{- with .attachMetadata }}
        attachMetadata:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- if hasKey . "honorLabels" }}
        honorLabels: {{ .honorLabels }}
        {{- end }}
        {{- if hasKey . "honorTimestamps" }}
        honorTimestamps: {{ .honorTimestamps }}
        {{- end }}
        {{- with .labels }}
        labels:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .relabelings }}
        relabelings:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .metricRelabelings }}
        metricRelabelings:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .authorization }}
        authorization:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .tlsConfig }}
        tlsConfig:
          {{- toYaml . | nindent 10 }}
        {{- end }}
      {{- end }}
    {{- end }}
  {{- end }}
  {{- with $spec.testRunner }}
  testRunner:
    {{- if hasKey . "enable" }}
    enable: {{ .enable }}
    {{- end }}
    {{- with .image }}
    image: {{ . }}
    {{- end }}
    {{- with .imagePullPolicy }}
    imagePullPolicy: {{ . }}
    {{- end }}
    {{- with .config }}
    config:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .logsLocation }}
    logsLocation:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .upgradePolicy }}
    upgradePolicy:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .tolerations }}
    tolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .imageRegistrySecret }}
    imageRegistrySecret:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .selector }}
    selector:
      {{- toYaml . | nindent 6 }}
    {{- end }}
  {{- end }}
  {{- with $spec.configManager }}
  configManager:
    {{- if hasKey . "enable" }}
    enable: {{ .enable }}
    {{- end }}
    {{- with .image }}
    image: {{ . }}
    {{- end }}
    {{- with .imagePullPolicy }}
    imagePullPolicy: {{ . }}
    {{- end }}
    {{- with .imageRegistrySecret }}
    imageRegistrySecret:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .config }}
    config:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .selector }}
    selector:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .upgradePolicy }}
    upgradePolicy:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .configManagerTolerations }}
    configManagerTolerations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
  {{- end }}
{{- end }}
{{- end }}

83
templates/deployment.yaml Normal file
View File

@@ -0,0 +1,83 @@
{{- /*
Deployment "<fullname>-controller-manager".
Runs a single "manager" container configured from
.Values.controllerManager.manager; the manager config file is mounted from the
"<fullname>-manager-config" ConfigMap.
*/ -}}
{{- $cm := .Values.controllerManager -}}
{{- $mgr := $cm.manager -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-controller-manager
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    control-plane: controller-manager
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
spec:
  replicas: {{ $cm.replicas }}
  selector:
    matchLabels:
      app.kubernetes.io/component: amd-gpu
      app.kubernetes.io/part-of: amd-gpu
      control-plane: controller-manager
      {{- include "helm-charts-k8s.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        app.kubernetes.io/component: amd-gpu
        app.kubernetes.io/part-of: amd-gpu
        control-plane: controller-manager
        {{- include "helm-charts-k8s.selectorLabels" . | nindent 8 }}
      annotations:
        kubectl.kubernetes.io/default-container: manager
    spec:
      serviceAccountName: {{ include "helm-charts-k8s.fullname" . }}-controller-manager
      terminationGracePeriodSeconds: 10
      securityContext:
        runAsNonRoot: true
      {{- with $cm.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      nodeSelector: {{- toYaml $cm.nodeSelector | nindent 8 }}
      {{- with $mgr.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with $mgr.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ . }}
      {{- end }}
      containers:
        - name: manager
          # the image tag falls back to the chart appVersion when no tag is configured
          image: {{ $mgr.image.repository }}:{{ $mgr.image.tag | default .Chart.AppVersion }}
          imagePullPolicy: {{ $mgr.imagePullPolicy }}
          args: {{- toYaml $mgr.args | nindent 12 }}
          env:
            - name: OPERATOR_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: KUBERNETES_CLUSTER_DOMAIN
              value: {{ .Values.kubernetesClusterDomain | quote }}
            - name: SIM_ENABLE
              value: {{ $cm.env.simEnable | quote }}
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8081
            initialDelaySeconds: 15
            periodSeconds: 20
          readinessProbe:
            httpGet:
              path: /readyz
              port: 8081
            initialDelaySeconds: 5
            periodSeconds: 10
          resources: {{- toYaml $mgr.resources | nindent 12 }}
          securityContext: {{- toYaml $mgr.containerSecurityContext | nindent 12 }}
          volumeMounts:
            - name: manager-config
              mountPath: /controller_manager_config.yaml
              subPath: controller_manager_config.yaml
      volumes:
        - name: manager-config
          configMap:
            name: {{ include "helm-charts-k8s.fullname" . }}-manager-config

View File

@@ -0,0 +1,16 @@
# ClusterRole "<fullname>-event-recorder-clusterrole": create and patch events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-event-recorder-clusterrole
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "patch"]

View File

@@ -0,0 +1,16 @@
# Binds the event-recorder ClusterRole to the "<fullname>-controller-manager"
# service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-event-recorder-clusterrolebinding
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: '{{ include "helm-charts-k8s.fullname" . }}-controller-manager'
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-event-recorder-clusterrole'

View File

@@ -0,0 +1,217 @@
{{- /*
Default NodeFeatureRule for AMD GPUs; rendered only when
.Values.installdefaultNFDRule is set. Every matchAny entry matches one PCI
device ID of vendor 1002; the product name is noted next to each entry.
*/}}
{{- if .Values.installdefaultNFDRule }}
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
  name: amd-gpu-label-nfd-rule
# PCI ID sources:
# source1: https://admin.pci-ids.ucw.cz/read/PC/1002
# source2: https://devicehunt.com/view/type/pci/vendor/1002
spec:
  rules:
    # virtual functions / MxGPU devices
    - name: amd-vgpu
      labels:
        feature.node.kubernetes.io/amd-vgpu: "true"
      matchAny:
        # AMD Instinct
        - matchFeatures:  # MI210 VF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7410"]}
        - matchFeatures:  # MI300X VF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74b5"]}
        - matchFeatures:  # MI325X VF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74b9"]}
        - matchFeatures:  # MI350X VF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["75b0"]}
        - matchFeatures:  # MI355X VF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["75b3"]}
        # AMD Radeon Pro
        - matchFeatures:  # Radeon Pro V710 MxGPU
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7461"]}
        - matchFeatures:  # Radeon Pro V620 MxGPU
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73ae"]}
    - name: amd-gpu
      labels:
        feature.node.kubernetes.io/amd-gpu: "true"
      matchAny:
        # AMD Instinct
        - matchFeatures:  # MI355X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["75a3"]}
        - matchFeatures:  # MI350X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["75a0"]}
        - matchFeatures:  # MI325X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a5"]}
        - matchFeatures:  # MI308X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a2"]}
        - matchFeatures:  # MI308X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74b6"]}
        - matchFeatures:  # MI308X HF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a8"]}
        - matchFeatures:  # MI300A
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a0"]}
        - matchFeatures:  # MI300X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a1"]}
        - matchFeatures:  # MI300X HF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a9"]}
        - matchFeatures:  # MI300X HF
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74bd"]}
        - matchFeatures:  # MI210
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["740f"]}
        - matchFeatures:  # MI250X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7408"]}
        - matchFeatures:  # MI250/MI250X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["740c"]}
        - matchFeatures:  # MI100
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["738c"]}
        - matchFeatures:  # MI100
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["738e"]}
        # AMD Radeon Pro
        - matchFeatures:  # V710
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7460"]}
        - matchFeatures:  # W7900
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7448"]}
        - matchFeatures:  # W7900 Dual Slot
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["744a"]}
        - matchFeatures:  # W7800 48GB
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7449"]}
        - matchFeatures:  # W7800
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["745e"]}
        - matchFeatures:  # W6900X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73a2"]}
        - matchFeatures:  # W6800 GL-XL
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73a3"]}
        - matchFeatures:  # W6800X / W6800X Duo
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73ab"]}
        - matchFeatures:  # V620
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73a1"]}
        # AMD Radeon
        - matchFeatures:  # RX 9070 / 9070 XT
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["7550"]}
        - matchFeatures:  # RX 7900 XT / 7900 XTX / 7900 GRE / 7900M
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["744c"]}
        - matchFeatures:  # RX 6900 XT
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73af"]}
        - matchFeatures:  # RX 6800 / 6800 XT / 6900 XT
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["73bf"]}
    # product-specific labels
    - name: amd-gpu-mi210
      labels:
        feature.node.kubernetes.io/amd-gpu-mi210: "true"
      matchAny:
        - matchFeatures:  # MI210
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["740f"]}
    - name: amd-gpu-mi300x
      labels:
        feature.node.kubernetes.io/amd-gpu-mi300x: "true"
      matchAny:
        - matchFeatures:  # MI300X
            - feature: pci.device
              matchExpressions:
                vendor: {op: In, value: ["1002"]}
                device: {op: In, value: ["74a1"]}
{{- end }}

View File

@@ -0,0 +1,50 @@
# Role "<fullname>-leader-election-role": full access to configmaps and
# coordination.k8s.io leases.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-leader-election-role
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
# Binds the leader-election Role to the "<fullname>-controller-manager"
# service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-leader-election-rolebinding
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: '{{ include "helm-charts-k8s.fullname" . }}-controller-manager'
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: '{{ include "helm-charts-k8s.fullname" . }}-leader-election-role'

View File

@@ -0,0 +1,11 @@
# ConfigMap "<fullname>-manager-config": carries controller_manager_config.yaml,
# taken from .Values.managerConfig.controllerManagerConfigYaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-manager-config
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
data:
  controller_manager_config.yaml: {{ toYaml .Values.managerConfig.controllerManagerConfigYaml | indent 1 }}

216
templates/manager-rbac.yaml Normal file
View File

@@ -0,0 +1,216 @@
# ClusterRole "<fullname>-manager-role": permissions for the controller manager,
# grouped below by API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-manager-role
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  # core API group
  - apiGroups: [""]
    resources: ["configmaps", "secrets", "services"]
    verbs: ["create", "delete", "get", "list", "patch", "watch"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "patch", "watch"]
  - apiGroups: [""]
    resources: ["nodes/finalizers", "nodes/status"]
    verbs: ["get", "update", "watch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["create", "delete", "get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods/eviction"]
    verbs: ["create", "delete", "get", "list"]
  - apiGroups: [""]
    resources: ["pods/finalizers", "pods/status"]
    verbs: ["delete", "get", "list", "watch"]
  - apiGroups: [""]
    resources: ["services/finalizers"]
    verbs: ["create", "get", "update", "watch"]
  # amd.com
  - apiGroups: ["amd.com"]
    resources: ["deviceconfigs"]
    verbs: ["create", "get", "list", "patch", "update", "watch"]
  - apiGroups: ["amd.com"]
    resources: ["deviceconfigs/finalizers"]
    verbs: ["update"]
  - apiGroups: ["amd.com"]
    resources: ["deviceconfigs/status"]
    verbs: ["get", "patch", "update"]
  # apiextensions.k8s.io
  - apiGroups: ["apiextensions.k8s.io"]
    resources: ["customresourcedefinitions"]
    verbs: ["delete", "get", "list", "watch"]
  # apps
  - apiGroups: ["apps"]
    resources: ["daemonsets", "daemonsets/status"]
    verbs: ["create", "delete", "get", "list", "patch", "watch"]
  - apiGroups: ["apps"]
    resources: ["daemonsets/finalizers"]
    verbs: ["create", "get", "update", "watch"]
  # kmm.sigs.x-k8s.io
  - apiGroups: ["kmm.sigs.x-k8s.io"]
    resources: ["modules"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  - apiGroups: ["kmm.sigs.x-k8s.io"]
    resources: ["modules/finalizers", "nodemodulesconfigs/finalizers"]
    verbs: ["get", "update", "watch"]
  - apiGroups: ["kmm.sigs.x-k8s.io"]
    resources: ["modules/status"]
    verbs: ["get", "patch", "update"]
  - apiGroups: ["kmm.sigs.x-k8s.io"]
    resources: ["nodemodulesconfigs", "nodemodulesconfigs/status"]
    verbs: ["get", "list", "watch"]
  # monitoring.coreos.com
  - apiGroups: ["monitoring.coreos.com"]
    resources: ["servicemonitors"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  # nfd.openshift.io
  - apiGroups: ["nfd.openshift.io"]
    resources: ["nodefeaturediscoveries"]
    verbs: ["delete", "get", "list"]
  - apiGroups: ["nfd.openshift.io"]
    resources: ["nodefeaturediscoveries/finalizers", "nodefeaturediscoveries/status"]
    verbs: ["get", "update"]
---
# Binds the manager ClusterRole to the "<fullname>-controller-manager"
# service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-manager-rolebinding
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: '{{ include "helm-charts-k8s.fullname" . }}-controller-manager'
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-manager-role'

View File

@@ -0,0 +1,55 @@
# ClusterRole "<fullname>-metrics-exporter-rbac-proxy": may create TokenReviews
# and SubjectAccessReviews, read pods, and read/update nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-metrics-exporter-rbac-proxy
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: ["authentication.k8s.io"]
    resources: ["tokenreviews"]
    verbs: ["create"]
  - apiGroups: ["authorization.k8s.io"]
    resources: ["subjectaccessreviews"]
    verbs: ["create"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["watch", "get", "list", "update"]
---
# Binds the metrics-exporter-rbac-proxy ClusterRole to the
# amd-gpu-operator-metrics-exporter-rbac-proxy service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-metrics-exporter-rbac-proxy
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-metrics-exporter-rbac-proxy
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-metrics-exporter-rbac-proxy'

View File

@@ -0,0 +1,43 @@
# ClusterRole "<fullname>-metrics-exporter": read pods, read/update nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-metrics-exporter
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["watch", "get", "list", "update"]
---
# Binds the metrics-exporter ClusterRole to the
# amd-gpu-operator-metrics-exporter service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-metrics-exporter
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-metrics-exporter
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-metrics-exporter'

View File

@@ -0,0 +1,39 @@
{{- /*
Default NodeFeatureRule for AMD NICs; rendered only when
.Values.installdefaultNFDRule is set. Both rules require the ionic kernel
module to be loaded plus a matching PCI device; they differ only in the
PCI device ID.
*/}}
{{- if .Values.installdefaultNFDRule }}
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
  name: amd-nic-label-nfd-rule
# PCI ID sources:
# source1: https://admin.pci-ids.ucw.cz/read/PC/1dd8
# source2: https://devicehunt.com/view/type/pci/vendor/1dd8
spec:
  rules:
    - name: amd-vnic
      labels:
        feature.node.kubernetes.io/amd-vnic: "true"
      matchAny:
        - matchFeatures:
            - feature: kernel.loadedmodule
              matchExpressions:
                ionic:
                  op: Exists
            - feature: pci.device
              matchExpressions:
                vendor:  # AMD Pensando Systems
                  op: In
                  value: ["1dd8"]
                device:  # DSC Ethernet Controller VF
                  op: In
                  value: ["1003"]
                subsystem_vendor:
                  op: In
                  value: ["1dd8"]
                subsystem_device:  # POLLARA-1Q400 100/200/400G 1-port Card
                  op: In
                  value: ["5201"]
    - name: amd-nic
      labels:
        feature.node.kubernetes.io/amd-nic: "true"
      matchAny:
        - matchFeatures:
            - feature: kernel.loadedmodule
              matchExpressions:
                ionic:
                  op: Exists
            - feature: pci.device
              matchExpressions:
                vendor:  # AMD Pensando Systems
                  op: In
                  value: ["1dd8"]
                device:  # DSC Ethernet Controller
                  op: In
                  value: ["1002"]
                subsystem_vendor:
                  op: In
                  value: ["1dd8"]
                subsystem_device:  # POLLARA-1Q400 100/200/400G 1-port Card
                  op: In
                  value: ["5201"]
{{- end }}

View File

@@ -0,0 +1,35 @@
# ClusterRole "<fullname>-node-labeller": read and update nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-node-labeller
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["watch", "get", "list", "update"]
---
# Binds the node-labeller ClusterRole to the amd-gpu-operator-node-labeller
# service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-node-labeller
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-node-labeller
    namespace: '{{ .Release.Namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: '{{ include "helm-charts-k8s.fullname" . }}-node-labeller'

View File

@@ -0,0 +1,117 @@
# To skip the post-delete hook, run helm uninstall with --no-hooks.
# Service account used by the post-delete CRD cleanup Job.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-prune
  namespace: {{ .Release.Namespace }}
  annotations:
    "helm.sh/hook": post-delete
    # hooks with a lower weight run first
    "helm.sh/hook-weight": "0"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
---
# Post-delete hook ClusterRole: get, list and delete CustomResourceDefinitions.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-prune
  annotations:
    "helm.sh/hook": post-delete
    # hooks with a lower weight run first
    "helm.sh/hook-weight": "0"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  - apiGroups: ["apiextensions.k8s.io"]
    resources: ["customresourcedefinitions"]
    verbs: ["delete", "get", "list"]
---
# Post-delete hook binding: grants the "<fullname>-prune" ClusterRole to the
# "<fullname>-prune" service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "helm-charts-k8s.fullname" . }}-prune
  annotations:
    "helm.sh/hook": post-delete
    # hooks with a lower weight run first; weight 1 places this after the weight-0 hooks
    "helm.sh/hook-weight": "1"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "helm-charts-k8s.fullname" . }}-prune
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "helm-charts-k8s.fullname" . }}-prune
---
# Post-delete hook Job: runs kubectl under the "<fullname>-prune" service
# account and deletes the CRDs listed in the script below. The NFD and KMM CRDs
# are only handled when the corresponding sub-chart value is enabled.
apiVersion: batch/v1
kind: Job
metadata:
  name: delete-custom-resource-definitions
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
  annotations:
    # hooks with a lower weight run first; weight 2 places this after the RBAC hooks
    "helm.sh/hook-weight": "2"
    # executed after helm uninstall
    "helm.sh/hook": post-delete
    # a previous hook Job is removed before a new one is created, and the Job is
    # removed once it succeeds; a failed Job is not deleted by this policy
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  backoffLimit: 0 # no retry: a failed first run does not create another pod
  ttlSecondsAfterFinished: 60 # finished job is kept for 1 min, then deleted
  template:
    spec:
      serviceAccountName: {{ include "helm-charts-k8s.fullname" . }}-prune
      containers:
        - name: delete-custom-resource-definitions
          # same image as the controller-manager Deployment; the tag falls back
          # to the chart appVersion when no tag is configured, so the reference
          # never renders as "repository:" with an empty tag
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag | default .Chart.AppVersion }}
          command:
            - /bin/sh
            - -c
            - |
              if kubectl get crds deviceconfigs.amd.com > /dev/null 2>&1; then
                kubectl delete crds deviceconfigs.amd.com
              fi
              {{- if index .Values "node-feature-discovery" "enabled" }}
              if kubectl get crds nodefeaturegroups.nfd.k8s-sigs.io > /dev/null 2>&1; then
                kubectl delete crds nodefeaturegroups.nfd.k8s-sigs.io
              fi
              if kubectl get crds nodefeaturerules.nfd.k8s-sigs.io > /dev/null 2>&1; then
                kubectl delete crds nodefeaturerules.nfd.k8s-sigs.io
              fi
              if kubectl get crds nodefeatures.nfd.k8s-sigs.io > /dev/null 2>&1; then
                kubectl delete crds nodefeatures.nfd.k8s-sigs.io
              fi
              {{- end }}
              {{- if .Values.kmm.enabled }}
              if kubectl get crds modules.kmm.sigs.x-k8s.io > /dev/null 2>&1; then
                kubectl delete crds modules.kmm.sigs.x-k8s.io
              fi
              if kubectl get crds nodemodulesconfigs.kmm.sigs.x-k8s.io > /dev/null 2>&1; then
                kubectl delete crds nodemodulesconfigs.kmm.sigs.x-k8s.io
              fi
              {{- end }}
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      restartPolicy: Never

View File

@@ -0,0 +1,101 @@
# Run helm uninstall with --no-hooks to bypass the pre-delete hook
apiVersion: v1
kind: ServiceAccount
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-pre-delete"
  namespace: "{{ .Release.Namespace }}"
  annotations:
    # Identity used by the pre-delete cleanup job; created as a pre-delete hook.
    helm.sh/hook: pre-delete
    # Lower weights run first: weight 0 puts this ahead of the binding (1) and the job (2).
    helm.sh/hook-weight: "0"
    # Drop any previous copy before re-creating it, and remove it once the hook succeeded.
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-pre-delete"
  annotations:
    # Cluster-wide permissions for the pre-delete cleanup job; created as a pre-delete hook.
    helm.sh/hook: pre-delete
    # Lower weights run first.
    helm.sh/hook-weight: "0"
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
rules:
  # Read and delete access on DeviceConfig custom resources (amd.com API group).
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [get, list, delete]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-pre-delete"
  annotations:
    helm.sh/hook: pre-delete
    # Weight 1: created after the ServiceAccount and ClusterRole (weight 0) it ties together.
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
# Grants the pre-delete ClusterRole to the pre-delete ServiceAccount in the release namespace.
subjects:
  - kind: ServiceAccount
    name: "{{ include "helm-charts-k8s.fullname" . }}-pre-delete"
    namespace: "{{ .Release.Namespace }}"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: "{{ include "helm-charts-k8s.fullname" . }}-pre-delete"
---
apiVersion: batch/v1
kind: Job
metadata:
  name: delete-leftover-deviceconfigs
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
  annotations:
    # hook with lower weight value will run firstly; weight 2 runs after the
    # pre-delete ServiceAccount/ClusterRole (0) and ClusterRoleBinding (1)
    "helm.sh/hook-weight": "2"
    # hook will be executed before helm uninstall
    "helm.sh/hook": pre-delete
    # replace a leftover job before the hook is re-created and remove the job once it
    # succeeded; Helm does not remove a failed job (only the TTL below cleans it up)
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  backoffLimit: 0 # once the job finished first run, don't retry to create another pod
  ttlSecondsAfterFinished: 60 # job info will be kept for 1 min then deleted
  template:
    spec:
      serviceAccountName: {{ include "helm-charts-k8s.fullname" . }}-pre-delete
      containers:
        - name: delete-leftover-deviceconfigs
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag }}
          command:
            - /bin/sh
            - -c
            - |
              # Nothing to clean up when the DeviceConfig API is not served by the cluster
              installed=$(kubectl api-resources -owide | grep -i amd.com | grep -i deviceconfig)
              # The variable must be quoted: the matching api-resources row has several
              # columns and would otherwise be split into multiple arguments for `[`
              if [ -z "${installed}" ] ; then
                exit 0
              fi
              # Delete all existing DeviceConfig custom resources
              kubectl delete deviceconfigs.amd.com --all -A
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      restartPolicy: Never

View File

@@ -0,0 +1,229 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pre-upgrade-check-sa
  annotations:
    # Lower weights run first.
    helm.sh/hook-weight: "0"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Removed only after a successful hook run (or right before the next run),
    # so it is left in place when the hook fails.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pre-upgrade-check-cluster-role
  annotations:
    # Lower weights run first.
    helm.sh/hook-weight: "0"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
rules:
  # Read-only access to DeviceConfig custom resources (amd.com API group).
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [list, get]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pre-upgrade-check-cluster-role-binding
  annotations:
    # Weight 1: created after the ServiceAccount and ClusterRole (weight 0) it references.
    helm.sh/hook-weight: "1"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
# Grants pre-upgrade-check-cluster-role to pre-upgrade-check-sa in the release namespace.
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pre-upgrade-check-cluster-role
subjects:
  - kind: ServiceAccount
    name: pre-upgrade-check-sa
    namespace: "{{ .Release.Namespace }}"
---
apiVersion: batch/v1
kind: Job
metadata:
  name: pre-upgrade-check
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
  annotations:
    # hook will be executed before helm upgrade and before helm rollback
    "helm.sh/hook": pre-upgrade,pre-rollback
    # don't cleanup the job on hook failure
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
    # hook with lower weight value will run firstly; weight 2 runs after the
    # pre-upgrade-check ServiceAccount/ClusterRole (0) and ClusterRoleBinding (1)
    "helm.sh/hook-weight": "2"
spec:
  backoffLimit: 0 # once the job finished first run, don't retry to create another pod
  ttlSecondsAfterFinished: 60 # job info will be kept for 1 min then deleted
  template:
    spec:
      serviceAccountName: pre-upgrade-check-sa
      containers:
        - name: pre-upgrade-check
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag }}
          command:
            - /bin/sh
            - -c
            - |
              # Ignore the lack of CRDs, probably haven't actually been installed yet
              # this provides idempotency when "things" don't understand the difference between
              # install and upgrade. E.g. Argo turns pre-upgrade hook into its PreSync hook
              installed=$(kubectl api-resources -owide | grep -i amd.com | grep -i deviceconfig)
              # The variable must be quoted: the matching api-resources row has several
              # columns and would otherwise be split into multiple arguments for `[`
              if [ -z "${installed}" ] ; then
                exit 0
              fi
              # List all DeviceConfig CRs in the release namespace
              deviceconfigs=$(kubectl get deviceconfigs -n {{ .Release.Namespace }} -o json)
              echo "DeviceConfigs JSON:"
              echo "$deviceconfigs" | jq .
              # Check if any UpgradeState is in the blocked states.
              # The filter must produce ONE boolean for the whole list: `jq -e` derives its
              # exit status from the last output only, so emitting one result per DeviceConfig
              # would let a blocked DeviceConfig slip through unless it happened to be last.
              blocked_states='["Upgrade-Not-Started", "Upgrade-Started", "Install-In-Progress", "Upgrade-In-Progress"]'
              if echo "$deviceconfigs" | jq --argjson blocked_states "$blocked_states" -e '
                any(
                  .items[] | (.status.nodeModuleStatus // {}) | to_entries[];
                  .value.status as $state | ($blocked_states | index($state)) != null
                )' > /dev/null; then
                echo "Upgrade blocked: Some DeviceConfigs are in a disallowed UpgradeState."
                exit 1
              else
                echo "All DeviceConfigs are in an allowed state. Proceeding with upgrade."
                exit 0
              fi
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      restartPolicy: Never
{{- if .Values.upgradeCRD }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: upgrade-crd-hook-sa
  annotations:
    # Lower weights run first.
    helm.sh/hook-weight: "1"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: upgrade-crd-hook-cluster-role
  annotations:
    # Lower weights run first.
    helm.sh/hook-weight: "1"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
rules:
  # Create/read/modify access to CustomResourceDefinitions (no delete verb is granted).
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [create, get, list, watch, patch, update]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: upgrade-crd-hook-cluster-role-binding
  annotations:
    # Weight 2: created after the ServiceAccount and ClusterRole (weight 1) it references.
    helm.sh/hook-weight: "2"
    # Created before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
# Grants upgrade-crd-hook-cluster-role to upgrade-crd-hook-sa in the release namespace.
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: upgrade-crd-hook-cluster-role
subjects:
  - kind: ServiceAccount
    name: upgrade-crd-hook-sa
    namespace: "{{ .Release.Namespace }}"
---
apiVersion: batch/v1
kind: Job
metadata:
  name: upgrade-crd
  namespace: "{{ .Release.Namespace }}"
  annotations:
    # Weight 3: runs after the upgrade-crd ServiceAccount/ClusterRole (1) and binding (2).
    helm.sh/hook-weight: "3"
    # Executed before helm upgrade and before helm rollback.
    helm.sh/hook: pre-upgrade,pre-rollback
    # Left in place when the hook fails; removed on success or before the next run.
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
spec:
  template:
    metadata:
      name: upgrade-crd
    spec:
      restartPolicy: OnFailure
      serviceAccountName: upgrade-crd-hook-sa
      containers:
        - name: upgrade-crd
          image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag }}
          imagePullPolicy: {{ .Values.controllerManager.manager.imagePullPolicy }}
          # Applies the CRD manifests found under /opt/helm-charts-crds-k8s in the image;
          # the NFD and KMM CRDs are applied only when those components are enabled in values.
          command:
            - /bin/sh
            - -c
            - |
              kubectl apply -f /opt/helm-charts-crds-k8s/deviceconfig-crd.yaml
              {{- if index .Values "node-feature-discovery" "enabled" }}
              kubectl apply -f /opt/helm-charts-crds-k8s/nfd-api-crds.yaml
              {{- end }}
              {{- if .Values.kmm.enabled }}
              kubectl apply -f /opt/helm-charts-crds-k8s/module-crd.yaml
              kubectl apply -f /opt/helm-charts-crds-k8s/nodemodulesconfig-crd.yaml
              {{- end }}
      {{- if .Values.controllerManager.manager.imagePullSecrets }}
      imagePullSecrets:
        - name: {{ .Values.controllerManager.manager.imagePullSecrets }}
      {{- end }}
      {{- with .Values.controllerManager.manager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
# Run helm upgrade with --no-hooks to bypass the pre-upgrade hook

View File

@@ -0,0 +1,98 @@
# ServiceAccount for the controller manager; its name is prefixed with the chart fullname.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-controller-manager"
  # Annotations are taken verbatim from controllerManager.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.controllerManager.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-kmm-device-plugin
  # Annotations are taken verbatim from kmmDevicePlugin.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.kmmDevicePlugin.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-kmm-module-loader
  # Annotations are taken verbatim from kmmModuleLoader.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.kmmModuleLoader.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-node-labeller
  # Annotations are taken verbatim from nodeLabeller.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.nodeLabeller.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-metrics-exporter
  # Annotations are taken verbatim from metricsExporter.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.metricsExporter.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-metrics-exporter-rbac-proxy
  # NOTE(review): this reads metricsExporter.serviceAccount.annotations, i.e. the same
  # values key as the amd-gpu-operator-metrics-exporter ServiceAccount - confirm intended.
  annotations:
    {{- toYaml .Values.metricsExporter.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount; it is the subject of the "<fullname>-test-runner" ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-test-runner
  # Annotations are taken verbatim from testRunner.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.testRunner.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount (not prefixed with the release/chart name).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-config-manager
  # Annotations are taken verbatim from configManager.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.configManager.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
---
# Fixed-name ServiceAccount; it is the subject of the "<fullname>-utils-container" ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: amd-gpu-operator-utils-container
  # Annotations are taken verbatim from utilsContainer.serviceAccount.annotations.
  annotations:
    {{- toYaml .Values.utilsContainer.serviceAccount.annotations | nindent 4 }}
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu

View File

@@ -0,0 +1,41 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-test-runner"
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
rules:
  # Core API group: create, read and update events.
  - apiGroups: [""]
    resources: [events]
    verbs: [create, get, list, update]
  # Core API group: patch nodes (no read verbs are granted on nodes).
  - apiGroups: [""]
    resources: [nodes]
    verbs: [patch]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-test-runner"
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
# Grants the test-runner ClusterRole to the fixed-name test-runner ServiceAccount
# in the release namespace.
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-test-runner
    namespace: "{{ .Release.Namespace }}"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: "{{ include "helm-charts-k8s.fullname" . }}-test-runner"

View File

@@ -0,0 +1,34 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-utils-container"
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
rules:
  # Allows "use" of the SecurityContextConstraints object named "privileged"
  # (security.openshift.io API group).
  - apiGroups: [security.openshift.io]
    resources: [securitycontextconstraints]
    resourceNames: [privileged]
    verbs: [use]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: "{{ include "helm-charts-k8s.fullname" . }}-utils-container"
  labels:
    {{- include "helm-charts-k8s.labels" . | nindent 4 }}
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/part-of: amd-gpu
# Grants the utils-container ClusterRole to the fixed-name utils-container ServiceAccount
# in the release namespace.
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-utils-container
    namespace: "{{ .Release.Namespace }}"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: "{{ include "helm-charts-k8s.fullname" . }}-utils-container"