1605 lines
86 KiB
YAML
1605 lines
86 KiB
YAML
---
|
|
# Source: gpu-operator-charts/templates/deviceconfig-crd.yaml
|
|
apiVersion: apiextensions.k8s.io/v1
|
|
kind: CustomResourceDefinition
|
|
metadata:
|
|
name: deviceconfigs.amd.com
|
|
annotations:
|
|
controller-gen.kubebuilder.io/version: v0.17.0
|
|
labels:
|
|
app.kubernetes.io/component: amd-gpu
|
|
app.kubernetes.io/part-of: amd-gpu
|
|
helm.sh/chart: gpu-operator-charts-v1.4.0
|
|
app.kubernetes.io/name: gpu-operator-charts
|
|
app.kubernetes.io/instance: amd-gpu
|
|
app.kubernetes.io/version: "v1.4.0"
|
|
app.kubernetes.io/managed-by: Helm
|
|
spec:
|
|
group: amd.com
|
|
names:
|
|
kind: DeviceConfig
|
|
listKind: DeviceConfigList
|
|
plural: deviceconfigs
|
|
shortNames:
|
|
- gpue
|
|
singular: deviceconfig
|
|
scope: Namespaced
|
|
versions:
|
|
- name: v1alpha1
|
|
schema:
|
|
openAPIV3Schema:
|
|
description: DeviceConfig describes how to enable AMD GPU device
|
|
properties:
|
|
apiVersion:
|
|
description: |-
|
|
APIVersion defines the versioned schema of this representation of an object.
|
|
Servers should convert recognized schemas to the latest internal value, and
|
|
may reject unrecognized values.
|
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
|
type: string
|
|
kind:
|
|
description: |-
|
|
Kind is a string value representing the REST resource this object represents.
|
|
Servers may infer this from the endpoint the client submits requests to.
|
|
Cannot be updated.
|
|
In CamelCase.
|
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
|
type: string
|
|
metadata:
|
|
type: object
|
|
spec:
|
|
description: DeviceConfigSpec describes how the AMD GPU operator should
|
|
enable AMD GPU device for customer's use.
|
|
properties:
|
|
commonConfig:
|
|
description: common config
|
|
properties:
|
|
initContainerImage:
|
|
description: InitContainerImage is being used for the operands pods,
|
|
i.e. metrics exporter, test runner, device plugin, device config
|
|
manager and node labeller
|
|
type: string
|
|
utilsContainer:
|
|
description: UtilsContainer contains parameters to configure operator's
|
|
utils container
|
|
properties:
|
|
image:
|
|
description: Image is the image of utils container
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
imagePullPolicy:
|
|
description: image pull policy for utils container
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
imageRegistrySecret:
|
|
description: secret used to pull the utils container image
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type: object
|
|
type: object
|
|
configManager:
|
|
description: config manager
|
|
properties:
|
|
config:
|
|
description: config map to customize the config for config manager,
|
|
if not specified default config will be applied
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
configManagerTolerations:
|
|
description: tolerations for the device config manager DaemonSet
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
enable:
|
|
description: enable config manager, disabled by default
|
|
type: boolean
|
|
image:
|
|
description: config manager image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
imagePullPolicy:
|
|
description: image pull policy for config manager
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
imageRegistrySecret:
|
|
description: config manager image registry secret used to pull/push
|
|
images
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
selector:
|
|
additionalProperties:
|
|
type: string
|
|
description: Selector describes on which nodes to enable config
|
|
manager
|
|
type: object
|
|
upgradePolicy:
|
|
description: upgrade policy for config manager daemonset
|
|
properties:
|
|
maxUnavailable:
|
|
default: 1
|
|
description: MaxUnavailable specifies the maximum number of
|
|
Pods that can be unavailable during the update process. Applicable
|
|
for RollingUpdate only. Default value is 1.
|
|
format: int32
|
|
type: integer
|
|
upgradeStrategy:
|
|
description: UpgradeStrategy specifies the type of the DaemonSet
|
|
update. Valid values are "RollingUpdate" (default) or "OnDelete".
|
|
enum:
|
|
- RollingUpdate
|
|
- OnDelete
|
|
type: string
|
|
type: object
|
|
type: object
|
|
devicePlugin:
|
|
description: device plugin
|
|
properties:
|
|
devicePluginArguments:
|
|
additionalProperties:
|
|
type: string
|
|
description: |-
|
|
device plugin arguments is used to pass supported flags and their values while starting device plugin daemonset
|
|
supported flag values: {"resource_naming_strategy": {"single", "mixed"}}
|
|
type: object
|
|
devicePluginImage:
|
|
description: device plugin image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
devicePluginImagePullPolicy:
|
|
description: image pull policy for device plugin
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
devicePluginTolerations:
|
|
description: tolerations for the device plugin DaemonSet
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
enableNodeLabeller:
|
|
default: true
|
|
description: enable or disable the node labeller
|
|
type: boolean
|
|
imageRegistrySecret:
|
|
description: node labeller image registry secret used to pull/push
|
|
images
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
nodeLabellerArguments:
|
|
description: |-
|
|
node labeller arguments is used to pass supported labels while starting node labeller daemonset
|
|
some flags are enabled by default as they are applicable and bare minimum for all setups and are supported in all versions of node labeller
|
|
default flags: {"vram", "cu-count", "simd-count", "device-id", "family", "product-name", "driver-version"}
|
|
supported flags: {"compute-memory-partition", "compute-partitioning-supported", "memory-partitioning-supported"}
|
|
items:
|
|
type: string
|
|
type: array
|
|
nodeLabellerImage:
|
|
description: node labeller image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
nodeLabellerImagePullPolicy:
|
|
description: image pull policy for node labeller
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
nodeLabellerTolerations:
|
|
description: tolerations for the node labeller DaemonSet
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
upgradePolicy:
|
|
description: upgrade policy for device plugin and node labeller
|
|
daemons
|
|
properties:
|
|
maxUnavailable:
|
|
default: 1
|
|
description: MaxUnavailable specifies the maximum number of
|
|
Pods that can be unavailable during the update process. Applicable
|
|
for RollingUpdate only. Default value is 1.
|
|
format: int32
|
|
type: integer
|
|
upgradeStrategy:
|
|
description: UpgradeStrategy specifies the type of the DaemonSet
|
|
update. Valid values are "RollingUpdate" (default) or "OnDelete".
|
|
enum:
|
|
- RollingUpdate
|
|
- OnDelete
|
|
type: string
|
|
type: object
|
|
type: object
|
|
driver:
|
|
description: driver
|
|
properties:
|
|
amdgpuInstallerRepoURL:
|
|
description: |-
|
|
radeon repo URL for fetching amdgpu installer if building driver image on the fly
|
|
installer URL is https://repo.radeon.com/amdgpu-install by default
|
|
type: string
|
|
blacklist:
|
|
description: |-
|
|
blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
|
|
Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
|
|
Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
|
|
type: boolean
|
|
driverType:
|
|
default: container
|
|
description: |-
|
|
specify the type of driver (container/vf-passthrough/pf-passthrough) to install on the worker node. default value is container.
|
|
container: normal amdgpu-dkms driver for Bare Metal GPU nodes or guest VM.
|
|
vf-passthrough: MxGPU GIM driver on the host machine to generate VF, then mount VF to vfio-pci
|
|
pf-passthrough: directly mount PF device to vfio-pci
|
|
enum:
|
|
- container
|
|
- vf-passthrough
|
|
- pf-passthrough
|
|
type: string
|
|
enable:
|
|
default: true
|
|
description: |-
|
|
enable driver install. default value is true.
|
|
disable is for skipping driver install/uninstall for dryrun or using in-tree amdgpu kernel module
|
|
type: boolean
|
|
image:
|
|
description: |-
|
|
defines image that includes drivers and firmware blobs, don't include tag since it will be fully managed by operator
|
|
for vanilla k8s the default value is image-registry:5000/$MOD_NAMESPACE/amdgpu_kmod
|
|
for OpenShift the default value is image-registry.openshift-image-registry.svc:5000/$MOD_NAMESPACE/amdgpu_kmod
|
|
image tag will be in the format of <linux distro>-<release version>-<kernel version>-<driver version>
|
|
example tag is coreos-416.94-5.14.0-427.28.1.el9_4.x86_64-6.2.2 and ubuntu-22.04-5.15.0-94-generic-6.1.3
|
|
NOTE: Updating the driver image repository is not supported. Please delete the existing DeviceConfig and create a new one with the updated image repository
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[$a-zA-Z0-9_]+(?:[._-][$a-zA-Z0-9_]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
imageBuild:
|
|
description: image build configs
|
|
properties:
|
|
baseImageRegistry:
|
|
default: docker.io
|
|
description: |-
|
|
image registry to fetch base image for building driver image, default value is docker.io, the builder will search for corresponding OS base image from given registry
|
|
e.g. if your worker node is using Ubuntu 22.04, by default the base image would be docker.io/ubuntu:22.04
|
|
NOTE: this field won't apply for OpenShift since OpenShift is using its own DriverToolKit image to build driver image
|
|
type: string
|
|
baseImageRegistryTLS:
|
|
description: TLS settings for fetching base image
|
|
properties:
|
|
insecure:
|
|
description: If true, check if the container image already
|
|
exists using plain HTTP.
|
|
type: boolean
|
|
insecureSkipTLSVerify:
|
|
description: If true, skip any TLS server certificate validation
|
|
type: boolean
|
|
type: object
|
|
type: object
|
|
imageRegistrySecret:
|
|
description: secrets used for pull/push images from/to private registry
|
|
specified in driversImage
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
imageRegistryTLS:
|
|
description: driver image registry TLS setting for the container
|
|
image
|
|
properties:
|
|
insecure:
|
|
description: If true, check if the container image already exists
|
|
using plain HTTP.
|
|
type: boolean
|
|
insecureSkipTLSVerify:
|
|
description: If true, skip any TLS server certificate validation
|
|
type: boolean
|
|
type: object
|
|
imageSign:
|
|
description: |-
|
|
image signing config to sign the driver image when building driver image on the fly
|
|
image signing is required for installing driver on secure boot enabled system
|
|
properties:
|
|
certSecret:
|
|
description: |-
|
|
ImageSignCertSecret the public key used to sign kernel modules within image
|
|
necessary for secure boot enabled system
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
keySecret:
|
|
description: |-
|
|
ImageSignKeySecret the private key used to sign kernel modules within image
|
|
necessary for secure boot enabled system
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type: object
|
|
kernelModuleConfig:
|
|
description: advanced arguments, parameters and more configs to
|
|
manage the driver
|
|
properties:
|
|
loadArgs:
|
|
description: LoadArg are the arguments when modprobe is executed
|
|
to load the kernel module. The command will be `modprobe ${Args}
|
|
module_name`.
|
|
items:
|
|
type: string
|
|
type: array
|
|
parameters:
|
|
description: Parameters is being used for modprobe commands.
|
|
The command will be `modprobe ${Args} module_name ${Parameters}`.
|
|
items:
|
|
type: string
|
|
type: array
|
|
unloadArgs:
|
|
description: UnloadArg are the arguments when modprobe is executed
|
|
to unload the kernel module. The command will be `modprobe
|
|
-r ${Args} module_name`.
|
|
items:
|
|
type: string
|
|
type: array
|
|
type: object
|
|
tolerations:
|
|
description: tolerations for kmm module object
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
upgradePolicy:
|
|
description: policy to upgrade the drivers
|
|
properties:
|
|
enable:
|
|
description: |-
|
|
enable upgrade policy, disabled by default
|
|
If disabled, user has to manually upgrade all the nodes.
|
|
type: boolean
|
|
maxParallelUpgrades:
|
|
default: 1
|
|
description: |-
|
|
MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
|
|
0 means no limit, all nodes will be upgraded in parallel
|
|
minimum: 0
|
|
type: integer
|
|
maxUnavailableNodes:
|
|
anyOf:
|
|
- type: integer
|
|
- type: string
|
|
default: 25%
|
|
description: |-
|
|
MaxUnavailableNodes indicates maximum number of nodes that can be in a failed upgrade state beyond which upgrades will stop to keep cluster at a minimal healthy state
|
|
Value can be an integer (ex: 2) which would mean at most 2 nodes can be in failed state after which new upgrades will not start. Or it can be a percentage string (ex: "50%") from which absolute number will be calculated and rounded up
|
|
x-kubernetes-int-or-string: true
|
|
nodeDrainPolicy:
|
|
description: Node draining policy
|
|
properties:
|
|
force:
|
|
default: false
|
|
description: Force indicates if force draining is allowed
|
|
type: boolean
|
|
gracePeriodSeconds:
|
|
default: -1
|
|
description: GracePeriodSeconds indicates the time kubernetes
|
|
waits for a pod to shut down gracefully after receiving
|
|
a termination signal
|
|
type: integer
|
|
timeoutSeconds:
|
|
default: 300
|
|
description: TimeoutSeconds specifies the length of time
|
|
in seconds to wait before giving up drain, zero means
|
|
infinite
|
|
minimum: 0
|
|
type: integer
|
|
type: object
|
|
podDeletionPolicy:
|
|
description: Pod Deletion policy. If both NodeDrainPolicy and
|
|
PodDeletionPolicy config is available, NodeDrainPolicy(if
|
|
enabled) will take precedence.
|
|
properties:
|
|
force:
|
|
default: false
|
|
description: Force indicates if force deletion is allowed
|
|
type: boolean
|
|
gracePeriodSeconds:
|
|
default: -1
|
|
description: GracePeriodSeconds indicates the time kubernetes
|
|
waits for a pod to shut down gracefully after receiving
|
|
a termination signal
|
|
type: integer
|
|
timeoutSeconds:
|
|
default: 300
|
|
description: TimeoutSeconds specifies the length of time
|
|
in seconds to wait before giving up on pod deletion, zero
|
|
means infinite
|
|
minimum: 0
|
|
type: integer
|
|
type: object
|
|
rebootRequired:
|
|
default: true
|
|
description: reboot between driver upgrades, enabled by default,
|
|
if enabled spec.commonConfig.utilsContainer will be used to
|
|
perform reboot on worker nodes
|
|
type: boolean
|
|
type: object
|
|
version:
|
|
description: |-
|
|
version of the drivers source code, can be used as part of image of dockerfile source image
|
|
default value for different OS is: ubuntu: 6.1.3, coreOS: 6.2.2
|
|
type: string
|
|
vfioConfig:
|
|
description: |-
|
|
vfio config
|
|
specify the specific configs for binding PCI devices to vfio-pci kernel module, applies for driver type vf-passthrough and pf-passthrough
|
|
properties:
|
|
deviceIDs:
|
|
description: list of PCI device IDs to load into vfio-pci driver.
|
|
default is the list of AMD GPU PF/VF PCI device IDs based
|
|
on driver type vf-passthrough/pf-passthrough.
|
|
items:
|
|
type: string
|
|
type: array
|
|
type: object
|
|
type: object
|
|
metricsExporter:
|
|
description: metrics exporter
|
|
properties:
|
|
config:
|
|
description: optional configuration for metrics
|
|
properties:
|
|
name:
|
|
description: |-
|
|
Name of the configMap that defines the list of metrics
|
|
default list:[]
|
|
type: string
|
|
type: object
|
|
enable:
|
|
description: enable metrics exporter, disabled by default
|
|
type: boolean
|
|
image:
|
|
description: metrics exporter image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
imagePullPolicy:
|
|
description: image pull policy for metrics exporter
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
imageRegistrySecret:
|
|
description: metrics exporter image registry secret used to pull/push
|
|
images
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
nodePort:
|
|
description: NodePort is the external port for pulling metrics from
|
|
outside the cluster, in the range 30000-32767 (assigned automatically
|
|
by default)
|
|
format: int32
|
|
maximum: 32767
|
|
minimum: 30000
|
|
type: integer
|
|
podResourceAPISocketPath:
|
|
default: /var/lib/kubelet/pod-resources
|
|
description: |-
|
|
Set the host path for pod-resource kubelet.socket,
|
|
vanilla kubernetes path is /var/lib/kubelet/pod-resources
|
|
microk8s path is /var/snap/microk8s/common/var/lib/kubelet/pod-resources/
|
|
path is an absolute unix path that allows a trailing slash
|
|
pattern: ^(/[^/\0]+)*(/)?$
|
|
type: string
|
|
port:
|
|
default: 5000
|
|
description: Port is the internal port used for in-cluster and node
|
|
access to pull metrics from the metrics-exporter (default 5000).
|
|
format: int32
|
|
type: integer
|
|
prometheus:
|
|
description: Prometheus configuration for metrics exporter
|
|
properties:
|
|
serviceMonitor:
|
|
description: ServiceMonitor configuration for Prometheus integration
|
|
properties:
|
|
attachMetadata:
|
|
description: AttachMetadata defines if Prometheus should
|
|
attach node metadata to the target
|
|
properties:
|
|
node:
|
|
description: |-
|
|
When set to true, Prometheus attaches node metadata to the discovered
|
|
targets.
|
|
|
|
The Prometheus service account must have the `list` and `watch`
|
|
permissions on the `Nodes` objects.
|
|
type: boolean
|
|
type: object
|
|
authorization:
|
|
description: Optional Prometheus authorization configuration
|
|
for accessing the endpoint
|
|
properties:
|
|
credentials:
|
|
description: Selects a key of a Secret in the namespace
|
|
that contains the credentials for authentication.
|
|
properties:
|
|
key:
|
|
description: The key of the secret to select from. Must
|
|
be a valid secret key.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the Secret or its key
|
|
must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type:
|
|
description: |-
|
|
Defines the authentication type. The value is case-insensitive.
|
|
|
|
"Basic" is not a supported value.
|
|
|
|
Default: "Bearer"
|
|
type: string
|
|
type: object
|
|
bearerTokenFile:
|
|
description: |-
|
|
Path to bearer token file to be used by Prometheus (e.g., service account token path)
|
|
Deprecated: Use Authorization instead. This field is kept for backward compatibility.
|
|
type: string
|
|
enable:
|
|
description: Enable or disable ServiceMonitor creation (default
|
|
false)
|
|
type: boolean
|
|
honorLabels:
|
|
default: true
|
|
description: HonorLabels chooses the metric's labels on
|
|
collisions with target labels (default true)
|
|
type: boolean
|
|
honorTimestamps:
|
|
description: HonorTimestamps controls whether the scrape
|
|
endpoints honor timestamps (default false)
|
|
type: boolean
|
|
interval:
|
|
description: 'How frequently to scrape metrics. Accepts
|
|
values with time unit suffix: "30s", "1m", "2h", "500ms"'
|
|
pattern: ^([0-9]+)(ms|s|m|h)$
|
|
type: string
|
|
labels:
|
|
additionalProperties:
|
|
type: string
|
|
description: 'Additional labels to add to the ServiceMonitor
|
|
(default release: prometheus)'
|
|
type: object
|
|
metricRelabelings:
|
|
description: Relabeling rules applied to individual scraped
|
|
metrics
|
|
items:
|
|
description: |-
|
|
RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
|
|
scraped samples and remote write samples.
|
|
|
|
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
|
properties:
|
|
action:
|
|
default: replace
|
|
description: |-
|
|
Action to perform based on the regex matching.
|
|
|
|
`Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
|
|
`DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
|
|
|
|
Default: "Replace"
|
|
enum:
|
|
- replace
|
|
- Replace
|
|
- keep
|
|
- Keep
|
|
- drop
|
|
- Drop
|
|
- hashmod
|
|
- HashMod
|
|
- labelmap
|
|
- LabelMap
|
|
- labeldrop
|
|
- LabelDrop
|
|
- labelkeep
|
|
- LabelKeep
|
|
- lowercase
|
|
- Lowercase
|
|
- uppercase
|
|
- Uppercase
|
|
- keepequal
|
|
- KeepEqual
|
|
- dropequal
|
|
- DropEqual
|
|
type: string
|
|
modulus:
|
|
description: |-
|
|
Modulus to take of the hash of the source label values.
|
|
|
|
Only applicable when the action is `HashMod`.
|
|
format: int64
|
|
type: integer
|
|
regex:
|
|
description: Regular expression against which the
|
|
extracted value is matched.
|
|
type: string
|
|
replacement:
|
|
description: |-
|
|
Replacement value against which a Replace action is performed if the
|
|
regular expression matches.
|
|
|
|
Regex capture groups are available.
|
|
type: string
|
|
separator:
|
|
description: Separator is the string between concatenated
|
|
SourceLabels.
|
|
type: string
|
|
sourceLabels:
|
|
description: |-
|
|
The source labels select values from existing labels. Their content is
|
|
concatenated using the configured Separator and matched against the
|
|
configured regular expression.
|
|
items:
|
|
description: |-
|
|
LabelName is a valid Prometheus label name which may only contain ASCII
|
|
letters, numbers, as well as underscores.
|
|
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
|
type: string
|
|
type: array
|
|
targetLabel:
|
|
description: |-
|
|
Label to which the resulting string is written in a replacement.
|
|
|
|
It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
|
|
`KeepEqual` and `DropEqual` actions.
|
|
|
|
Regex capture groups are available.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
relabelings:
|
|
description: RelabelConfigs to apply to samples before ingestion
|
|
items:
|
|
description: |-
|
|
RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
|
|
scraped samples and remote write samples.
|
|
|
|
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
|
properties:
|
|
action:
|
|
default: replace
|
|
description: |-
|
|
Action to perform based on the regex matching.
|
|
|
|
`Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
|
|
`DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
|
|
|
|
Default: "Replace"
|
|
enum:
|
|
- replace
|
|
- Replace
|
|
- keep
|
|
- Keep
|
|
- drop
|
|
- Drop
|
|
- hashmod
|
|
- HashMod
|
|
- labelmap
|
|
- LabelMap
|
|
- labeldrop
|
|
- LabelDrop
|
|
- labelkeep
|
|
- LabelKeep
|
|
- lowercase
|
|
- Lowercase
|
|
- uppercase
|
|
- Uppercase
|
|
- keepequal
|
|
- KeepEqual
|
|
- dropequal
|
|
- DropEqual
|
|
type: string
|
|
modulus:
|
|
description: |-
|
|
Modulus to take of the hash of the source label values.
|
|
|
|
Only applicable when the action is `HashMod`.
|
|
format: int64
|
|
type: integer
|
|
regex:
|
|
description: Regular expression against which the
|
|
extracted value is matched.
|
|
type: string
|
|
replacement:
|
|
description: |-
|
|
Replacement value against which a Replace action is performed if the
|
|
regular expression matches.
|
|
|
|
Regex capture groups are available.
|
|
type: string
|
|
separator:
|
|
description: Separator is the string between concatenated
|
|
SourceLabels.
|
|
type: string
|
|
sourceLabels:
|
|
description: |-
|
|
The source labels select values from existing labels. Their content is
|
|
concatenated using the configured Separator and matched against the
|
|
configured regular expression.
|
|
items:
|
|
description: |-
|
|
LabelName is a valid Prometheus label name which may only contain ASCII
|
|
letters, numbers, as well as underscores.
|
|
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
|
type: string
|
|
type: array
|
|
targetLabel:
|
|
description: |-
|
|
Label to which the resulting string is written in a replacement.
|
|
|
|
It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
|
|
`KeepEqual` and `DropEqual` actions.
|
|
|
|
Regex capture groups are available.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
tlsConfig:
|
|
description: TLS settings used by Prometheus to connect
|
|
to the metrics endpoint
|
|
properties:
|
|
ca:
|
|
description: Certificate authority used when verifying
|
|
server certificates.
|
|
properties:
|
|
configMap:
|
|
description: ConfigMap containing data to use for
|
|
the targets.
|
|
properties:
|
|
key:
|
|
description: The key to select.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the ConfigMap or
|
|
its key must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
secret:
|
|
description: Secret containing data to use for the
|
|
targets.
|
|
properties:
|
|
key:
|
|
description: The key of the secret to select
|
|
from. Must be a valid secret key.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the Secret or its
|
|
key must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type: object
|
|
caFile:
|
|
description: Path to the CA cert in the Prometheus container
|
|
to use for the targets.
|
|
type: string
|
|
cert:
|
|
description: Client certificate to present when doing
|
|
client-authentication.
|
|
properties:
|
|
configMap:
|
|
description: ConfigMap containing data to use for
|
|
the targets.
|
|
properties:
|
|
key:
|
|
description: The key to select.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the ConfigMap or
|
|
its key must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
secret:
|
|
description: Secret containing data to use for the
|
|
targets.
|
|
properties:
|
|
key:
|
|
description: The key of the secret to select
|
|
from. Must be a valid secret key.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the Secret or its
|
|
key must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type: object
|
|
certFile:
|
|
description: Path to the client cert file in the Prometheus
|
|
container for the targets.
|
|
type: string
|
|
insecureSkipVerify:
|
|
description: Disable target certificate validation.
|
|
type: boolean
|
|
keyFile:
|
|
description: Path to the client key file in the Prometheus
|
|
container for the targets.
|
|
type: string
|
|
keySecret:
|
|
description: Secret containing the client key file for
|
|
the targets.
|
|
properties:
|
|
key:
|
|
description: The key of the secret to select from. Must
|
|
be a valid secret key.
|
|
type: string
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
optional:
|
|
description: Specify whether the Secret or its key
|
|
must be defined
|
|
type: boolean
|
|
required:
|
|
- key
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
maxVersion:
|
|
description: |-
|
|
Maximum acceptable TLS version.
|
|
|
|
It requires Prometheus >= v2.41.0.
|
|
enum:
|
|
- TLS10
|
|
- TLS11
|
|
- TLS12
|
|
- TLS13
|
|
type: string
|
|
minVersion:
|
|
description: |-
|
|
Minimum acceptable TLS version.
|
|
|
|
It requires Prometheus >= v2.35.0.
|
|
enum:
|
|
- TLS10
|
|
- TLS11
|
|
- TLS12
|
|
- TLS13
|
|
type: string
|
|
serverName:
|
|
description: Used to verify the hostname for the targets.
|
|
type: string
|
|
type: object
|
|
type: object
|
|
type: object
|
|
rbacConfig:
|
|
description: optional kube-rbac-proxy config to provide rbac services
|
|
properties:
|
|
clientCAConfigMap:
|
|
description: 'Reference to a configmap containing the client
|
|
CA (key: ca.crt) for mTLS client validation'
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
disableHttps:
|
|
description: disable https protecting the proxy endpoint
|
|
type: boolean
|
|
enable:
|
|
description: enable kube-rbac-proxy, disabled by default
|
|
type: boolean
|
|
image:
|
|
description: kube-rbac-proxy image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
secret:
|
|
description: certificate secret to mount in kube-rbac container
|
|
for TLS, self signed certificates will be generated by default
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
staticAuthorization:
|
|
description: Optional static RBAC rules based on client certificate
|
|
Common Name (CN)
|
|
properties:
|
|
clientName:
|
|
description: Expected CN (Common Name) from client cert
|
|
(e.g., Prometheus SA identity)
|
|
type: string
|
|
enable:
|
|
description: Enables static authorization using client certificate
|
|
CN
|
|
type: boolean
|
|
type: object
|
|
type: object
|
|
selector:
|
|
additionalProperties:
|
|
type: string
|
|
description: Selector describes on which nodes to enable metrics
|
|
exporter
|
|
type: object
|
|
serviceType:
|
|
default: ClusterIP
|
|
description: ServiceType service type for metrics, clusterIP/NodePort,
|
|
clusterIP by default
|
|
enum:
|
|
- ClusterIP
|
|
- NodePort
|
|
type: string
|
|
tolerations:
|
|
description: tolerations for metrics exporter
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
upgradePolicy:
|
|
description: upgrade policy for metrics exporter daemons
|
|
properties:
|
|
maxUnavailable:
|
|
default: 1
|
|
description: MaxUnavailable specifies the maximum number of
|
|
Pods that can be unavailable during the update process. Applicable
|
|
for RollingUpdate only. Default value is 1.
|
|
format: int32
|
|
type: integer
|
|
upgradeStrategy:
|
|
description: UpgradeStrategy specifies the type of the DaemonSet
|
|
update. Valid values are "RollingUpdate" (default) or "OnDelete".
|
|
enum:
|
|
- RollingUpdate
|
|
- OnDelete
|
|
type: string
|
|
type: object
|
|
type: object
|
|
selector:
|
|
additionalProperties:
|
|
type: string
|
|
description: Selector describes on which nodes the GPU Operator should
|
|
enable the GPU device.
|
|
type: object
|
|
testRunner:
|
|
description: test runner
|
|
properties:
|
|
config:
|
|
description: config map to customize the config for test runner,
|
|
if not specified default test config will be applied
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
enable:
|
|
description: enable test runner, disabled by default
|
|
type: boolean
|
|
image:
|
|
description: test runner image
|
|
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
|
|
type: string
|
|
imagePullPolicy:
|
|
description: image pull policy for test runner
|
|
enum:
|
|
- Always
|
|
- IfNotPresent
|
|
- Never
|
|
type: string
|
|
imageRegistrySecret:
|
|
description: test runner image registry secret used to pull/push
|
|
images
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
logsLocation:
|
|
description: captures logs location and export config for test runner
|
|
logs
|
|
properties:
|
|
hostPath:
|
|
default: /var/log/amd-test-runner
|
|
description: host path to store test runner internal status
|
|
db in order to persist test running status
|
|
type: string
|
|
logsExportSecrets:
|
|
description: LogsExportSecrets is a list of secrets that contain
|
|
connectivity info to multiple cloud providers
|
|
items:
|
|
description: |-
|
|
LocalObjectReference contains enough information to let you locate the
|
|
referenced object inside the same namespace.
|
|
properties:
|
|
name:
|
|
default: ""
|
|
description: |-
|
|
Name of the referent.
|
|
This field is effectively required, but due to backwards compatibility is
|
|
allowed to be empty. Instances of this type with an empty value here are
|
|
almost certainly wrong.
|
|
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
type: string
|
|
type: object
|
|
x-kubernetes-map-type: atomic
|
|
type: array
|
|
mountPath:
|
|
default: /var/log/amd-test-runner
|
|
description: volume mount destination within test runner container
|
|
type: string
|
|
type: object
|
|
selector:
|
|
additionalProperties:
|
|
type: string
|
|
description: Selector describes on which nodes to enable test runner
|
|
type: object
|
|
tolerations:
|
|
description: tolerations for test runner
|
|
items:
|
|
description: |-
|
|
The pod this Toleration is attached to tolerates any taint that matches
|
|
the triple <key,value,effect> using the matching operator <operator>.
|
|
properties:
|
|
effect:
|
|
description: |-
|
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
|
type: string
|
|
key:
|
|
description: |-
|
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
|
type: string
|
|
operator:
|
|
description: |-
|
|
Operator represents a key's relationship to the value.
|
|
Valid operators are Exists and Equal. Defaults to Equal.
|
|
Exists is equivalent to wildcard for value, so that a pod can
|
|
tolerate all taints of a particular category.
|
|
type: string
|
|
tolerationSeconds:
|
|
description: |-
|
|
TolerationSeconds represents the period of time the toleration (which must be
|
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
|
negative values will be treated as 0 (evict immediately) by the system.
|
|
format: int64
|
|
type: integer
|
|
value:
|
|
description: |-
|
|
Value is the taint value the toleration matches to.
|
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
|
type: string
|
|
type: object
|
|
type: array
|
|
upgradePolicy:
|
|
description: upgrade policy for test runner daemonset
|
|
properties:
|
|
maxUnavailable:
|
|
default: 1
|
|
description: MaxUnavailable specifies the maximum number of
|
|
Pods that can be unavailable during the update process. Applicable
|
|
for RollingUpdate only. Default value is 1.
|
|
format: int32
|
|
type: integer
|
|
upgradeStrategy:
|
|
description: UpgradeStrategy specifies the type of the DaemonSet
|
|
update. Valid values are "RollingUpdate" (default) or "OnDelete".
|
|
enum:
|
|
- RollingUpdate
|
|
- OnDelete
|
|
type: string
|
|
type: object
|
|
type: object
|
|
type: object
|
|
status:
|
|
description: DeviceConfigStatus defines the observed state of DeviceConfig.
|
|
properties:
|
|
conditions:
|
|
description: Conditions list the current status of the DeviceConfig
|
|
object
|
|
items:
|
|
description: Condition contains details for one aspect of the current
|
|
state of this API Resource.
|
|
properties:
|
|
lastTransitionTime:
|
|
description: |-
|
|
lastTransitionTime is the last time the condition transitioned from one status to another.
|
|
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
|
|
format: date-time
|
|
type: string
|
|
message:
|
|
description: |-
|
|
message is a human readable message indicating details about the transition.
|
|
This may be an empty string.
|
|
maxLength: 32768
|
|
type: string
|
|
observedGeneration:
|
|
description: |-
|
|
observedGeneration represents the .metadata.generation that the condition was set based upon.
|
|
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
|
|
with respect to the current state of the instance.
|
|
format: int64
|
|
minimum: 0
|
|
type: integer
|
|
reason:
|
|
description: |-
|
|
reason contains a programmatic identifier indicating the reason for the condition's last transition.
|
|
Producers of specific condition types may define expected values and meanings for this field,
|
|
and whether the values are considered a guaranteed API.
|
|
The value should be a CamelCase string.
|
|
This field may not be empty.
|
|
maxLength: 1024
|
|
minLength: 1
|
|
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
|
|
type: string
|
|
status:
|
|
description: status of the condition, one of True, False, Unknown.
|
|
enum:
|
|
- "True"
|
|
- "False"
|
|
- Unknown
|
|
type: string
|
|
type:
|
|
description: type of condition in CamelCase or in foo.example.com/CamelCase.
|
|
maxLength: 316
|
|
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
|
|
type: string
|
|
required:
|
|
- lastTransitionTime
|
|
- message
|
|
- reason
|
|
- status
|
|
- type
|
|
type: object
|
|
type: array
|
|
configManager:
|
|
description: ConfigManager contains the status of the ConfigManager
|
|
deployment
|
|
properties:
|
|
availableNumber:
|
|
description: number of the actually deployed and running pods
|
|
format: int32
|
|
type: integer
|
|
desiredNumber:
|
|
description: number of the pods that should be deployed for daemonset
|
|
format: int32
|
|
type: integer
|
|
nodesMatchingSelectorNumber:
|
|
description: number of nodes that are targeted by the DeviceConfig
|
|
selector
|
|
format: int32
|
|
type: integer
|
|
type: object
|
|
devicePlugin:
|
|
description: DevicePlugin contains the status of the Device Plugin deployment
|
|
properties:
|
|
availableNumber:
|
|
description: number of the actually deployed and running pods
|
|
format: int32
|
|
type: integer
|
|
desiredNumber:
|
|
description: number of the pods that should be deployed for daemonset
|
|
format: int32
|
|
type: integer
|
|
nodesMatchingSelectorNumber:
|
|
description: number of nodes that are targeted by the DeviceConfig
|
|
selector
|
|
format: int32
|
|
type: integer
|
|
type: object
|
|
driver:
|
|
description: Driver contains the status of the Drivers deployment
|
|
properties:
|
|
availableNumber:
|
|
description: number of the actually deployed and running pods
|
|
format: int32
|
|
type: integer
|
|
desiredNumber:
|
|
description: number of the pods that should be deployed for daemonset
|
|
format: int32
|
|
type: integer
|
|
nodesMatchingSelectorNumber:
|
|
description: number of nodes that are targeted by the DeviceConfig
|
|
selector
|
|
format: int32
|
|
type: integer
|
|
type: object
|
|
metricsExporter:
|
|
description: MetricsExporter contains the status of the MetricsExporter
|
|
deployment
|
|
properties:
|
|
availableNumber:
|
|
description: number of the actually deployed and running pods
|
|
format: int32
|
|
type: integer
|
|
desiredNumber:
|
|
description: number of the pods that should be deployed for daemonset
|
|
format: int32
|
|
type: integer
|
|
nodesMatchingSelectorNumber:
|
|
description: number of nodes that are targeted by the DeviceConfig
|
|
selector
|
|
format: int32
|
|
type: integer
|
|
type: object
|
|
nodeModuleStatus:
|
|
additionalProperties:
|
|
description: ModuleStatus contains the status of driver module installed
|
|
by operator on the node
|
|
properties:
|
|
bootId:
|
|
type: string
|
|
containerImage:
|
|
type: string
|
|
kernelVersion:
|
|
type: string
|
|
lastTransitionTime:
|
|
type: string
|
|
status:
|
|
description: UpgradeState captures the state of the upgrade process
|
|
on a node
|
|
type: string
|
|
upgradeStartTime:
|
|
type: string
|
|
type: object
|
|
description: NodeModuleStatus contains per node status of driver module
|
|
installation
|
|
type: object
|
|
observedGeneration:
|
|
description: ObservedGeneration is the latest spec generation successfully
|
|
processed by the controller
|
|
format: int64
|
|
type: integer
|
|
type: object
|
|
type: object
|
|
served: true
|
|
storage: true
|
|
subresources:
|
|
status: {}
|
|
status:
|
|
acceptedNames:
|
|
kind: ""
|
|
plural: ""
|
|
conditions: []
|
|
storedVersions: []
|