Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions api/core/v1alpha2/virtual_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ type VirtualMachineSpec struct {
// Devices are referenced by name of USBDevice resource in the same namespace.
// +kubebuilder:validation:MaxItems:=8
USBDevices []USBDeviceSpecRef `json:"usbDevices,omitempty"`
// List of GPU devices to attach to the virtual machine.
// Devices are requested by GPU model.
// This feature requires the GPU feature gate and the gpu.deckhouse.io DeviceClass.
// +kubebuilder:validation:MaxItems:=16
// +listType=map
// +listMapKey=name
GPUDevices []GPUDeviceSpec `json:"gpuDevices,omitempty"`
}

// RunPolicy parameter defines the VM startup policy
Expand Down Expand Up @@ -497,6 +504,20 @@ type USBDeviceSpecRef struct {
Name string `json:"name"`
}

// GPUDeviceSpec requests a GPU device by model.
//
// Entries are keyed by Name within VirtualMachineSpec.GPUDevices (that list is
// declared with listType=map and listMapKey=name). Name is restricted to a
// lowercase DNS-label pattern of at most 59 characters; Model is a free-form
// string of 1 to 128 characters.
// NOTE(review): the 59-character limit presumably leaves room for the "gpu-"
// prefix of generated claim names within a 63-character label — confirm.
type GPUDeviceSpec struct {
// A unique GPU device name inside the virtual machine spec.
// The value is used to generate DRA claim and request names.
// +kubebuilder:validation:MinLength:=1
// +kubebuilder:validation:MaxLength:=59
// +kubebuilder:validation:Pattern:=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`
Name string `json:"name"`
// GPU product name, for example NVIDIA H100.
// +kubebuilder:validation:MinLength:=1
// +kubebuilder:validation:MaxLength:=128
Model string `json:"model"`
}

// USBDeviceStatusRef represents the status of a USB device attached to the virtual machine.
type USBDeviceStatusRef struct {
// The name of USBDevice resource.
Expand Down
21 changes: 21 additions & 0 deletions api/core/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion build/components/versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ firmware:
libvirt: v10.9.0
edk2: stable202411
core:
3p-kubevirt: v1.6.2-v12n.47
3p-kubevirt: feat/gpu/add-deckhouse-dra-support
3p-containerized-data-importer: v1.60.3-v12n.20
distribution: 2.8.3
package:
Expand Down
14 changes: 14 additions & 0 deletions crds/doc-ru-virtualmachines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,20 @@ spec:
name:
description: |
Имя ресурса `USBDevice` в том же пространстве имен.
gpuDevices:
description: |
Список GPU-устройств для подключения к виртуальной машине.
Устройства запрашиваются по модели GPU.
Для использования требуется feature gate `GPU` и DeviceClass `gpu.deckhouse.io`.
items:
properties:
model:
description: |
Название продукта GPU, например `NVIDIA H100`.
name:
description: |
Уникальное имя GPU-устройства внутри спецификации виртуальной машины.
Значение используется для генерации имён DRA claim и request.
status:
properties:
blockDeviceRefs:
Expand Down
30 changes: 30 additions & 0 deletions crds/virtualmachines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,36 @@ spec:
type: string
description: |
The name of USBDevice resource in the same namespace.
gpuDevices:
type: array
maxItems: 16
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
description: |
List of GPU devices to attach to the virtual machine.
Devices are requested by GPU model.
This feature requires the GPU feature gate and the gpu.deckhouse.io DeviceClass.
items:
type: object
required:
- model
- name
properties:
model:
minLength: 1
maxLength: 128
type: string
description: |
GPU product name, for example NVIDIA H100.
name:
minLength: 1
maxLength: 59
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
type: string
description: |
A unique GPU device name inside the virtual machine spec.
The value is used to generate DRA claim and request names.
status:
type: object
properties:
Expand Down
46 changes: 46 additions & 0 deletions docs/USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -3775,6 +3775,52 @@ spec:

As a result, a VM named `clone-database-prod` and a disk named `clone-database-root-prod` will be created.

## GPU Devices

{{< alert level="warning" >}}
GPU device passthrough is an experimental feature. It requires the Enterprise Edition (EE), Kubernetes DRA support, and an external GPU DRA provider that creates the `gpu.deckhouse.io` `DeviceClass`.
{{< /alert >}}

The virtualization module can attach physical GPU devices to virtual machines using DRA (Dynamic Resource Allocation). A GPU is requested by product model through the `.spec.gpuDevices` field of the [VirtualMachine](/modules/virtualization/cr.html#virtualmachine) resource.

GPU device passthrough requires:

- Kubernetes version 1.34 or higher, with the DRA feature gates required by the cluster configuration enabled.
- The `GPU` feature gate enabled in the `virtualization` module settings.
- A GPU DRA provider installed in the cluster.
- The `gpu.deckhouse.io` [DeviceClass](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/#device-classes) created by the GPU DRA provider.

To enable the module feature gate:

```yaml
apiVersion: deckhouse.io/v1alpha1
kind: ModuleConfig
metadata:
name: virtualization
spec:
settings:
featureGates:
- GPU
```

To request a GPU device, add `.spec.gpuDevices` to the VM specification:

```yaml
apiVersion: virtualization.deckhouse.io/v1alpha2
kind: VirtualMachine
metadata:
name: linux-vm
spec:
# ... other VM settings ...
gpuDevices:
- name: gpu0
model: NVIDIA H100
```

The `name` field must be unique within `.spec.gpuDevices` and must be a valid DNS label of at most 59 characters: lowercase alphanumeric characters or `-`, starting and ending with an alphanumeric character. The `model` field must match the GPU product name exposed by the GPU DRA provider in the `device.attributes["gpu.deckhouse.io"].productName` device attribute.

Changing `.spec.gpuDevices` requires restarting the virtual machine to apply the new configuration.

## USB Devices

{{< alert level="warning" >}}
Expand Down
46 changes: 46 additions & 0 deletions docs/USER_GUIDE.ru.md
Original file line number Diff line number Diff line change
Expand Up @@ -3806,6 +3806,52 @@ spec:

В результате будет создана ВМ с именем `clone-database-prod` и диск с именем `clone-database-root-prod`.

## GPU-устройства

{{< alert level="warning" >}}
Проброс GPU-устройств — экспериментальная возможность. Для работы требуются Enterprise Edition (EE), поддержка Kubernetes DRA и внешний GPU DRA-провайдер, создающий `DeviceClass` с именем `gpu.deckhouse.io`.
{{< /alert >}}

Модуль виртуализации может подключать физические GPU-устройства к виртуальным машинам с помощью DRA (Dynamic Resource Allocation). GPU запрашивается по модели продукта через поле `.spec.gpuDevices` ресурса [VirtualMachine](/modules/virtualization/cr.html#virtualmachine).

Для проброса GPU требуются:

- Kubernetes версии 1.34 или выше с включёнными DRA feature gates, необходимыми для конфигурации кластера.
- Feature gate `GPU`, включённый в настройках модуля `virtualization`.
- Установленный в кластере GPU DRA-провайдер.
- [DeviceClass](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/#device-classes) `gpu.deckhouse.io`, созданный GPU DRA-провайдером.

Чтобы включить feature gate модуля:

```yaml
apiVersion: deckhouse.io/v1alpha1
kind: ModuleConfig
metadata:
name: virtualization
spec:
settings:
featureGates:
- GPU
```

Чтобы запросить GPU-устройство, добавьте `.spec.gpuDevices` в спецификацию ВМ:

```yaml
apiVersion: virtualization.deckhouse.io/v1alpha2
kind: VirtualMachine
metadata:
name: linux-vm
spec:
# ... другие настройки ВМ ...
gpuDevices:
- name: gpu0
model: NVIDIA H100
```

Поле `name` должно быть уникальным внутри `.spec.gpuDevices` и представлять собой корректную DNS-метку длиной не более 59 символов: строчные латинские буквы, цифры и `-`, первый и последний символ — буква или цифра. Поле `model` должно совпадать с названием продукта GPU, которое GPU DRA-провайдер публикует в атрибуте устройства `device.attributes["gpu.deckhouse.io"].productName`.

Изменение `.spec.gpuDevices` требует перезапуска виртуальной машины для применения новой конфигурации.

## USB-устройства

{{< alert level="warning">}}
Expand Down
2 changes: 2 additions & 0 deletions images/virt-artifact/werf.inc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ secrets:
- id: SOURCE_REPO
value: {{ $.SOURCE_REPO }}
shell:
installCacheVersion: "{{ now | date "Mon Jan 2 15:04:05 MST 2006" }}"
install:
- |
echo "$date"
echo "Git clone {{ $gitRepoName }} repository..."
git clone --depth=1 $(cat /run/secrets/SOURCE_REPO)/{{ $gitRepoUrl }} --branch {{ $tag }} /src/kubevirt

Expand Down
6 changes: 6 additions & 0 deletions images/virtualization-artifact/pkg/builder/vm/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ func WithUSBDevices(usbDevices []v1alpha2.USBDeviceSpecRef) Option {
}
}

// WithGPUDevices returns an Option that stores gpuDevices in the GPUDevices
// field of the virtual machine spec. The slice is assigned as-is, not copied.
func WithGPUDevices(gpuDevices []v1alpha2.GPUDeviceSpec) Option {
	applyGPUDevices := func(target *v1alpha2.VirtualMachine) {
		target.Spec.GPUDevices = gpuDevices
	}
	return applyGPUDevices
}

func WithIpAddress(ipAddress string) Option {
return func(vm *v1alpha2.VirtualMachine) {
vm.Spec.VirtualMachineIPAddress = ipAddress
Expand Down
94 changes: 94 additions & 0 deletions images/virtualization-artifact/pkg/controller/kvbuilder/gpu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
Copyright 2026 Flant JSC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kvbuilder

import (
"slices"
"strings"

corev1 "k8s.io/api/core/v1"
"k8s.io/utils/ptr"
virtv1 "kubevirt.io/api/core/v1"

"github.com/deckhouse/virtualization/api/core/v1alpha2"
)

const (
// GPUNamePrefix is prepended to a GPU device name to build its resource
// claim name. SetGPUDevices also uses it to recognise the claims and GPU
// entries it has to replace.
GPUNamePrefix = "gpu-"
// GPUDeviceClassName is the DeviceClass name used for GPU devices.
// NOTE(review): not referenced in the visible part of this file; presumably
// consumed where the resource claim templates are built — confirm.
GPUDeviceClassName = "gpu.deckhouse.io"
)

// GPUResourceClaimName builds the resource claim name for a GPU device:
// the given device name with GPUNamePrefix in front of it.
func GPUResourceClaimName(deviceName string) string {
	claimName := GPUNamePrefix + deviceName
	return claimName
}

// GPUResourceClaimTemplateName builds the name of the resource claim template
// for one GPU device of one virtual machine, in the form "<vmName>-<deviceName>".
func GPUResourceClaimTemplateName(vmName, deviceName string) string {
	const separator = "-"
	return vmName + separator + deviceName
}

// IsGPUResourceClaimTemplateName reports whether templateName begins with
// "<vmName>-".
//
// The check is a plain prefix match, so it also returns true for a template
// whose owning VM name merely starts with "<vmName>-" (for example vmName "a"
// matches "a-b-gpu0"); callers that need exact ownership must verify it by
// other means.
func IsGPUResourceClaimTemplateName(vmName, templateName string) bool {
	vmPrefix := vmName + "-"
	return strings.HasPrefix(templateName, vmPrefix)
}

// SetGPUDevices replaces the GPU-related entries of the KVVM template with
// entries built from devices.
//
// Every resource claim and every GPU whose name starts with GPUNamePrefix is
// removed first, so an empty device list only performs this cleanup. Then, for
// each device in name order (see SortGPUDevices), one resource claim pointing
// at the per-VM claim template and one GPU bound to that claim are appended.
// The claim name, the GPU name and the request name are all
// GPUResourceClaimName(device.Name).
func (b *KVVM) SetGPUDevices(vmName string, devices []v1alpha2.GPUDeviceSpec) {
	ordered := SortGPUDevices(devices)
	vmiSpec := &b.Resource.Spec.Template.Spec

	// Drop whatever a previous call produced before adding the current set.
	vmiSpec.ResourceClaims = slices.DeleteFunc(vmiSpec.ResourceClaims, func(rc virtv1.ResourceClaim) bool {
		return strings.HasPrefix(rc.Name, GPUNamePrefix)
	})
	vmiSpec.Domain.Devices.GPUs = slices.DeleteFunc(vmiSpec.Domain.Devices.GPUs, func(g virtv1.GPU) bool {
		return strings.HasPrefix(g.Name, GPUNamePrefix)
	})

	for i := range ordered {
		deviceName := ordered[i].Name
		claimName := GPUResourceClaimName(deviceName)

		claim := virtv1.ResourceClaim{
			PodResourceClaim: corev1.PodResourceClaim{
				Name:                      claimName,
				ResourceClaimTemplateName: ptr.To(GPUResourceClaimTemplateName(vmName, deviceName)),
			},
		}
		gpu := virtv1.GPU{
			Name: claimName,
			ClaimRequest: &virtv1.ClaimRequest{
				ClaimName: ptr.To(claimName),
				// The request name is derived the same way as the claim name.
				RequestName: ptr.To(GPUResourceClaimName(deviceName)),
			},
		}

		vmiSpec.ResourceClaims = append(vmiSpec.ResourceClaims, claim)
		vmiSpec.Domain.Devices.GPUs = append(vmiSpec.Domain.Devices.GPUs, gpu)
	}
}

// SortGPUDevices returns a copy of devices ordered by Name (byte-wise string
// comparison). The input slice is left untouched. A nil or empty input yields
// nil.
func SortGPUDevices(devices []v1alpha2.GPUDeviceSpec) []v1alpha2.GPUDeviceSpec {
	if len(devices) < 1 {
		return nil
	}

	byName := func(left, right v1alpha2.GPUDeviceSpec) int {
		return strings.Compare(left.Name, right.Name)
	}

	result := slices.Clone(devices)
	slices.SortFunc(result, byName)
	return result
}
Loading