From 0ef80abfb7be309688ed4a552f1cf918e40ee009 Mon Sep 17 00:00:00 2001 From: Weyang1 Date: Fri, 17 Apr 2026 22:02:03 +0000 Subject: [PATCH 1/3] hcsoci: wire Windows CPU affinity to HCS container processor schema Summary - Add Affinity field to hcsschema.Processor struct. - Add ConvertCPUAffinity function to validate and extract CPU affinity from OCI spec. - Wire CPU affinity into HCS silo container creation via createWindowsContainerDocument. - Add affinity support to updateWCOWContainerCPU for container updates. - Add unit tests for ConvertCPUAffinity validation. Motivation This is Phase 2 of Windows CPU affinity support, enabling HCS silo containers (non-JobObject path) to use CPU affinity specified in the OCI spec. Limitations (intentional for Phase 2) - Multiple affinity entries are rejected. - Non-zero processor groups are rejected. Signed-off-by: Weyang1 --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 15 +++ internal/hcs/schema2/processor.go | 2 + internal/hcsoci/hcsdoc_wcow.go | 37 +++++- internal/hcsoci/hcsdoc_wcow_test.go | 149 ++++++++++++++++++++++ 4 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 internal/hcsoci/hcsdoc_wcow_test.go diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 9fbb1faf35..77779b3797 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -934,6 +934,21 @@ func (ht *hcsTask) updateWCOWContainerCPU(ctx context.Context, cpu *specs.Window if cpu.Shares != nil { req.Weight = int32(*cpu.Shares) } + if len(cpu.Affinity) > 0 { + // Create a temporary spec to reuse the existing ConvertCPUAffinity validation + tempSpec := &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: cpu, + }, + }, + } + affinity, err := hcsoci.ConvertCPUAffinity(tempSpec) + if err != nil { + return err + } + req.Affinity = affinity + } return ht.requestUpdateContainer(ctx, resourcepaths.SiloProcessorResourcePath, 
req) } diff --git a/internal/hcs/schema2/processor.go b/internal/hcs/schema2/processor.go index bb24e88da1..ff7617113c 100644 --- a/internal/hcs/schema2/processor.go +++ b/internal/hcs/schema2/processor.go @@ -15,4 +15,6 @@ type Processor struct { Maximum int32 `json:"Maximum,omitempty"` Weight int32 `json:"Weight,omitempty"` + + Affinity uint64 `json:"Affinity,omitempty"` } diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index d1d1a44c85..35e12f806c 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -94,6 +94,31 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount return &config, nil } +// ConvertCPUAffinity handles the logic of converting and validating the container's CPU affinity +// specified in the OCI spec to what HCS expects. +// +// Returns the CPU affinity bitmask (0 if not specified) and any validation error. +// Phase 2 limitations: +// - Multiple affinity entries are rejected +// - Non-zero processor groups are rejected +func ConvertCPUAffinity(spec *specs.Spec) (uint64, error) { + if spec.Windows == nil || spec.Windows.Resources == nil || spec.Windows.Resources.CPU == nil || len(spec.Windows.Resources.CPU.Affinity) == 0 { + return 0, nil + } + + affinity := spec.Windows.Resources.CPU.Affinity + if len(affinity) != 1 { + return 0, fmt.Errorf("cpu affinity with multiple processor groups is not supported") + } + if affinity[0].Group != 0 { + return 0, fmt.Errorf("cpu affinity processor group %d is not supported", affinity[0].Group) + } + if affinity[0].Mask == 0 { + return 0, fmt.Errorf("cpu affinity mask must be non-zero") + } + return affinity[0].Mask, nil +} + // ConvertCPULimits handles the logic of converting and validating the containers CPU limits // specified in the OCI spec to what HCS expects. 
// @@ -184,6 +209,11 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter return nil, nil, err } + cpuAffinity, err := ConvertCPUAffinity(coi.Spec) + if err != nil { + return nil, nil, err + } + if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 { // When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume // the CPU limit has been calculated based on the number of processors on the @@ -233,9 +263,10 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v1.ProcessorWeight = uint64(cpuWeight) v2Container.Processor = &hcsschema.Processor{ - Count: cpuCount, - Maximum: cpuLimit, - Weight: cpuWeight, + Count: cpuCount, + Maximum: cpuLimit, + Weight: cpuWeight, + Affinity: cpuAffinity, } // Memory Resources diff --git a/internal/hcsoci/hcsdoc_wcow_test.go b/internal/hcsoci/hcsdoc_wcow_test.go new file mode 100644 index 0000000000..9755b5d777 --- /dev/null +++ b/internal/hcsoci/hcsdoc_wcow_test.go @@ -0,0 +1,149 @@ +//go:build windows + +package hcsoci + +import ( + "strings" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestConvertCPUAffinity_Group0MaskSet(t *testing.T) { + s := &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: &specs.WindowsCPUResources{ + Affinity: []specs.WindowsCPUGroupAffinity{ + {Mask: 0x3, Group: 0}, + }, + }, + }, + }, + } + + affinity, err := ConvertCPUAffinity(s) + if err != nil { + t.Fatalf("ConvertCPUAffinity failed: %v", err) + } + if affinity != 0x3 { + t.Fatalf("unexpected cpu affinity: got %d want %d", affinity, uint64(0x3)) + } +} + +func TestConvertCPUAffinity_MultiGroupRejected(t *testing.T) { + s := &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: &specs.WindowsCPUResources{ + Affinity: []specs.WindowsCPUGroupAffinity{ + {Mask: 0x1, Group: 0}, + {Mask: 0x1, Group: 1}, + }, + }, + }, + }, + } + + _, err := ConvertCPUAffinity(s) 
+ if err == nil { + t.Fatal("expected error for multiple affinity entries") + } + if !strings.Contains(err.Error(), "multiple processor groups") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestConvertCPUAffinity_NonZeroGroupRejected(t *testing.T) { + s := &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: &specs.WindowsCPUResources{ + Affinity: []specs.WindowsCPUGroupAffinity{ + {Mask: 0x1, Group: 1}, + }, + }, + }, + }, + } + + _, err := ConvertCPUAffinity(s) + if err == nil { + t.Fatal("expected error for non-zero affinity group") + } + if !strings.Contains(err.Error(), "processor group") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestConvertCPUAffinity_ZeroMaskRejected(t *testing.T) { + s := &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: &specs.WindowsCPUResources{ + Affinity: []specs.WindowsCPUGroupAffinity{ + {Mask: 0, Group: 0}, + }, + }, + }, + }, + } + + _, err := ConvertCPUAffinity(s) + if err == nil { + t.Fatal("expected error for zero affinity mask") + } + if !strings.Contains(err.Error(), "mask must be non-zero") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestConvertCPUAffinity_NoAffinity(t *testing.T) { + testCases := []struct { + name string + spec *specs.Spec + }{ + { + name: "nil spec.Windows", + spec: &specs.Spec{}, + }, + { + name: "nil spec.Windows.Resources", + spec: &specs.Spec{ + Windows: &specs.Windows{}, + }, + }, + { + name: "nil spec.Windows.Resources.CPU", + spec: &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{}, + }, + }, + }, + { + name: "empty affinity slice", + spec: &specs.Spec{ + Windows: &specs.Windows{ + Resources: &specs.WindowsResources{ + CPU: &specs.WindowsCPUResources{ + Affinity: []specs.WindowsCPUGroupAffinity{}, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + affinity, err := ConvertCPUAffinity(tc.spec) + if err != nil { + 
t.Fatalf("ConvertCPUAffinity failed: %v", err) + } + if affinity != 0 { + t.Fatalf("expected zero affinity, got %d", affinity) + } + }) + } +} From 95fdd2d1869628ffeeec362dbd0569536c2d644a Mon Sep 17 00:00:00 2001 From: Weyang1 Date: Mon, 20 Apr 2026 16:38:13 +0000 Subject: [PATCH 2/3] Retry CI Signed-off-by: Weyang1 From 4782c00dbb9949f4c560a59453a59a93f212aff1 Mon Sep 17 00:00:00 2001 From: Weyang1 Date: Thu, 30 Apr 2026 17:26:28 +0000 Subject: [PATCH 3/3] hcsoci: address PR review comments - Replace Affinity uint64 in schema2/Processor (not a real HCS API field) with GroupAffinities []ProcessorGroupAffinity, and wire the entries validated by ConvertCPUAffinity into it - Define sentinel errors (ErrCPUAffinityMultipleGroupsNotSupported, ErrCPUAffinityNonZeroGroupNotSupported, ErrCPUAffinityMaskZero) and replace strings.Contains checks in tests with errors.Is for more robust error identity verification Signed-off-by: Weyang1 --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 13 ++- internal/hcs/schema2/processor.go | 7 +- .../hcs/schema2/processor_group_affinity.go | 23 +++++ internal/hcsoci/hcsdoc_wcow.go | 75 ++++++++++++---- internal/hcsoci/hcsdoc_wcow_test.go | 66 +++++++++----- internal/jobcontainers/oci.go | 29 ++++--- internal/jobcontainers/oci_test.go | 68 +++++++++++---- internal/jobobject/jobobject.go | 13 ++- internal/jobobject/limits.go | 87 ++++++++++++++++++- internal/winapi/jobobject.go | 20 +++++ 10 files changed, 321 insertions(+), 80 deletions(-) create mode 100644 internal/hcs/schema2/processor_group_affinity.go diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 77779b3797..e8fc1d2df7 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -935,7 +935,7 @@ func (ht *hcsTask) updateWCOWContainerCPU(ctx context.Context, cpu *specs.Window req.Weight = int32(*cpu.Shares) } if len(cpu.Affinity) > 0 { - // Create a temporary spec to reuse the existing
ConvertCPUAffinity validation + // Validate and retrieve CPU affinity. tempSpec := &specs.Spec{ Windows: &specs.Windows{ Resources: &specs.WindowsResources{ @@ -943,11 +943,18 @@ func (ht *hcsTask) updateWCOWContainerCPU(ctx context.Context, cpu *specs.Window }, }, } - affinity, err := hcsoci.ConvertCPUAffinity(tempSpec) + affinities, err := hcsoci.ConvertCPUAffinity(tempSpec) if err != nil { return err } - req.Affinity = affinity + groupAffs := make([]hcsschema.ProcessorGroupAffinity, len(affinities)) + for i, a := range affinities { + groupAffs[i] = hcsschema.ProcessorGroupAffinity{ + Mask: a.Mask, + Group: uint16(a.Group), + } + } + req.GroupAffinities = groupAffs } return ht.requestUpdateContainer(ctx, resourcepaths.SiloProcessorResourcePath, req) } diff --git a/internal/hcs/schema2/processor.go b/internal/hcs/schema2/processor.go index ff7617113c..f44bf2222d 100644 --- a/internal/hcs/schema2/processor.go +++ b/internal/hcs/schema2/processor.go @@ -3,7 +3,7 @@ * * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) * - * API version: 2.1 + * API version: 2.4 * Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git) */ @@ -16,5 +16,8 @@ type Processor struct { Weight int32 `json:"Weight,omitempty"` - Affinity uint64 `json:"Affinity,omitempty"` + // GroupAffinities specifies the processor group affinity for the container. + // Each entry pins the container to the given set of processors within a processor group. + // Requires Windows Server 2022 (build 20348) or later. 
+ GroupAffinities []ProcessorGroupAffinity `json:"GroupAffinities,omitempty"` } diff --git a/internal/hcs/schema2/processor_group_affinity.go b/internal/hcs/schema2/processor_group_affinity.go new file mode 100644 index 0000000000..c5fdef7b80 --- /dev/null +++ b/internal/hcs/schema2/processor_group_affinity.go @@ -0,0 +1,23 @@ +/* + * HCS API + * + * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) + * + * API version: 2.4 + * Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git) + */ + +package hcsschema + +// ProcessorGroupAffinity specifies a processor group and an affinity mask within +// that group for an HCS container. It mirrors the Win32 GROUP_AFFINITY structure. +// Requires Windows Server 2022 (build 20348) or later. +// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/miniport/ns-miniport-_group_affinity +type ProcessorGroupAffinity struct { + // Mask is the bitmask of processors within Group. + Mask uint64 `json:"Mask,omitempty"` + // Group is the processor group number (0-based). Group 0 is the most common + // value; the tag intentionally omits omitempty so that group 0 is not + // silently dropped from the JSON sent to HCS. + Group uint16 `json:"Group"` +} diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 35e12f806c..ff0fc33f52 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -31,6 +31,22 @@ import ( const createContainerSubdirectoryForProcessDumpSuffix = "{container_id}" +// Sentinel errors returned by ConvertCPUAffinity. +var ( + // ErrCPUAffinityMultipleGroupsNotSupported is returned when multiple processor-group + // affinity entries are requested on a host older than Windows Server 2022 (build 20348), + // which does not support multi-group affinity for job object silos. + // On Windows Server 2022+, multiple processor groups are fully supported. 
+ ErrCPUAffinityMultipleGroupsNotSupported = errors.New("cpu affinity with multiple processor groups requires Windows Server 2022 or later") + // ErrCPUAffinityNonZeroGroupNotSupported is returned when a non-zero processor group is + // requested on a host older than Windows Server 2022 (build 20348). + // On Windows Server 2022+, non-zero processor groups are fully supported. + ErrCPUAffinityNonZeroGroupNotSupported = errors.New("cpu affinity with a non-zero processor group requires Windows Server 2022 or later") + // ErrCPUAffinityMaskZero is returned when an affinity entry has a zero bitmask, + // which would select no processors and is always invalid. + ErrCPUAffinityMaskZero = errors.New("cpu affinity mask must be non-zero") +) + // A simple wrapper struct around the container mount configs that should be added to the // container. type mountsConfig struct { @@ -97,26 +113,36 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount // ConvertCPUAffinity handles the logic of converting and validating the container's CPU affinity // specified in the OCI spec to what HCS expects. // -// Returns the CPU affinity bitmask (0 if not specified) and any validation error. -// Phase 2 limitations: -// - Multiple affinity entries are rejected -// - Non-zero processor groups are rejected -func ConvertCPUAffinity(spec *specs.Spec) (uint64, error) { +// Returns the validated affinity entries (nil if not specified) and any validation error. +// Multiple processor groups and non-zero group numbers require Windows Server 2022 +// (build 20348) or later; on older hosts only a single entry for group 0 is accepted. 
+func ConvertCPUAffinity(spec *specs.Spec) ([]specs.WindowsCPUGroupAffinity, error) { if spec.Windows == nil || spec.Windows.Resources == nil || spec.Windows.Resources.CPU == nil || len(spec.Windows.Resources.CPU.Affinity) == 0 { - return 0, nil + return nil, nil } affinity := spec.Windows.Resources.CPU.Affinity - if len(affinity) != 1 { - return 0, fmt.Errorf("cpu affinity with multiple processor groups is not supported") - } - if affinity[0].Group != 0 { - return 0, fmt.Errorf("cpu affinity processor group %d is not supported", affinity[0].Group) + + // Zero masks are never valid regardless of OS version. + for i, a := range affinity { + if a.Mask == 0 { + return nil, fmt.Errorf("%w: entry %d has zero mask", ErrCPUAffinityMaskZero, i) + } } - if affinity[0].Mask == 0 { - return 0, fmt.Errorf("cpu affinity mask must be non-zero") + + // Determine whether multi-group features are needed: either multiple entries, + // or a single entry targeting a non-zero processor group. + multiGroup := len(affinity) > 1 || affinity[0].Group != 0 + + // Multiple processor groups are only supported on Windows Server 2022+. + if multiGroup && osversion.Build() < osversion.LTSC2022 { + if len(affinity) > 1 { + return nil, fmt.Errorf("%w: %d entries", ErrCPUAffinityMultipleGroupsNotSupported, len(affinity)) + } + return nil, fmt.Errorf("%w: group %d", ErrCPUAffinityNonZeroGroupNotSupported, affinity[0].Group) } - return affinity[0].Mask, nil + + return affinity, nil } // ConvertCPULimits handles the logic of converting and validating the containers CPU limits @@ -209,6 +235,7 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter return nil, nil, err } + // Validate and retrieve CPU affinity from the spec. 
cpuAffinity, err := ConvertCPUAffinity(coi.Spec) if err != nil { return nil, nil, err @@ -262,12 +289,22 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v1.ProcessorMaximum = int64(cpuLimit) v1.ProcessorWeight = uint64(cpuWeight) - v2Container.Processor = &hcsschema.Processor{ - Count: cpuCount, - Maximum: cpuLimit, - Weight: cpuWeight, - Affinity: cpuAffinity, + v2Processor := &hcsschema.Processor{ + Count: cpuCount, + Maximum: cpuLimit, + Weight: cpuWeight, + } + if len(cpuAffinity) > 0 { + groupAffs := make([]hcsschema.ProcessorGroupAffinity, len(cpuAffinity)) + for i, a := range cpuAffinity { + groupAffs[i] = hcsschema.ProcessorGroupAffinity{ + Mask: a.Mask, + Group: uint16(a.Group), + } + } + v2Processor.GroupAffinities = groupAffs } + v2Container.Processor = v2Processor // Memory Resources memoryMaxInMB := oci.ParseAnnotationsMemory(ctx, coi.Spec, annotations.ContainerMemorySizeInMB, 0) diff --git a/internal/hcsoci/hcsdoc_wcow_test.go b/internal/hcsoci/hcsdoc_wcow_test.go index 9755b5d777..4c50413f39 100644 --- a/internal/hcsoci/hcsdoc_wcow_test.go +++ b/internal/hcsoci/hcsdoc_wcow_test.go @@ -3,10 +3,12 @@ package hcsoci import ( - "strings" + "errors" "testing" specs "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/Microsoft/hcsshim/osversion" ) func TestConvertCPUAffinity_Group0MaskSet(t *testing.T) { @@ -22,16 +24,16 @@ func TestConvertCPUAffinity_Group0MaskSet(t *testing.T) { }, } - affinity, err := ConvertCPUAffinity(s) + affinities, err := ConvertCPUAffinity(s) if err != nil { t.Fatalf("ConvertCPUAffinity failed: %v", err) } - if affinity != 0x3 { - t.Fatalf("unexpected cpu affinity: got %d want %d", affinity, uint64(0x3)) + if len(affinities) != 1 || affinities[0].Mask != 0x3 || affinities[0].Group != 0 { + t.Fatalf("unexpected cpu affinity: got %v", affinities) } } -func TestConvertCPUAffinity_MultiGroupRejected(t *testing.T) { +func TestConvertCPUAffinity_MultiGroup(t *testing.T) { s := 
&specs.Spec{ Windows: &specs.Windows{ Resources: &specs.WindowsResources{ @@ -45,16 +47,26 @@ func TestConvertCPUAffinity_MultiGroupRejected(t *testing.T) { }, } - _, err := ConvertCPUAffinity(s) - if err == nil { - t.Fatal("expected error for multiple affinity entries") - } - if !strings.Contains(err.Error(), "multiple processor groups") { - t.Fatalf("unexpected error: %v", err) + affinities, err := ConvertCPUAffinity(s) + if osversion.Build() >= osversion.LTSC2022 { + // Multi-group is supported on WS2022+. + if err != nil { + t.Fatalf("expected success for multi-group on WS2022+, got: %v", err) + } + if len(affinities) != 2 { + t.Fatalf("expected 2 affinity entries, got %d", len(affinities)) + } + } else { + if err == nil { + t.Fatal("expected error for multiple affinity entries on pre-WS2022") + } + if !errors.Is(err, ErrCPUAffinityMultipleGroupsNotSupported) { + t.Fatalf("unexpected error: %v", err) + } } } -func TestConvertCPUAffinity_NonZeroGroupRejected(t *testing.T) { +func TestConvertCPUAffinity_NonZeroGroup(t *testing.T) { s := &specs.Spec{ Windows: &specs.Windows{ Resources: &specs.WindowsResources{ @@ -67,12 +79,22 @@ func TestConvertCPUAffinity_NonZeroGroupRejected(t *testing.T) { }, } - _, err := ConvertCPUAffinity(s) - if err == nil { - t.Fatal("expected error for non-zero affinity group") - } - if !strings.Contains(err.Error(), "processor group") { - t.Fatalf("unexpected error: %v", err) + affinities, err := ConvertCPUAffinity(s) + if osversion.Build() >= osversion.LTSC2022 { + // Non-zero group is supported on WS2022+. 
+ if err != nil { + t.Fatalf("expected success for non-zero group on WS2022+, got: %v", err) + } + if len(affinities) != 1 || affinities[0].Group != 1 { + t.Fatalf("unexpected affinity: got %v", affinities) + } + } else { + if err == nil { + t.Fatal("expected error for non-zero affinity group on pre-WS2022") + } + if !errors.Is(err, ErrCPUAffinityNonZeroGroupNotSupported) { + t.Fatalf("unexpected error: %v", err) + } } } @@ -93,7 +115,7 @@ func TestConvertCPUAffinity_ZeroMaskRejected(t *testing.T) { if err == nil { t.Fatal("expected error for zero affinity mask") } - if !strings.Contains(err.Error(), "mask must be non-zero") { + if !errors.Is(err, ErrCPUAffinityMaskZero) { t.Fatalf("unexpected error: %v", err) } } @@ -137,12 +159,12 @@ func TestConvertCPUAffinity_NoAffinity(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - affinity, err := ConvertCPUAffinity(tc.spec) + affinities, err := ConvertCPUAffinity(tc.spec) if err != nil { t.Fatalf("ConvertCPUAffinity failed: %v", err) } - if affinity != 0 { - t.Fatalf("expected zero affinity, got %d", affinity) + if len(affinities) != 0 { + t.Fatalf("expected empty affinities, got %v", affinities) } }) } diff --git a/internal/jobcontainers/oci.go b/internal/jobcontainers/oci.go index 259aff376f..2807ddcda9 100644 --- a/internal/jobcontainers/oci.go +++ b/internal/jobcontainers/oci.go @@ -4,7 +4,6 @@ package jobcontainers import ( "context" - "fmt" "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/jobobject" @@ -41,19 +40,21 @@ func specToLimits(ctx context.Context, cid string, s *specs.Spec) (*jobobject.Jo return nil, err } - var cpuAffinity uint64 - if s.Windows != nil && s.Windows.Resources != nil && s.Windows.Resources.CPU != nil && len(s.Windows.Resources.CPU.Affinity) > 0 { - affinity := s.Windows.Resources.CPU.Affinity - if len(affinity) != 1 { - return nil, fmt.Errorf("cpu affinity with multiple processor groups is not supported") - } - if 
affinity[0].Group != 0 { - return nil, fmt.Errorf("cpu affinity processor group %d is not supported", affinity[0].Group) - } - if affinity[0].Mask == 0 { - return nil, fmt.Errorf("cpu affinity mask must be non-zero") + // Validate and retrieve CPU affinity using the shared helper, which enforces the + // OS version gate for multi-group support (WS2022+). + affinities, err := hcsoci.ConvertCPUAffinity(s) + if err != nil { + return nil, err + } + var groupAffinities []jobobject.GroupAffinity + if len(affinities) > 0 { + groupAffinities = make([]jobobject.GroupAffinity, len(affinities)) + for i, a := range affinities { + groupAffinities[i] = jobobject.GroupAffinity{ + Mask: a.Mask, + Group: uint16(a.Group), + } } - cpuAffinity = affinity[0].Mask } realCPULimit, realCPUWeight := uint32(cpuLimit), uint32(cpuWeight) @@ -77,7 +78,7 @@ func specToLimits(ctx context.Context, cid string, s *specs.Spec) (*jobobject.Jo return &jobobject.JobLimits{ CPULimit: realCPULimit, CPUWeight: realCPUWeight, - CPUAffinity: cpuAffinity, + GroupAffinities: groupAffinities, MaxIOPS: maxIops, MaxBandwidth: maxBandwidth, MemoryLimitInBytes: memLimitMB * memory.MiB, diff --git a/internal/jobcontainers/oci_test.go b/internal/jobcontainers/oci_test.go index 82307b3c7a..5b8f9e5488 100644 --- a/internal/jobcontainers/oci_test.go +++ b/internal/jobcontainers/oci_test.go @@ -4,10 +4,14 @@ package jobcontainers import ( "context" - "strings" + "errors" "testing" specs "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/jobobject" + "github.com/Microsoft/hcsshim/osversion" ) func TestSpecToLimits_CPUAffinity_Group0MaskSet(t *testing.T) { @@ -27,12 +31,14 @@ func TestSpecToLimits_CPUAffinity_Group0MaskSet(t *testing.T) { if err != nil { t.Fatalf("specToLimits failed: %v", err) } - if limits.CPUAffinity != 0x3 { - t.Fatalf("unexpected cpu affinity: got %d want %d", limits.CPUAffinity, uint64(0x3)) + if 
len(limits.GroupAffinities) != 1 || + limits.GroupAffinities[0].Mask != 0x3 || + limits.GroupAffinities[0].Group != 0 { + t.Fatalf("unexpected cpu group affinities: got %v", limits.GroupAffinities) } } -func TestSpecToLimits_CPUAffinity_MultiGroupRejected(t *testing.T) { +func TestSpecToLimits_CPUAffinity_MultiGroup(t *testing.T) { s := &specs.Spec{ Windows: &specs.Windows{ Resources: &specs.WindowsResources{ @@ -46,16 +52,32 @@ func TestSpecToLimits_CPUAffinity_MultiGroupRejected(t *testing.T) { }, } - _, err := specToLimits(context.Background(), "cid", s) - if err == nil { - t.Fatal("expected error for multiple affinity entries") - } - if !strings.Contains(err.Error(), "multiple processor groups") { - t.Fatalf("unexpected error: %v", err) + limits, err := specToLimits(context.Background(), "cid", s) + if osversion.Build() >= osversion.LTSC2022 { + // Multi-group is supported on WS2022+. + if err != nil { + t.Fatalf("expected success for multi-group on WS2022+, got: %v", err) + } + if len(limits.GroupAffinities) != 2 { + t.Fatalf("expected 2 group affinities, got %d: %v", len(limits.GroupAffinities), limits.GroupAffinities) + } + want := []jobobject.GroupAffinity{{Mask: 0x1, Group: 0}, {Mask: 0x1, Group: 1}} + for i, a := range limits.GroupAffinities { + if a != want[i] { + t.Fatalf("affinity[%d]: got %v, want %v", i, a, want[i]) + } + } + } else { + if err == nil { + t.Fatal("expected error for multiple affinity entries on pre-WS2022") + } + if !errors.Is(err, hcsoci.ErrCPUAffinityMultipleGroupsNotSupported) { + t.Fatalf("unexpected error: %v", err) + } } } -func TestSpecToLimits_CPUAffinity_NonZeroGroupRejected(t *testing.T) { +func TestSpecToLimits_CPUAffinity_NonZeroGroup(t *testing.T) { s := &specs.Spec{ Windows: &specs.Windows{ Resources: &specs.WindowsResources{ @@ -68,12 +90,22 @@ func TestSpecToLimits_CPUAffinity_NonZeroGroupRejected(t *testing.T) { }, } - _, err := specToLimits(context.Background(), "cid", s) - if err == nil { - t.Fatal("expected error 
for non-zero affinity group") - } - if !strings.Contains(err.Error(), "processor group") { - t.Fatalf("unexpected error: %v", err) + limits, err := specToLimits(context.Background(), "cid", s) + if osversion.Build() >= osversion.LTSC2022 { + // Non-zero group is supported on WS2022+. + if err != nil { + t.Fatalf("expected success for non-zero group on WS2022+, got: %v", err) + } + if len(limits.GroupAffinities) != 1 || limits.GroupAffinities[0].Group != 1 { + t.Fatalf("unexpected group affinities: got %v", limits.GroupAffinities) + } + } else { + if err == nil { + t.Fatal("expected error for non-zero affinity group on pre-WS2022") + } + if !errors.Is(err, hcsoci.ErrCPUAffinityNonZeroGroupNotSupported) { + t.Fatalf("unexpected error: %v", err) + } } } @@ -94,7 +126,7 @@ func TestSpecToLimits_CPUAffinity_ZeroMaskRejected(t *testing.T) { if err == nil { t.Fatal("expected error for zero affinity mask") } - if !strings.Contains(err.Error(), "mask must be non-zero") { + if !errors.Is(err, hcsoci.ErrCPUAffinityMaskZero) { t.Fatalf("unexpected error: %v", err) } } diff --git a/internal/jobobject/jobobject.go b/internal/jobobject/jobobject.go index 5f062e5c5e..3150eec493 100644 --- a/internal/jobobject/jobobject.go +++ b/internal/jobobject/jobobject.go @@ -28,11 +28,22 @@ type JobObject struct { handleLock sync.RWMutex } +// GroupAffinity specifies a processor group and an affinity mask within that group. +// It corresponds to the Win32 GROUP_AFFINITY structure and is used for multi-group +// CPU affinity on machines with more than 64 logical processors (WS2022+). +type GroupAffinity struct { + // Mask is the bitmask of processors within Group. + Mask uint64 + // Group is the processor group number (0-based). + Group uint16 +} + // JobLimits represents the resource constraints that can be applied to a job object. 
type JobLimits struct { CPULimit uint32 CPUWeight uint32 - CPUAffinity uint64 + CPUAffinity uint64 // legacy single-group (group 0) affinity mask; use GroupAffinities when non-empty + GroupAffinities []GroupAffinity // multi-processor-group affinity (WS2022+); takes precedence over CPUAffinity MemoryLimitInBytes uint64 MaxIOPS int64 MaxBandwidth int64 diff --git a/internal/jobobject/limits.go b/internal/jobobject/limits.go index 5bb20df00f..7d95c4303c 100644 --- a/internal/jobobject/limits.go +++ b/internal/jobobject/limits.go @@ -38,7 +38,11 @@ func (job *JobObject) SetResourceLimits(limits *JobLimits) error { } } - if limits.CPUAffinity != 0 { + if len(limits.GroupAffinities) > 0 { + if err := job.SetCPUGroupAffinities(limits.GroupAffinities); err != nil { + return fmt.Errorf("failed to set job object cpu group affinities: %w", err) + } + } else if limits.CPUAffinity != 0 { if err := job.SetCPUAffinity(limits.CPUAffinity); err != nil { return fmt.Errorf("failed to set job object cpu affinity: %w", err) } @@ -141,8 +145,89 @@ func (job *JobObject) GetCPULimit(rateControlType CPURateControlType) (uint32, e return info.Value, nil } +// SetCPUGroupAffinities sets the processor group affinities for the job object using +// JobObjectGroupInformationEx, which supports multi-processor-group machines (WS2022+). +// Each entry in affinities specifies a processor group number and a bitmask of processors +// within that group. affinities must be non-empty. 
+// https://learn.microsoft.com/en-us/windows/win32/api/jobapi2/nf-jobapi2-setinformationjobobject +func (job *JobObject) SetCPUGroupAffinities(affinities []GroupAffinity) error { + if len(affinities) == 0 { + return errors.New("affinities must be non-empty") + } + winapiAffinities := make([]winapi.GROUP_AFFINITY, len(affinities)) + for i, a := range affinities { + winapiAffinities[i] = winapi.GROUP_AFFINITY{ + Mask: uintptr(a.Mask), + Group: a.Group, + } + } + + job.handleLock.RLock() + defer job.handleLock.RUnlock() + + if job.handle == 0 { + return ErrAlreadyClosed + } + + if _, err := windows.SetInformationJobObject( + job.handle, + windows.JobObjectGroupInformationEx, + uintptr(unsafe.Pointer(&winapiAffinities[0])), + uint32(len(winapiAffinities))*uint32(unsafe.Sizeof(winapiAffinities[0])), + ); err != nil { + return fmt.Errorf("failed to set cpu group affinities on job object: %w", err) + } + return nil +} + +// GetCPUGroupAffinities returns the processor group affinities set on the job object. +// https://learn.microsoft.com/en-us/windows/win32/api/jobapi2/nf-jobapi2-queryinformationjobobject +func (job *JobObject) GetCPUGroupAffinities() ([]GroupAffinity, error) { + job.handleLock.RLock() + defer job.handleLock.RUnlock() + + if job.handle == 0 { + return nil, ErrAlreadyClosed + } + + // First call with a zero-length buffer to determine the required buffer size. + // This call is expected to fail with ERROR_INSUFFICIENT_BUFFER; we only care + // about the returned length. 
+ var returnLen uint32 + _ = winapi.QueryInformationJobObject( + job.handle, + windows.JobObjectGroupInformationEx, + nil, + 0, + &returnLen, + ) + if returnLen == 0 { + return nil, nil + } + + count := returnLen / uint32(unsafe.Sizeof(winapi.GROUP_AFFINITY{})) + winapiAffinities := make([]winapi.GROUP_AFFINITY, count) + if err := winapi.QueryInformationJobObject( + job.handle, + windows.JobObjectGroupInformationEx, + unsafe.Pointer(&winapiAffinities[0]), + returnLen, + nil, + ); err != nil { + return nil, fmt.Errorf("failed to query cpu group affinities on job object: %w", err) + } + + result := make([]GroupAffinity, count) + for i, a := range winapiAffinities { + result[i] = GroupAffinity{Mask: uint64(a.Mask), Group: a.Group} + } + return result, nil +} + // SetCPUAffinity sets the processor affinity for the job object. // The affinity is passed in as a bitmask. +// Note: this uses JOB_OBJECT_LIMIT_AFFINITY which is restricted to processor group 0. +// For machines with more than 64 logical processors, use SetCPUGroupAffinities instead. func (job *JobObject) SetCPUAffinity(affinityBitMask uint64) error { info, err := job.getExtendedInformation() if err != nil { diff --git a/internal/winapi/jobobject.go b/internal/winapi/jobobject.go index 4c04dd3f83..290cbbf7af 100644 --- a/internal/winapi/jobobject.go +++ b/internal/winapi/jobobject.go @@ -98,6 +98,26 @@ type JOBOBJECT_BASIC_PROCESS_ID_LIST struct { ProcessIdList [1]uintptr } +// GROUP_AFFINITY specifies a processor group and an affinity mask for that group. +// Mirrors the Win32 _GROUP_AFFINITY structure. +// +// Processor group support was introduced in Windows 7 / Windows Server 2008 R2 +// to handle machines with more than 64 logical processors. The structure is used +// with SetInformationJobObject(JobObjectGroupInformationEx) and +// QueryInformationJobObject(JobObjectGroupInformationEx), both available since +// Windows 8 / Windows Server 2012.
+// +// For job object silos (containers), multi-processor-group affinity requires +// Windows Server 2022 (build 20348) or later. +// +// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/miniport/ns-miniport-_group_affinity +// https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups +type GROUP_AFFINITY struct { + Mask uintptr // KAFFINITY = ULONG_PTR: bitmask of processors in Group + Group uint16 // Processor group number + Reserved [3]uint16 // Must be zero +} + // AllPids returns all the process Ids in the job object. func (p *JOBOBJECT_BASIC_PROCESS_ID_LIST) AllPids() []uintptr { return (*[(1 << 27) - 1]uintptr)(unsafe.Pointer(&p.ProcessIdList[0]))[:p.NumberOfProcessIdsInList:p.NumberOfProcessIdsInList]