NVIDIA · kerthcet · Dec 5, 2023 · Dec 6, 2023 · Dec 6, 2023 · Dec 12, 2023
diff --git a/gpuallocator/allocator.go b/gpuallocator/allocator.go
@@ -19,7 +19,7 @@ type Allocator struct {
 	allocated DeviceSet
 }
 
-// Policy defines an interface for plugagable allocation policies to be added
+// Policy defines an interface for pluggable allocation policies to be added
 // to an Allocator.
 type Policy interface {
 	// Allocate is meant to do the heavy-lifting of implementing the actual

diff --git a/gpuallocator/besteffort_policy.go b/gpuallocator/besteffort_policy.go
@@ -46,6 +46,33 @@ func (p *bestEffortPolicy) Allocate(available []*Device, required []*Device, siz
 		return []*Device{}
 	}
 
+	if len(required) > len(available) {
+		return []*Device{}
+	}
+
+	// Optimize for the case when the number of required devices equals 'size'.
+	if size == len(required) {
+		if gpuPartitionContainsSetWithAll([][]*Device{available}, required) {
+			return required
+		} else {
+			return []*Device{}
+		}
+	}
+
+	// Optimize for the case when size == 1.
+	// We'll pick the device with the minimum sum of scores with available devices.
+	if size == 1 {
+		var bestDevice *Device
+		var minScore int
+		iterateGPUSetScore(available, func(score int, index int) {
+			if score < minScore || bestDevice == nil {
+				minScore = score
+				bestDevice = available[index]
+			}
+		})
+		return []*Device{bestDevice}
+	}
+
 	// Find the highest scoring GPU partition with sets of of size 'size'.
 	// Don't consider partitions that don't have at least one set that contains
 	// all of the GPUs 'required' by the allocation.
@@ -217,14 +244,6 @@ func iterateGPUPartitions(devices []*Device, size int, callback func([][]*Device
 		return
 	}
 
-	// Optimize for the case when size == 1.
-	if size == 1 {
-		for _, device := range devices {
-			callback([][]*Device{{device}})
-		}
-		return
-	}
-
 	// Otherwise, pad the list of available GPUs on the node such that the list
 	// can be evenly partitioned into subsets of size 'size'. This is necessary
 	// to ensure that the recursive solution does not exit early and actually
@@ -392,3 +411,16 @@ func calculateGPUPartitionScore(gpuPartition [][]*Device) int {
 
 	return score
 }
+
+// iterateGPUSetScore invokes callback with each device's summed pair score against all other devices, and its index.
+func iterateGPUSetScore(gpuSet []*Device, callback func(int, int)) {
+	for i, device := range gpuSet {
+		total := 0
+		for j, peer := range gpuSet {
+			if i != j {
+				total += calculateGPUPairScore(device, peer)
+			}
+		}
+		callback(total, i)
+	}
+}
diff --git a/gpuallocator/besteffort_test.go b/gpuallocator/besteffort_test.go
@@ -110,6 +110,46 @@ func TestBestEffortAllocate(t *testing.T) {
 			4,
 			[]int{},
 		},
+		{
+			"Required too many devices than available",
+			devices,
+			[]int{0, 1, 2, 3, 4, 5},
+			[]int{1, 2, 3, 4, 5, 6},
+			1,
+			[]int{},
+		},
+		{
+			"Required devices is equal to the size",
+			devices,
+			[]int{0, 1, 2, 4, 5, 6},
+			[]int{0, 1, 2, 5},
+			4,
+			[]int{0, 1, 2, 5},
+		},
+		{
+			"Required 1 device exists",
+			devices,
+			[]int{0, 1, 2, 4, 5, 6},
+			[]int{2},
+			1,
+			[]int{2},
+		},
+		{
+			"Required 1 device not exists",
+			devices,
+			[]int{0, 1, 2, 4, 5, 6},
+			[]int{3},
+			1,
+			[]int{},
+		},
+		{
+			"Required 1 best effort device",
+			devices,
+			[]int{0, 1, 2},
+			[]int{},
+			1,
+			[]int{0},
+		},
 	}
 
 	RunPolicyAllocTests(t, policy, tests)