Skip to content

Commit 6d11c36

Browse files
committed
fix: flake integration test
1 parent a03c7d1 commit 6d11c36

File tree

2 files changed

+18
-12
lines changed

2 files changed

+18
-12
lines changed

pkg/scheduler/framework/preemption/preemption.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -496,10 +496,20 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
496496
}
497497

498498
errCh := parallelize.NewErrorChannel()
499+
// Whether all victim pods are already deleted before making API call.
500+
allPodsAlreadyDeleted := true
499501
preemptPod := func(index int) {
500502
victim := victimPods[index]
501-
if err := ev.PreemptPod(ctx, c, pod, victim, pluginName); err != nil {
503+
err := ev.PreemptPod(ctx, c, pod, victim, pluginName)
504+
switch {
505+
case apierrors.IsNotFound(err):
506+
logger.V(2).Info("Victim Pod is already deleted", "preemptor", klog.KObj(pod), "node", c.Name(), "err", err)
507+
case err != nil:
502508
errCh.SendErrorWithCancel(err, cancel)
509+
default:
510+
ev.mu.Lock()
511+
allPodsAlreadyDeleted = false
512+
ev.mu.Unlock()
503513
}
504514
}
505515

@@ -511,11 +521,11 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
511521
startTime := time.Now()
512522
result := metrics.GoroutineResultSuccess
513523

514-
// Whether all victim pods are already deleted before making API call.
515-
allPodsAlreadyDeleted := true
516524
defer metrics.PreemptionGoroutinesDuration.WithLabelValues(result).Observe(metrics.SinceInSeconds(startTime))
517525
defer metrics.PreemptionGoroutinesExecutionTotal.WithLabelValues(result).Inc()
518526
defer func() {
527+
ev.mu.Lock()
528+
defer ev.mu.Unlock()
519529
// When API call isn't successful, the Pod may get stuck in the unschedulable pod pool in the worst case.
520530
// So, we should move the Pod to the activeQ.
521531
if result == metrics.GoroutineResultError ||
@@ -547,15 +557,9 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
547557
// and the pod could end up stuck in the unschedulable pod pool
548558
// by all the pod removal events being ignored.
549559
ev.Handler.Parallelizer().Until(ctx, len(victimPods)-1, preemptPod, ev.PluginName)
550-
err := errCh.ReceiveError()
551-
switch {
552-
case apierrors.IsNotFound(err):
553-
logger.V(2).Info("Victim Pod is already deleted", "preemptor", klog.KObj(pod), "node", c.Name(), "err", err)
554-
case err != nil:
560+
if err := errCh.ReceiveError(); err != nil {
555561
utilruntime.HandleErrorWithContext(ctx, err, "Error occurred during async preemption")
556562
result = metrics.GoroutineResultError
557-
default:
558-
allPodsAlreadyDeleted = false
559563
}
560564
}
561565

@@ -569,6 +573,7 @@ func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName
569573
logger.V(2).Info("Victim Pod is already deleted", "preemptor", klog.KObj(pod), "node", c.Name(), "err", err)
570574
case err != nil:
571575
utilruntime.HandleErrorWithContext(ctx, err, "Error occurred during async preemption")
576+
cancel()
572577
result = metrics.GoroutineResultError
573578
default:
574579
allPodsAlreadyDeleted = false

test/integration/scheduler/preemption/preemption_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,17 +1569,18 @@ func TestNominatedNodeCleanUp(t *testing.T) {
15691569
st.MakePod().Name("low-4").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
15701570
},
15711571
{
1572-
st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(),
1572+
st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "3"}).Obj(),
15731573
},
15741574
{
1575-
st.MakePod().Name("high").Priority(highPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "3"}).Obj(),
1575+
st.MakePod().Name("high").Priority(highPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(),
15761576
},
15771577
},
15781578
postChecks: []func(ctx context.Context, cs clientset.Interface, pod *v1.Pod) error{
15791579
testutils.WaitForPodToSchedule,
15801580
testutils.WaitForNominatedNodeName,
15811581
testutils.WaitForNominatedNodeName,
15821582
},
1583+
podNamesToDelete: []string{"low-1", "low-2", "low-3", "low-4"},
15831584
},
15841585
{
15851586
name: "mid-priority pod preempts low-priority pod, followed by a high-priority pod without additional preemption",

0 commit comments

Comments
 (0)