Skip to content

Commit adca57b

Browse files
pajakdSingularity23x0
authored and committed
hotswap reschedule evicted (kubernetes-sigs#7376)
1 parent a416c2e commit adca57b

File tree

2 files changed

+5
-9
lines changed

2 files changed

+5
-9
lines changed

pkg/controller/jobframework/reconciler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
545545
log.V(6).Info("The job is no longer active, clear the workloads admission")
546546
err := workload.PatchAdmissionStatus(ctx, r.client, wl, r.clock, func() (*kueue.Workload, bool, error) {
547547
// The requeued condition status is set to true only on EvictedByPreemption or EvictedDueToNodeFailures
548-
setRequeued := evCond.Reason == kueue.WorkloadEvictedByPreemption
548+
setRequeued := (evCond.Reason == kueue.WorkloadEvictedByPreemption) || (evCond.Reason == kueue.WorkloadEvictedDueToNodeFailures)
549549
updated := workload.SetRequeuedCondition(wl, evCond.Reason, evCond.Message, setRequeued)
550550
if workload.UnsetQuotaReservationWithCondition(wl, "Pending", evCond.Message, r.clock.Now()) {
551551
updated = true

test/e2e/tas/hotswap_test.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"github.com/onsi/gomega"
2626
batchv1 "k8s.io/api/batch/v1"
2727
corev1 "k8s.io/api/core/v1"
28-
apimeta "k8s.io/apimachinery/pkg/api/meta"
2928
"k8s.io/apimachinery/pkg/fields"
3029
"k8s.io/apimachinery/pkg/labels"
3130
"k8s.io/utils/ptr"
@@ -256,13 +255,10 @@ var _ = ginkgo.Describe("Hotswap for Topology Aware Scheduling", ginkgo.Ordered,
256255
nodeToRestore = node.DeepCopy()
257256
gomega.Expect(k8sClient.Delete(ctx, node)).To(gomega.Succeed())
258257
})
259-
wl := &kueue.Workload{}
260-
ginkgo.By("Check that the workload is evicted", func() {
261-
gomega.Eventually(func(g gomega.Gomega) {
262-
g.Expect(k8sClient.Get(ctx, wlKey, wl)).To(gomega.Succeed())
263-
g.Expect(wl.Status.Admission).To(gomega.BeNil())
264-
g.Expect(apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadEvicted)).To(gomega.BeTrue())
265-
}, util.LongTimeout, util.Interval).Should(gomega.Succeed())
258+
ginkgo.By("Check that workload is rescheduled to a different rack", func() {
259+
expectWorkloadTopologyAssignment(ctx, k8sClient, wlKey, numPods, []string{
260+
"kind-worker3", "kind-worker4",
261+
})
266262
})
267263
})
268264
})

0 commit comments

Comments
 (0)