Skip to content

Commit ecc5972

Browse files
RayJobs with a clusterSelector should not be managed or validated by Kueue.
They are submitted to existing RayClusters that are already managed by Kueue, so queueing them as well would double-count the quota consumed.
1 parent baf4c23 commit ecc5972

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

pkg/controller/jobs/rayjob/rayjob_controller.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ type RayJob rayv1.RayJob
7878

7979
var _ jobframework.GenericJob = (*RayJob)(nil)
8080
var _ jobframework.JobWithManagedBy = (*RayJob)(nil)
81+
var _ jobframework.JobWithSkip = (*RayJob)(nil)
8182

8283
func (j *RayJob) Object() client.Object {
8384
return (*rayv1.RayJob)(j)
@@ -100,6 +101,12 @@ func (j *RayJob) Suspend() {
100101
j.Spec.Suspend = true
101102
}
102103

104+
// Skip reports whether this RayJob sets a non-empty Spec.ClusterSelector.
// The ctx parameter is not used by this implementation.
// NOTE(review): presumably this is the method required by
// jobframework.JobWithSkip — confirm against the interface definition.
func (j *RayJob) Skip(ctx context.Context) bool {
105+
// Skip reconciliation for RayJobs that use clusterSelector to reference existing clusters.
106+
// These jobs are not managed by Kueue.
107+
return len(j.Spec.ClusterSelector) > 0
108+
}
109+
103110
// GVK returns gvk, an identifier declared outside the lines shown here.
// NOTE(review): presumably the GroupVersionKind of the RayJob resource — confirm at its declaration.
func (j *RayJob) GVK() schema.GroupVersionKind {
104111
return gvk
105112
}

pkg/controller/jobs/rayjob/rayjob_webhook.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ func (w *RayJobWebhook) validateCreate(ctx context.Context, job *rayv1.RayJob) (
107107
var allErrors field.ErrorList
108108
kueueJob := (*RayJob)(job)
109109

110+
// RayJobs with clusterSelector use an existing cluster and should not be managed by Kueue.
111+
// Skip all Kueue validation for these jobs.
112+
if len(job.Spec.ClusterSelector) > 0 {
113+
return allErrors, nil
114+
}
115+
110116
if w.manageJobsWithoutQueueName || jobframework.QueueName(kueueJob) != "" {
111117
spec := &job.Spec
112118
specPath := field.NewPath("spec")
@@ -116,11 +122,6 @@ func (w *RayJobWebhook) validateCreate(ctx context.Context, job *rayv1.RayJob) (
116122
allErrors = append(allErrors, field.Invalid(specPath.Child("shutdownAfterJobFinishes"), spec.ShutdownAfterJobFinishes, "a kueue managed job should delete the cluster after finishing"))
117123
}
118124

119-
// Should not want existing cluster. Kueue (workload) should be able to control the admission of the actual work, not only the trigger.
120-
if len(spec.ClusterSelector) > 0 {
121-
allErrors = append(allErrors, field.Invalid(specPath.Child("clusterSelector"), spec.ClusterSelector, "a kueue managed job should not use an existing cluster"))
122-
}
123-
124125
clusterSpec := spec.RayClusterSpec
125126
clusterSpecPath := specPath.Child("rayClusterSpec")
126127

0 commit comments

Comments
 (0)