@@ -499,15 +499,15 @@ func (s *Autoscaler) GenerateRunnerJitConfig(ctx context.Context, url string, ru
499
499
}
500
500
}
501
501
502
- func (s * Autoscaler ) createCallbackTaskWithToken (ctx context.Context , url string , secret string , job Job , delay time.Duration ) error {
502
+ func (s * Autoscaler ) CreateCallbackTaskWithToken (ctx context.Context , url string , secret string , job Job , delay time.Duration ) error {
503
503
504
504
data , _ := json .Marshal (job )
505
505
now := timestamppb .Now ()
506
506
now .Seconds += int64 (delay .Seconds ())
507
507
req := & taskspb.CreateTaskRequest {
508
508
Parent : s .conf .TaskQueue ,
509
509
Task : & taskspb.Task {
510
- Name : fmt .Sprintf ("%s/tasks/%d" , s .conf .TaskQueue , job .Id ),
510
+ Name : fmt .Sprintf ("%s/tasks/%d-0 " , s .conf .TaskQueue , job .Id ),
511
511
DispatchDeadline : & durationpb.Duration {
512
512
Seconds : 120 , // the timeout of the cloud task callback - must be greater the time it takes to start the VM
513
513
Nanos : 0 ,
@@ -531,19 +531,30 @@ func (s *Autoscaler) createCallbackTaskWithToken(ctx context.Context, url string
531
531
defer client .Close ()
532
532
_ , err := client .CreateTask (ctx , req )
533
533
if err != nil {
534
- return fmt .Errorf ("cloudtasks.CreateTask failed for job Id %d: %v" , job .Id , err )
534
+ // inspect the error: if the task name already exists (Cloud Tasks de-duplication), work around it by retrying under an alternate name
535
+ if match , _ := regexp .MatchString ("code = AlreadyExists" , err .Error ()); match {
536
+ req .Task .Name = fmt .Sprintf ("%s/tasks/%d-1" , s .conf .TaskQueue , job .Id )
537
+ _ , err := client .CreateTask (ctx , req )
538
+ if err != nil {
539
+ return fmt .Errorf ("cloudtasks.CreateTask finally failed for job Id %d: %v" , job .Id , err )
540
+ } else {
541
+ log .Infof ("Finally created cloud task callback for workflow job Id %d with url \" %s\" and payload \" %s\" " , job .Id , url , data )
542
+ }
543
+ } else {
544
+ return fmt .Errorf ("cloudtasks.CreateTask failed for job Id %d: %v" , job .Id , err )
545
+ }
535
546
} else {
536
547
log .Infof ("Created cloud task callback for workflow job Id %d with url \" %s\" and payload \" %s\" " , job .Id , url , data )
537
548
}
538
549
return nil
539
550
}
540
551
541
- func (s * Autoscaler ) deleteCallbackTask (ctx context.Context , job Job ) error {
552
+ func (s * Autoscaler ) DeleteCallbackTask (ctx context.Context , job Job ) error {
542
553
543
554
client := newTaskClient (ctx )
544
555
defer client .Close ()
545
556
err := client .DeleteTask (ctx , & taskspb.DeleteTaskRequest {
546
- Name : fmt .Sprintf ("%s/tasks/%d" , s .conf .TaskQueue , job .Id ),
557
+ Name : fmt .Sprintf ("%s/tasks/%d-0 " , s .conf .TaskQueue , job .Id ),
547
558
})
548
559
if err != nil {
549
560
return fmt .Errorf ("cloudtasks.DeleteTask failed for job Id %d: %v" , job .Id , err )
@@ -670,7 +681,7 @@ func (s *Autoscaler) handleWebhook(ctx *gin.Context) {
670
681
if ok , missingLabels := payload .Job .HasAllLabels (s .conf .RunnerLabels ); ok {
671
682
createUrl := createCallbackUrl (ctx , s .conf .RouteCreateVm , s .conf .SourceQueryParam , src .Name )
672
683
// delay the create vm callback so we have a chance to delete it if the workflow job is changing its state to 'waiting'
673
- if err := s .createCallbackTaskWithToken (ctx , createUrl , src .Secret , payload .Job , time .Duration (s .conf .CreateVmDelay )* time .Second ); err != nil {
684
+ if err := s .CreateCallbackTaskWithToken (ctx , createUrl , src .Secret , payload .Job , time .Duration (s .conf .CreateVmDelay )* time .Second ); err != nil {
674
685
log .Errorf ("Can not enqueue create-vm cloud task callback: %s" , err .Error ())
675
686
ctx .AbortWithError (http .StatusInternalServerError , err )
676
687
return
@@ -681,7 +692,7 @@ func (s *Autoscaler) handleWebhook(ctx *gin.Context) {
681
692
} else if payload .Action == WAITING {
682
693
// the waiting action happens if a deployment environment is configured in the workflow that requires a review. We have to cancel the cloud task callback
683
694
if ok , missingLabels := payload .Job .HasAllLabels (s .conf .RunnerLabels ); ok {
684
- if err := s .deleteCallbackTask (ctx , payload .Job ); err != nil {
695
+ if err := s .DeleteCallbackTask (ctx , payload .Job ); err != nil {
685
696
// best effort - this is not considered an error
686
697
log .Warnf ("Can not delete create-vm cloud task callback: %s" , err .Error ())
687
698
}
@@ -697,10 +708,10 @@ func (s *Autoscaler) handleWebhook(ctx *gin.Context) {
697
708
if ok , missingLabels := payload .Job .HasAllLabels (s .conf .RunnerLabels ); ok {
698
709
699
710
// if the user immediately cancels a workflow we have the chance to delete the callback if not older than 10 seconds - best effort, ignore all errors
700
- s .deleteCallbackTask (ctx , payload .Job )
711
+ s .DeleteCallbackTask (ctx , payload .Job )
701
712
702
713
deleteUrl := createCallbackUrl (ctx , s .conf .RouteDeleteVm , s .conf .SourceQueryParam , src .Name )
703
- if err := s .createCallbackTaskWithToken (ctx , deleteUrl , src .Secret , payload .Job , 1 * time .Second ); err != nil {
714
+ if err := s .CreateCallbackTaskWithToken (ctx , deleteUrl , src .Secret , payload .Job , 1 * time .Second ); err != nil {
704
715
log .Errorf ("Can not enqueue delete-vm cloud task callback: %s" , err .Error ())
705
716
ctx .AbortWithError (http .StatusInternalServerError , err )
706
717
return
0 commit comments