executor: serialize task handling
taskUpdater will be called serially and won't block: it spawns a goroutine to execute the task and another to send the task state to the scheduler. executeTask will just start task execution; all the logic for deciding whether a task should be started is moved inside taskUpdater. This way we avoid concurrency issues when the same executorTask is handled in parallel.
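To illustrate the idea, here is a minimal standalone sketch (simplified, hypothetical names and types, not the actual agola executor code) of the pattern this commit moves to: a serial taskUpdater that owns the running-task bookkeeping and hands the blocking work (starting execution, reporting status) off to goroutines.

// Minimal sketch of "serial updater, concurrent workers" (hypothetical types).
package main

import (
	"fmt"
	"sync"
	"time"
)

type executorTask struct {
	ID string
}

type executor struct {
	mu      sync.Mutex
	running map[string]*executorTask
}

// taskUpdater is always invoked serially, so the decision whether to start a
// task cannot race with another update of the same task.
func (e *executor) taskUpdater(et *executorTask) {
	e.mu.Lock()
	defer e.mu.Unlock()

	if _, ok := e.running[et.ID]; ok {
		// already running: nothing to start
		return
	}
	e.running[et.ID] = et

	// the blocking parts run in goroutines, so taskUpdater itself never blocks
	go e.executeTask(et)            // start task execution (e.g. start a pod)
	go e.sendExecutorTaskStatus(et) // report the task state to the scheduler
}

func (e *executor) executeTask(et *executorTask) {
	time.Sleep(10 * time.Millisecond) // simulate starting the task
	fmt.Printf("task %s started\n", et.ID)
}

func (e *executor) sendExecutorTaskStatus(et *executorTask) {
	fmt.Printf("status for task %s sent\n", et.ID)
}

func main() {
	e := &executor{running: map[string]*executorTask{}}
	// serial calls; the duplicate update of task1 is ignored instead of racing
	for _, id := range []string{"task1", "task1", "task2"} {
		e.taskUpdater(&executorTask{ID: id})
	}
	time.Sleep(50 * time.Millisecond) // let the goroutines finish
}

The diff below keeps the same split: taskUpdater does the per-task decision making and uses go e.executeTask(ctx, rt) and go func() { e.sendExecutorTaskStatus(ctx, et) }() for the work that can block.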
parent 268a2b83ea
commit 88dbca15a3
@@ -724,32 +724,7 @@ func (e *Executor) sendExecutorTaskStatus(ctx context.Context, et *types.ExecutorTask) error {
 	return err
 }
 
-func (e *Executor) stopTask(ctx context.Context, et *types.ExecutorTask) {
-	if rt, ok := e.runningTasks.get(et.ID); ok {
-		rt.Lock()
-		defer rt.Unlock()
-		if rt.et.Status.Phase.IsFinished() {
-			return
-		}
-		if rt.pod != nil {
-			if err := rt.pod.Stop(ctx); err != nil {
-				log.Errorf("err: %+v", err)
-				return
-			}
-			if rt.et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
-				rt.et.Status.Phase = types.ExecutorTaskPhaseCancelled
-			} else {
-				rt.et.Status.Phase = types.ExecutorTaskPhaseStopped
-			}
-			if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
-				log.Errorf("err: %+v", err)
-				return
-			}
-		}
-	}
-}
-
-func (e *Executor) executeTask(ctx context.Context, et *types.ExecutorTask) {
+func (e *Executor) executeTask(ctx context.Context, rt *runningTask) {
 	// * save in local state that we have a running task
 	// * start the pod
 	// * then update the executortask status to in-progress
@@ -757,37 +732,16 @@ func (e *Executor) executeTask(ctx context.Context, et *types.ExecutorTask) {
 	// In this way we are sure that the pod cleaner will only remove pod that don't
 	// have an in progress running task
-
-	if et.Status.Phase != types.ExecutorTaskPhaseNotStarted {
-		log.Debugf("task phase is not \"not started\"")
-		return
-	}
-
-	activeTasks := e.runningTasks.len()
-	// don't start task if we have reached the active tasks limit
-	// they will be executed later
-	if activeTasks > e.c.ActiveTasksLimit {
-		return
-	}
-
-	rt := &runningTask{
-		et:        et,
-		executing: true,
-	}
 
 	rt.Lock()
 
-	if !e.runningTasks.addIfNotExists(et.ID, rt) {
-		log.Debugf("task %s already running", et.ID)
-		rt.Unlock()
-		return
-	}
-
 	defer func() {
 		rt.Lock()
 		rt.executing = false
 		rt.Unlock()
 	}()
 
+	et := rt.et
+
 	et.Status.Phase = types.ExecutorTaskPhaseRunning
 	et.Status.StartTime = util.TimeP(time.Now())
 	et.Status.SetupStep.Phase = types.ExecutorTaskPhaseRunning
@@ -1151,7 +1105,7 @@ func (e *Executor) tasksUpdater(ctx context.Context) error {
 	}
 	log.Debugf("ets: %v", util.Dump(ets))
 	for _, et := range ets {
-		go e.taskUpdater(ctx, et)
+		e.taskUpdater(ctx, et)
 	}
 
 	// remove runningTasks not existing in the runservice
@@ -1175,31 +1129,73 @@ func (e *Executor) taskUpdater(ctx context.Context, et *types.ExecutorTask) {
 		return
 	}
 
-	if et.Spec.Stop {
-		e.stopTask(ctx, et)
-	}
-
-	if et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
-		e.executeTask(ctx, et)
-	}
-
-	if et.Status.Phase == types.ExecutorTaskPhaseRunning {
-		_, ok := e.runningTasks.get(et.ID)
-		if !ok {
-			log.Infof("marking executor task %s as failed since there's no running task", et.ID)
-			et.Status.Phase = types.ExecutorTaskPhaseFailed
-			et.Status.EndTime = util.TimeP(time.Now())
-			// mark in progress step as failed too
-			for _, s := range et.Status.Steps {
-				if s.Phase == types.ExecutorTaskPhaseRunning {
-					s.Phase = types.ExecutorTaskPhaseFailed
-					s.EndTime = util.TimeP(time.Now())
+	rt, _ := e.runningTasks.get(et.ID)
+	if rt != nil {
+		rt.Lock()
+		// update running task Spec.Stop value only when there's a transitions from false to true,
+		// other spec values cannot change once the task has been scheduled
+		if !rt.et.Spec.Stop && et.Spec.Stop {
+			rt.et.Spec.Stop = et.Spec.Stop
+
+			if !rt.et.Status.Phase.IsFinished() && rt.pod != nil {
+				if err := rt.pod.Stop(ctx); err != nil {
+					log.Errorf("err: %+v", err)
 				}
 			}
-
+		}
+		rt.Unlock()
+
+		return
+	}
+
+	// rt == nil
+
+	// only send cancelled phase when the executor task isn't in running tasks and is not started
+	if et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
+		et.Status.Phase = types.ExecutorTaskPhaseCancelled
+		go func() {
 			if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
 				log.Errorf("err: %+v", err)
 			}
+		}()
+	}
+
+	if !et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseRunning {
+		log.Infof("marking executor task %s as failed since there's no running task", et.ID)
+		et.Status.Phase = types.ExecutorTaskPhaseFailed
+		et.Status.EndTime = util.TimeP(time.Now())
+		// mark in progress step as failed too
+		for _, s := range et.Status.Steps {
+			if s.Phase == types.ExecutorTaskPhaseRunning {
+				s.Phase = types.ExecutorTaskPhaseFailed
+				s.EndTime = util.TimeP(time.Now())
+			}
 		}
+		go func() {
+			if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
+				log.Errorf("err: %+v", err)
+			}
+		}()
+	}
+
+	if !et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
+		activeTasks := e.runningTasks.len()
+		// don't start task if we have reached the active tasks limit (they will be retried
+		// on next taskUpdater calls)
+		if activeTasks > e.c.ActiveTasksLimit {
+			return
+		}
+		rt := &runningTask{
+			et:        et,
+			executing: true,
+		}
+
+		if !e.runningTasks.addIfNotExists(et.ID, rt) {
+			log.Warnf("task %s already running, this shouldn't happen", et.ID)
+			return
+		}
+
+		go e.executeTask(ctx, rt)
 	}
 }
+
@@ -1309,7 +1305,7 @@ func (r *runningTasks) ids() []string {
 
 func (e *Executor) handleTasks(ctx context.Context, c <-chan *types.ExecutorTask) {
 	for et := range c {
-		go e.executeTask(ctx, et)
+		e.taskUpdater(ctx, et)
 	}
 }
 