executor: serialize task handling

taskUpdater is now called serially and doesn't block. It starts goroutines
for executing the task and for sending the task state to the scheduler.

executeTask now only starts task execution; all the logic that decides whether
to start a task has moved into taskUpdater.

This way we avoid concurrency issues when the same executorTask is handled
in parallel.
This commit is contained in:
Simone Gotti 2020-02-27 17:01:19 +01:00
parent 268a2b83ea
commit 88dbca15a3

View File

@ -724,32 +724,7 @@ func (e *Executor) sendExecutorTaskStatus(ctx context.Context, et *types.Executo
return err
}
// stopTask stops the pod of the running task registered under et.ID and then
// sends the executor task status with sendExecutorTaskStatus.
//
// It does nothing when no running task with that ID exists, when the running
// task's phase is already finished, or when the running task has no pod.
// Errors from stopping the pod or from sending the status are logged and end
// the function; they are not returned to the caller.
func (e *Executor) stopTask(ctx context.Context, et *types.ExecutorTask) {
if rt, ok := e.runningTasks.get(et.ID); ok {
// the running task stays locked until the function returns
rt.Lock()
defer rt.Unlock()
if rt.et.Status.Phase.IsFinished() {
return
}
if rt.pod != nil {
if err := rt.pod.Stop(ctx); err != nil {
// the phase is left untouched when the pod could not be stopped
log.Errorf("err: %+v", err)
return
}
// a task still in the not-started phase is marked cancelled, any other
// unfinished task is marked stopped
if rt.et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
rt.et.Status.Phase = types.ExecutorTaskPhaseCancelled
} else {
rt.et.Status.Phase = types.ExecutorTaskPhaseStopped
}
// NOTE(review): the phase was updated on rt.et but the status is sent for
// et — confirm that both refer to the same task object
if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
log.Errorf("err: %+v", err)
return
}
}
}
}
func (e *Executor) executeTask(ctx context.Context, et *types.ExecutorTask) {
func (e *Executor) executeTask(ctx context.Context, rt *runningTask) {
// * save in local state that we have a running task
// * start the pod
// * then update the executortask status to in-progress
@ -757,37 +732,16 @@ func (e *Executor) executeTask(ctx context.Context, et *types.ExecutorTask) {
// In this way we are sure that the pod cleaner will only remove pods that don't
// have an in-progress running task
if et.Status.Phase != types.ExecutorTaskPhaseNotStarted {
log.Debugf("task phase is not \"not started\"")
return
}
activeTasks := e.runningTasks.len()
// don't start task if we have reached the active tasks limit
// they will be executed later
if activeTasks > e.c.ActiveTasksLimit {
return
}
rt := &runningTask{
et: et,
executing: true,
}
rt.Lock()
if !e.runningTasks.addIfNotExists(et.ID, rt) {
log.Debugf("task %s already running", et.ID)
rt.Unlock()
return
}
defer func() {
rt.Lock()
rt.executing = false
rt.Unlock()
}()
et := rt.et
et.Status.Phase = types.ExecutorTaskPhaseRunning
et.Status.StartTime = util.TimeP(time.Now())
et.Status.SetupStep.Phase = types.ExecutorTaskPhaseRunning
@ -1151,7 +1105,7 @@ func (e *Executor) tasksUpdater(ctx context.Context) error {
}
log.Debugf("ets: %v", util.Dump(ets))
for _, et := range ets {
go e.taskUpdater(ctx, et)
e.taskUpdater(ctx, et)
}
// remove runningTasks not existing in the runservice
@ -1175,31 +1129,73 @@ func (e *Executor) taskUpdater(ctx context.Context, et *types.ExecutorTask) {
return
}
if et.Spec.Stop {
e.stopTask(ctx, et)
}
rt, _ := e.runningTasks.get(et.ID)
if rt != nil {
rt.Lock()
// update the running task's Spec.Stop value only when there's a transition from false to true,
// other spec values cannot change once the task has been scheduled
if !rt.et.Spec.Stop && et.Spec.Stop {
rt.et.Spec.Stop = et.Spec.Stop
if et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
e.executeTask(ctx, et)
}
if et.Status.Phase == types.ExecutorTaskPhaseRunning {
_, ok := e.runningTasks.get(et.ID)
if !ok {
log.Infof("marking executor task %s as failed since there's no running task", et.ID)
et.Status.Phase = types.ExecutorTaskPhaseFailed
et.Status.EndTime = util.TimeP(time.Now())
// mark in progress step as failed too
for _, s := range et.Status.Steps {
if s.Phase == types.ExecutorTaskPhaseRunning {
s.Phase = types.ExecutorTaskPhaseFailed
s.EndTime = util.TimeP(time.Now())
if !rt.et.Status.Phase.IsFinished() && rt.pod != nil {
if err := rt.pod.Stop(ctx); err != nil {
log.Errorf("err: %+v", err)
}
}
}
rt.Unlock()
return
}
// rt == nil
// only send cancelled phase when the executor task isn't in running tasks and is not started
if et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
et.Status.Phase = types.ExecutorTaskPhaseCancelled
go func() {
if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
log.Errorf("err: %+v", err)
}
}()
}
if !et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseRunning {
log.Infof("marking executor task %s as failed since there's no running task", et.ID)
et.Status.Phase = types.ExecutorTaskPhaseFailed
et.Status.EndTime = util.TimeP(time.Now())
// mark in progress step as failed too
for _, s := range et.Status.Steps {
if s.Phase == types.ExecutorTaskPhaseRunning {
s.Phase = types.ExecutorTaskPhaseFailed
s.EndTime = util.TimeP(time.Now())
}
}
go func() {
if err := e.sendExecutorTaskStatus(ctx, et); err != nil {
log.Errorf("err: %+v", err)
}
}()
}
if !et.Spec.Stop && et.Status.Phase == types.ExecutorTaskPhaseNotStarted {
activeTasks := e.runningTasks.len()
// don't start task if we have reached the active tasks limit (they will be retried
// on next taskUpdater calls)
if activeTasks > e.c.ActiveTasksLimit {
return
}
rt := &runningTask{
et: et,
executing: true,
}
if !e.runningTasks.addIfNotExists(et.ID, rt) {
log.Warnf("task %s already running, this shouldn't happen", et.ID)
return
}
go e.executeTask(ctx, rt)
}
}
@ -1309,7 +1305,7 @@ func (r *runningTasks) ids() []string {
// handleTasks receives executor tasks from c until the channel is closed and
// processes each received task.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers, so
// the two statements in the loop body are presumably the removed and the added
// version of the same line rather than two calls made together — confirm
// against the real file.
func (e *Executor) handleTasks(ctx context.Context, c <-chan *types.ExecutorTask) {
for et := range c {
go e.executeTask(ctx, et)
e.taskUpdater(ctx, et)
}
}