agola/internal/datamanager/datamanager.go

300 lines
8.9 KiB
Go

// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
// See the License for the specific language governing permissions and
// limitations under the License.
package datamanager
import (
"context"
"fmt"
"path"
"strings"
"time"
"agola.io/agola/internal/errors"
"agola.io/agola/internal/etcd"
"agola.io/agola/internal/objectstorage"
"agola.io/agola/internal/sequence"
"github.com/rs/zerolog"
)
// TODO(sgotti) handle etcd unwanted changes:
// * Etcd cluster rebuild: we cannot rely on etcd header ClusterID since it could be the same as it's generated using the listen urls. We should add our own clusterid key and use it.
// * Etcd cluster restored to a previous revision: really bad cause should detect that the revision is smaller than the current one
const (
DefaultSyncInterval = 5 * time.Second
DefaultCheckpointInterval = 10 * time.Second
DefaultCheckpointCleanInterval = 5 * time.Minute
DefaultEtcdWalCleanInterval = 2 * time.Second
DefaultStorageWalCleanInterval = 5 * time.Minute
DefaultCompactChangeGroupsInterval = 1 * time.Second
DefaultEtcdPingerInterval = 1 * time.Second
DefaultEtcdWalsKeepNum = 100
DefaultMinCheckpointWalsNum = 100
)
var (
ErrCompacted = errors.New("required revision has been compacted")
ErrConcurrency = errors.New("wal concurrency error: change groups already updated")
)
type ErrNotExist struct {
err error
}
func newErrNotExist(err error) error {
return &ErrNotExist{err: err}
}
func (e *ErrNotExist) Error() string {
return e.err.Error()
}
func (e *ErrNotExist) Unwrap() error {
return e.err
}
func IsNotExist(err error) bool {
var e *ErrNotExist
return errors.As(err, &e)
}
func fromOSTError(err error) error {
if objectstorage.IsNotExist(err) {
return newErrNotExist(err)
}
return err
}
var (
// Storage paths. Always use path (not filepath) to use the "/" separator
storageDataDir = "data"
storageWalsDir = "wals"
storageWalsStatusDir = path.Join(storageWalsDir, "status")
storageWalsDataDir = path.Join(storageWalsDir, "data")
// etcd paths. Always use path (not filepath) to use the "/" separator
etcdWalBaseDir = "datamanager"
etcdWalsDir = path.Join(etcdWalBaseDir, "wals")
etcdWalsDataKey = path.Join(etcdWalBaseDir, "walsdata")
etcdWalSeqKey = path.Join(etcdWalBaseDir, "walseq")
etcdLastCommittedStorageWalSeqKey = path.Join(etcdWalBaseDir, "lastcommittedstoragewalseq")
etcdCheckpointSeqKey = path.Join(etcdWalBaseDir, "checkpointseq")
etcdInitEtcdLockKey = path.Join(etcdWalBaseDir, "initetcd")
etcdSyncLockKey = path.Join(etcdWalBaseDir, "synclock")
etcdCompactChangeGroupsLockKey = path.Join(etcdWalBaseDir, "compactchangegroupslock")
etcdCheckpointLockKey = path.Join(etcdWalBaseDir, "checkpointlock")
etcdWalCleanerLockKey = path.Join(etcdWalBaseDir, "walcleanerlock")
etcdStorageWalCleanerLockKey = path.Join(etcdWalBaseDir, "storagewalcleanerlock")
etcdChangeGroupsDir = path.Join(etcdWalBaseDir, "changegroups")
etcdChangeGroupMinRevisionKey = path.Join(etcdWalBaseDir, "changegroupsminrev")
etcdPingKey = path.Join(etcdWalBaseDir, "ping")
)
const (
etcdChangeGroupMinRevisionRange = 1000
maxChangegroupNameLength = 256
)
type DataManagerConfig struct {
BasePath string
E *etcd.Store
OST *objectstorage.ObjStorage
DataTypes []string
EtcdWalsKeepNum int
CheckpointInterval time.Duration
CheckpointCleanInterval time.Duration
// MinCheckpointWalsNum is the minimum number of wals required before doing a checkpoint
MinCheckpointWalsNum int
MaxDataFileSize int64
MaintenanceMode bool
}
type DataManager struct {
basePath string
log zerolog.Logger
e *etcd.Store
ost *objectstorage.ObjStorage
changes *WalChanges
dataTypes []string
etcdWalsKeepNum int
checkpointInterval time.Duration
checkpointCleanInterval time.Duration
minCheckpointWalsNum int
maxDataFileSize int64
maintenanceMode bool
}
func NewDataManager(ctx context.Context, log zerolog.Logger, conf *DataManagerConfig) (*DataManager, error) {
if conf.EtcdWalsKeepNum == 0 {
conf.EtcdWalsKeepNum = DefaultEtcdWalsKeepNum
}
if conf.EtcdWalsKeepNum < 1 {
return nil, errors.New("etcdWalsKeepNum must be greater than 0")
}
if conf.CheckpointInterval == 0 {
conf.CheckpointInterval = DefaultCheckpointInterval
}
if conf.CheckpointCleanInterval == 0 {
conf.CheckpointCleanInterval = DefaultCheckpointCleanInterval
}
if conf.MinCheckpointWalsNum == 0 {
conf.MinCheckpointWalsNum = DefaultMinCheckpointWalsNum
}
if conf.MinCheckpointWalsNum < 1 {
return nil, errors.New("minCheckpointWalsNum must be greater than 0")
}
if conf.MaxDataFileSize == 0 {
conf.MaxDataFileSize = DefaultMaxDataFileSize
}
d := &DataManager{
basePath: conf.BasePath,
log: log,
e: conf.E,
ost: conf.OST,
changes: NewWalChanges(conf.DataTypes),
dataTypes: conf.DataTypes,
etcdWalsKeepNum: conf.EtcdWalsKeepNum,
checkpointInterval: conf.CheckpointInterval,
checkpointCleanInterval: conf.CheckpointCleanInterval,
minCheckpointWalsNum: conf.MinCheckpointWalsNum,
maxDataFileSize: conf.MaxDataFileSize,
maintenanceMode: conf.MaintenanceMode,
}
// add trailing slash the basepath
if d.basePath != "" && !strings.HasSuffix(d.basePath, "/") {
d.basePath = d.basePath + "/"
}
return d, nil
}
func (d *DataManager) storageWalStatusDir() string {
return path.Join(d.basePath, storageWalsStatusDir)
}
func (d *DataManager) storageWalStatusFile(walSeq string) string {
return path.Join(d.storageWalStatusDir(), walSeq)
}
func (d *DataManager) storageWalDataDir() string {
return path.Join(d.basePath, storageWalsDataDir)
}
func (d *DataManager) storageWalDataFile(walFileID string) string {
return path.Join(d.storageWalDataDir(), walFileID)
}
func (d *DataManager) storageDataDir() string {
return path.Join(d.basePath, storageDataDir)
}
func (d *DataManager) dataStatusPath(sequence *sequence.Sequence) string {
return fmt.Sprintf("%s/%s.status", d.storageDataDir(), sequence)
}
func (d *DataManager) DataTypeDir(dataType string) string {
return fmt.Sprintf("%s/%s", d.storageDataDir(), dataType)
}
func (d *DataManager) DataFileBasePath(dataType, name string) string {
return fmt.Sprintf("%s/%s", d.DataTypeDir(dataType), name)
}
func (d *DataManager) DataFileIndexPath(dataType, name string) string {
return fmt.Sprintf("%s.index", d.DataFileBasePath(dataType, name))
}
func (d *DataManager) DataFilePath(dataType, name string) string {
return fmt.Sprintf("%s.data", d.DataFileBasePath(dataType, name))
}
func etcdWalKey(walSeq string) string {
return path.Join(etcdWalsDir, walSeq)
}
// SetMaintenanceMode sets the datamanager in maintenance mode. This method must
// be called before invoking the Run method
func (d *DataManager) SetMaintenanceMode(maintenanceMode bool) {
d.maintenanceMode = maintenanceMode
}
// deleteEtcd deletes all etcd data excluding keys used for locking
func (d *DataManager) deleteEtcd(ctx context.Context) error {
prefixes := []string{
etcdWalsDir + "/",
etcdWalsDataKey,
etcdWalSeqKey,
etcdLastCommittedStorageWalSeqKey,
etcdCheckpointSeqKey,
etcdChangeGroupsDir + "/",
etcdChangeGroupMinRevisionKey,
}
for _, prefix := range prefixes {
if err := d.e.DeletePrefix(ctx, prefix); err != nil {
return errors.WithStack(err)
}
}
return nil
}
func (d *DataManager) Run(ctx context.Context, readyCh chan struct{}) error {
if !d.maintenanceMode {
for {
err := d.InitEtcd(ctx, nil)
if err == nil {
break
}
d.log.Err(err).Msgf("failed to initialize etcd")
sleepCh := time.NewTimer(1 * time.Second).C
select {
case <-ctx.Done():
return nil
case <-sleepCh:
}
}
readyCh <- struct{}{}
go d.watcherLoop(ctx)
go d.syncLoop(ctx)
go d.checkpointLoop(ctx)
go d.checkpointCleanLoop(ctx)
go d.etcdWalCleanerLoop(ctx)
go d.storageWalCleanerLoop(ctx)
go d.compactChangeGroupsLoop(ctx)
go d.etcdPingerLoop(ctx)
} else {
d.log.Info().Msgf("datamanager starting in maintenance mode")
readyCh <- struct{}{}
}
<-ctx.Done()
d.log.Info().Msgf("datamanager exiting")
return nil
}