agola/internal/datamanager/datamanager.go
Simone Gotti d2b09d854f *: use new errors handling library
Implement a new error handling library based on pkg/errors. It provides
stack saving on wrapping and exports some functions to also add stack
saving to external errors.
It also implements custom zerolog error formatting that avoids excessive
verbosity by printing only each chained error's file:line instead of a
full stack trace for every error.

* Add a --detailed-errors option to print errors with their full chain
* Wrap all error returns. Use errors.WithStack to wrap without adding a
  new message and errors.Wrap[f] to add one.
* Add golangci-lint wrapcheck to check that errors from external packages
  are wrapped. This won't check that errors from internal packages are
  wrapped, but since we want to ensure that case too, we'll have to find
  something else to check those as well.
2022-02-28 12:49:13 +01:00
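
A minimal sketch of the wrapping conventions described above, assuming the
internal errors package mirrors the pkg/errors API it is based on (WithStack,
Wrapf); the readConfig and loadConfig helpers are hypothetical:

package example

import (
	"os"

	"agola.io/agola/internal/errors"
)

func readConfig(path string) ([]byte, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		// Error from an external package: wrap it with a message (a stack
		// is saved at the wrap point).
		return nil, errors.Wrapf(err, "failed to read config %q", path)
	}
	return data, nil
}

func loadConfig(path string) ([]byte, error) {
	data, err := readConfig(path)
	if err != nil {
		// Propagate without adding a new message, still saving the stack.
		return nil, errors.WithStack(err)
	}
	return data, nil
}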


// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datamanager

import (
	"context"
	"fmt"
	"path"
	"strings"
	"time"

	"agola.io/agola/internal/errors"
	"agola.io/agola/internal/etcd"
	"agola.io/agola/internal/objectstorage"
	"agola.io/agola/internal/sequence"

	"github.com/rs/zerolog"
)

// TODO(sgotti) handle etcd unwanted changes:
// * Etcd cluster rebuild: we cannot rely on etcd header ClusterID since it could be the same as it's generated using the listen urls. We should add our own clusterid key and use it.
// * Etcd cluster restored to a previous revision: really bad cause should detect that the revision is smaller than the current one
const (
	DefaultSyncInterval                = 5 * time.Second
	DefaultCheckpointInterval          = 10 * time.Second
	DefaultCheckpointCleanInterval     = 5 * time.Minute
	DefaultEtcdWalCleanInterval        = 2 * time.Second
	DefaultStorageWalCleanInterval     = 5 * time.Minute
	DefaultCompactChangeGroupsInterval = 1 * time.Second
	DefaultEtcdPingerInterval          = 1 * time.Second
	DefaultEtcdWalsKeepNum             = 100
	DefaultMinCheckpointWalsNum        = 100
	// DefaultMaxDataFileSize is the assumed default maximum size of a
	// single data file (10 MiB).
	DefaultMaxDataFileSize = 10 * 1024 * 1024
)

var (
	ErrCompacted   = errors.New("required revision has been compacted")
	ErrConcurrency = errors.New("wal concurrency error: change groups already updated")
)
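
// Matching sketch: these sentinel errors are matched with errors.Is-style
// comparison, so they still match after being wrapped with errors.WithStack
// or errors.Wrapf (walWrite is a hypothetical caller):
//
//	if err := walWrite(); errors.Is(err, ErrConcurrency) {
//		// change groups were updated concurrently; refresh and retry
//	}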

type ErrNotExist struct {
	err error
}

func newErrNotExist(err error) error {
	return &ErrNotExist{err: err}
}

func (e *ErrNotExist) Error() string {
	return e.err.Error()
}

func (e *ErrNotExist) Unwrap() error {
	return e.err
}

func IsNotExist(err error) bool {
	var e *ErrNotExist
	return errors.As(err, &e)
}

func fromOSTError(err error) error {
	if objectstorage.IsNotExist(err) {
		return newErrNotExist(err)
	}

	return err
}
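
// Usage sketch (hypothetical call sites): fromOSTError is meant to be applied
// at the object storage boundary so callers can detect missing objects with
// IsNotExist even after further wrapping:
//
//	data, err := d.ost.ReadObject(d.storageWalStatusFile(walSeq))
//	if err != nil {
//		return nil, fromOSTError(err)
//	}
//
// A caller then checks:
//
//	if err := load(); IsNotExist(err) {
//		// the object is missing; not a hard failure
//	}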

var (
	// Storage paths. Always use path (not filepath) to use the "/" separator
	storageDataDir       = "data"
	storageWalsDir       = "wals"
	storageWalsStatusDir = path.Join(storageWalsDir, "status")
	storageWalsDataDir   = path.Join(storageWalsDir, "data")

	// etcd paths. Always use path (not filepath) to use the "/" separator
	etcdWalBaseDir                    = "datamanager"
	etcdWalsDir                       = path.Join(etcdWalBaseDir, "wals")
	etcdWalsDataKey                   = path.Join(etcdWalBaseDir, "walsdata")
	etcdWalSeqKey                     = path.Join(etcdWalBaseDir, "walseq")
	etcdLastCommittedStorageWalSeqKey = path.Join(etcdWalBaseDir, "lastcommittedstoragewalseq")
	etcdCheckpointSeqKey              = path.Join(etcdWalBaseDir, "checkpointseq")

	etcdInitEtcdLockKey            = path.Join(etcdWalBaseDir, "initetcd")
	etcdSyncLockKey                = path.Join(etcdWalBaseDir, "synclock")
	etcdCompactChangeGroupsLockKey = path.Join(etcdWalBaseDir, "compactchangegroupslock")
	etcdCheckpointLockKey          = path.Join(etcdWalBaseDir, "checkpointlock")
	etcdWalCleanerLockKey          = path.Join(etcdWalBaseDir, "walcleanerlock")
	etcdStorageWalCleanerLockKey   = path.Join(etcdWalBaseDir, "storagewalcleanerlock")

	etcdChangeGroupsDir           = path.Join(etcdWalBaseDir, "changegroups")
	etcdChangeGroupMinRevisionKey = path.Join(etcdWalBaseDir, "changegroupsminrev")

	etcdPingKey = path.Join(etcdWalBaseDir, "ping")
)

const (
	etcdChangeGroupMinRevisionRange = 1000

	maxChangegroupNameLength = 256
)

type DataManagerConfig struct {
	BasePath                string
	E                       *etcd.Store
	OST                     *objectstorage.ObjStorage
	DataTypes               []string
	EtcdWalsKeepNum         int
	CheckpointInterval      time.Duration
	CheckpointCleanInterval time.Duration
	// MinCheckpointWalsNum is the minimum number of wals required before doing a checkpoint
	MinCheckpointWalsNum int
	MaxDataFileSize      int64
	MaintenanceMode      bool
}

type DataManager struct {
	basePath                string
	log                     zerolog.Logger
	e                       *etcd.Store
	ost                     *objectstorage.ObjStorage
	changes                 *WalChanges
	dataTypes               []string
	etcdWalsKeepNum         int
	checkpointInterval      time.Duration
	checkpointCleanInterval time.Duration
	minCheckpointWalsNum    int
	maxDataFileSize         int64
	maintenanceMode         bool
}

func NewDataManager(ctx context.Context, log zerolog.Logger, conf *DataManagerConfig) (*DataManager, error) {
	if conf.EtcdWalsKeepNum == 0 {
		conf.EtcdWalsKeepNum = DefaultEtcdWalsKeepNum
	}
	if conf.EtcdWalsKeepNum < 1 {
		return nil, errors.New("etcdWalsKeepNum must be greater than 0")
	}
	if conf.CheckpointInterval == 0 {
		conf.CheckpointInterval = DefaultCheckpointInterval
	}
	if conf.CheckpointCleanInterval == 0 {
		conf.CheckpointCleanInterval = DefaultCheckpointCleanInterval
	}
	if conf.MinCheckpointWalsNum == 0 {
		conf.MinCheckpointWalsNum = DefaultMinCheckpointWalsNum
	}
	if conf.MinCheckpointWalsNum < 1 {
		return nil, errors.New("minCheckpointWalsNum must be greater than 0")
	}
	if conf.MaxDataFileSize == 0 {
		conf.MaxDataFileSize = DefaultMaxDataFileSize
	}

	d := &DataManager{
		basePath:                conf.BasePath,
		log:                     log,
		e:                       conf.E,
		ost:                     conf.OST,
		changes:                 NewWalChanges(conf.DataTypes),
		dataTypes:               conf.DataTypes,
		etcdWalsKeepNum:         conf.EtcdWalsKeepNum,
		checkpointInterval:      conf.CheckpointInterval,
		checkpointCleanInterval: conf.CheckpointCleanInterval,
		minCheckpointWalsNum:    conf.MinCheckpointWalsNum,
		maxDataFileSize:         conf.MaxDataFileSize,
		maintenanceMode:         conf.MaintenanceMode,
	}

	// add a trailing slash to the basepath
	if d.basePath != "" && !strings.HasSuffix(d.basePath, "/") {
		d.basePath = d.basePath + "/"
	}

	return d, nil
}
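
// Construction sketch (values are illustrative): zero config fields fall back
// to the defaults above, so a minimal configuration only needs the stores, a
// base path and the handled data types:
//
//	dm, err := NewDataManager(ctx, log, &DataManagerConfig{
//		BasePath:  "dm",
//		E:         e,   // *etcd.Store
//		OST:       ost, // *objectstorage.ObjStorage
//		DataTypes: []string{"run", "user"},
//	})
//	if err != nil {
//		return errors.WithStack(err)
//	}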

func (d *DataManager) storageWalStatusDir() string {
	return path.Join(d.basePath, storageWalsStatusDir)
}

func (d *DataManager) storageWalStatusFile(walSeq string) string {
	return path.Join(d.storageWalStatusDir(), walSeq)
}

func (d *DataManager) storageWalDataDir() string {
	return path.Join(d.basePath, storageWalsDataDir)
}

func (d *DataManager) storageWalDataFile(walFileID string) string {
	return path.Join(d.storageWalDataDir(), walFileID)
}

func (d *DataManager) storageDataDir() string {
	return path.Join(d.basePath, storageDataDir)
}

func (d *DataManager) dataStatusPath(sequence *sequence.Sequence) string {
	return fmt.Sprintf("%s/%s.status", d.storageDataDir(), sequence)
}

func (d *DataManager) DataTypeDir(dataType string) string {
	return fmt.Sprintf("%s/%s", d.storageDataDir(), dataType)
}

func (d *DataManager) DataFileBasePath(dataType, name string) string {
	return fmt.Sprintf("%s/%s", d.DataTypeDir(dataType), name)
}

func (d *DataManager) DataFileIndexPath(dataType, name string) string {
	return fmt.Sprintf("%s.index", d.DataFileBasePath(dataType, name))
}

func (d *DataManager) DataFilePath(dataType, name string) string {
	return fmt.Sprintf("%s.data", d.DataFileBasePath(dataType, name))
}

func etcdWalKey(walSeq string) string {
	return path.Join(etcdWalsDir, walSeq)
}
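
// Layout produced by the helpers above for an illustrative basePath of "dm/":
//
//	dm/data/<dataType>/<name>.index    DataFileIndexPath
//	dm/data/<dataType>/<name>.data     DataFilePath
//	dm/data/<seq>.status               dataStatusPath
//	dm/wals/status/<walSeq>            storageWalStatusFile
//	dm/wals/data/<walFileID>           storageWalDataFile
//	datamanager/wals/<walSeq>          etcdWalKey (an etcd key, not a storage path)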

// SetMaintenanceMode sets the datamanager in maintenance mode. This method
// must be called before invoking the Run method.
func (d *DataManager) SetMaintenanceMode(maintenanceMode bool) {
	d.maintenanceMode = maintenanceMode
}

// deleteEtcd deletes all etcd data excluding keys used for locking
func (d *DataManager) deleteEtcd(ctx context.Context) error {
	prefixes := []string{
		etcdWalsDir + "/",
		etcdWalsDataKey,
		etcdWalSeqKey,
		etcdLastCommittedStorageWalSeqKey,
		etcdCheckpointSeqKey,
		etcdChangeGroupsDir + "/",
		etcdChangeGroupMinRevisionKey,
	}

	for _, prefix := range prefixes {
		if err := d.e.DeletePrefix(ctx, prefix); err != nil {
			return errors.WithStack(err)
		}
	}

	return nil
}

func (d *DataManager) Run(ctx context.Context, readyCh chan struct{}) error {
	if !d.maintenanceMode {
		for {
			err := d.InitEtcd(ctx, nil)
			if err == nil {
				break
			}
			d.log.Err(err).Msgf("failed to initialize etcd")

			sleepCh := time.NewTimer(1 * time.Second).C
			select {
			case <-ctx.Done():
				return nil
			case <-sleepCh:
			}
		}

		readyCh <- struct{}{}

		go d.watcherLoop(ctx)
		go d.syncLoop(ctx)
		go d.checkpointLoop(ctx)
		go d.checkpointCleanLoop(ctx)
		go d.etcdWalCleanerLoop(ctx)
		go d.storageWalCleanerLoop(ctx)
		go d.compactChangeGroupsLoop(ctx)
		go d.etcdPingerLoop(ctx)
	} else {
		d.log.Info().Msgf("datamanager starting in maintenance mode")
		readyCh <- struct{}{}
	}

	<-ctx.Done()
	d.log.Info().Msgf("datamanager exiting")

	return nil
}
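
// Usage sketch: Run blocks until ctx is done and signals on readyCh once etcd
// is initialized (or immediately in maintenance mode), so callers typically
// start it in a goroutine and wait for readiness:
//
//	readyCh := make(chan struct{})
//	go func() { _ = dm.Run(ctx, readyCh) }()
//	<-readyCh
//	// the datamanager is ready; start the components that depend on it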