2019-04-26 14:00:03 +00:00
// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
// See the License for the specific language governing permissions and
// limitations under the License.
package datamanager
import (
"context"
2019-07-03 15:03:37 +00:00
"fmt"
2019-04-26 14:00:03 +00:00
"path"
"strings"
"time"
2019-07-01 09:40:20 +00:00
"agola.io/agola/internal/etcd"
"agola.io/agola/internal/objectstorage"
2019-10-29 12:23:42 +00:00
"agola.io/agola/internal/sequence"
2019-04-26 14:00:03 +00:00
"go.uber.org/zap"
2019-05-23 09:23:14 +00:00
errors "golang.org/x/xerrors"
2019-04-26 14:00:03 +00:00
)
// TODO(sgotti) handle unwanted etcd changes:
// * Etcd cluster rebuild: we cannot rely on the etcd header ClusterID since it could be the same, as it's generated using the listen urls. We should add our own clusterid key and use it.
// * Etcd cluster restored to a previous revision: really bad, because we should detect that the revision is smaller than the current one.
// Default intervals and thresholds of the datamanager.
//
// NewDataManager falls back to DefaultEtcdWalsKeepNum,
// DefaultCheckpointInterval, DefaultCheckpointCleanInterval and
// DefaultMinCheckpointWalsNum when the matching DataManagerConfig field is
// left at its zero value.
const (
	DefaultSyncInterval                = 5 * time.Second
	DefaultCheckpointInterval          = 10 * time.Second
	DefaultCheckpointCleanInterval     = 5 * time.Minute
	DefaultEtcdWalCleanInterval        = 2 * time.Second
	DefaultStorageWalCleanInterval     = 5 * time.Minute
	DefaultCompactChangeGroupsInterval = 1 * time.Second
	DefaultEtcdPingerInterval          = 1 * time.Second
	DefaultEtcdWalsKeepNum             = 100
	DefaultMinCheckpointWalsNum        = 100
)
// Sentinel errors exported by the datamanager.
var (
	// ErrCompacted reports that the required revision has been compacted.
	ErrCompacted = errors.New("required revision has been compacted")

	// ErrConcurrency reports a wal concurrency error: the change groups were
	// already updated.
	ErrConcurrency = errors.New("wal concurrency error: change groups already updated")
)
2022-02-21 11:19:55 +00:00
// ErrNotExist wraps an underlying error to mark it as a "not exist"
// condition. Use IsNotExist to test for it anywhere in an error chain.
type ErrNotExist struct {
	err error
}

// Error returns the message of the wrapped error unchanged.
func (e *ErrNotExist) Error() string {
	return e.err.Error()
}

// Unwrap exposes the wrapped error so that it stays reachable through error
// chain inspection.
func (e *ErrNotExist) Unwrap() error {
	return e.err
}

// newErrNotExist wraps err in an *ErrNotExist.
func newErrNotExist(err error) error {
	return &ErrNotExist{err: err}
}

// IsNotExist reports whether err, or any error it wraps, is an *ErrNotExist.
func IsNotExist(err error) bool {
	var target *ErrNotExist
	return errors.As(err, &target)
}
// fromOSTError translates an object storage error into the datamanager's own
// error space: errors that objectstorage.IsNotExist recognizes are wrapped in
// an *ErrNotExist, every other value (nil included, unless objectstorage
// reports it as not-exist) is returned untouched.
func fromOSTError(err error) error {
	if !objectstorage.IsNotExist(err) {
		return err
	}
	return newErrNotExist(err)
}
2019-04-26 14:00:03 +00:00
var (
	// Storage paths. Always use path (not filepath) to use the "/" separator
	storageDataDir       = "data"
	storageWalsDir       = "wals"
	storageWalsStatusDir = path.Join(storageWalsDir, "status")
	storageWalsDataDir   = path.Join(storageWalsDir, "data")

	// etcd paths. Always use path (not filepath) to use the "/" separator
	etcdWalBaseDir                    = "datamanager"
	etcdWalsDir                       = path.Join(etcdWalBaseDir, "wals")
	etcdWalsDataKey                   = path.Join(etcdWalBaseDir, "walsdata")
	etcdWalSeqKey                     = path.Join(etcdWalBaseDir, "walseq")
	etcdLastCommittedStorageWalSeqKey = path.Join(etcdWalBaseDir, "lastcommittedstoragewalseq")
	etcdCheckpointSeqKey              = path.Join(etcdWalBaseDir, "checkpointseq")

	// etcd lock keys. deleteEtcd does not remove them.
	etcdInitEtcdLockKey            = path.Join(etcdWalBaseDir, "initetcd")
	etcdSyncLockKey                = path.Join(etcdWalBaseDir, "synclock")
	etcdCompactChangeGroupsLockKey = path.Join(etcdWalBaseDir, "compactchangegroupslock")
	etcdCheckpointLockKey          = path.Join(etcdWalBaseDir, "checkpointlock")
	etcdWalCleanerLockKey          = path.Join(etcdWalBaseDir, "walcleanerlock")
	etcdStorageWalCleanerLockKey   = path.Join(etcdWalBaseDir, "storagewalcleanerlock")

	// etcd change groups keys and the ping key.
	etcdChangeGroupsDir           = path.Join(etcdWalBaseDir, "changegroups")
	etcdChangeGroupMinRevisionKey = path.Join(etcdWalBaseDir, "changegroupsminrev")

	etcdPingKey = path.Join(etcdWalBaseDir, "ping")
)
const (
	// etcdChangeGroupMinRevisionRange is a revision range (1000) related to
	// the change groups minimum revision key.
	// NOTE(review): exact usage is outside this section — confirm.
	etcdChangeGroupMinRevisionRange = 1000

	// maxChangegroupNameLength is the length limit (256) for a change group
	// name.
	// NOTE(review): whether it counts bytes or runes is decided by the code
	// enforcing it, which is outside this section — confirm.
	maxChangegroupNameLength = 256
)
type DataManagerConfig struct {
2019-10-29 12:23:42 +00:00
BasePath string
E * etcd . Store
OST * objectstorage . ObjStorage
DataTypes [ ] string
EtcdWalsKeepNum int
CheckpointInterval time . Duration
CheckpointCleanInterval time . Duration
2019-04-26 14:00:03 +00:00
// MinCheckpointWalsNum is the minimum number of wals required before doing a checkpoint
MinCheckpointWalsNum int
2019-06-03 14:17:27 +00:00
MaxDataFileSize int64
2019-07-17 15:13:35 +00:00
MaintenanceMode bool
2019-04-26 14:00:03 +00:00
}
type DataManager struct {
2019-10-29 12:23:42 +00:00
basePath string
log * zap . SugaredLogger
e * etcd . Store
ost * objectstorage . ObjStorage
changes * WalChanges
dataTypes [ ] string
etcdWalsKeepNum int
checkpointInterval time . Duration
checkpointCleanInterval time . Duration
minCheckpointWalsNum int
maxDataFileSize int64
maintenanceMode bool
2019-04-26 14:00:03 +00:00
}
func NewDataManager ( ctx context . Context , logger * zap . Logger , conf * DataManagerConfig ) ( * DataManager , error ) {
if conf . EtcdWalsKeepNum == 0 {
conf . EtcdWalsKeepNum = DefaultEtcdWalsKeepNum
}
if conf . EtcdWalsKeepNum < 1 {
return nil , errors . New ( "etcdWalsKeepNum must be greater than 0" )
}
if conf . CheckpointInterval == 0 {
conf . CheckpointInterval = DefaultCheckpointInterval
}
2019-10-29 12:23:42 +00:00
if conf . CheckpointCleanInterval == 0 {
conf . CheckpointCleanInterval = DefaultCheckpointCleanInterval
}
2019-04-26 14:00:03 +00:00
if conf . MinCheckpointWalsNum == 0 {
conf . MinCheckpointWalsNum = DefaultMinCheckpointWalsNum
}
if conf . MinCheckpointWalsNum < 1 {
return nil , errors . New ( "minCheckpointWalsNum must be greater than 0" )
}
2019-06-03 14:17:27 +00:00
if conf . MaxDataFileSize == 0 {
conf . MaxDataFileSize = DefaultMaxDataFileSize
}
2019-04-26 14:00:03 +00:00
d := & DataManager {
2019-10-29 12:23:42 +00:00
basePath : conf . BasePath ,
log : logger . Sugar ( ) ,
e : conf . E ,
ost : conf . OST ,
changes : NewWalChanges ( conf . DataTypes ) ,
dataTypes : conf . DataTypes ,
etcdWalsKeepNum : conf . EtcdWalsKeepNum ,
checkpointInterval : conf . CheckpointInterval ,
checkpointCleanInterval : conf . CheckpointCleanInterval ,
minCheckpointWalsNum : conf . MinCheckpointWalsNum ,
maxDataFileSize : conf . MaxDataFileSize ,
maintenanceMode : conf . MaintenanceMode ,
2019-04-26 14:00:03 +00:00
}
// add trailing slash the basepath
if d . basePath != "" && ! strings . HasSuffix ( d . basePath , "/" ) {
d . basePath = d . basePath + "/"
}
return d , nil
}
2019-11-08 09:10:56 +00:00
// storageWalStatusDir returns the object storage directory of the wal status
// files: the base path joined with storageWalsStatusDir.
func (d *DataManager) storageWalStatusDir() string {
	dir := path.Join(d.basePath, storageWalsStatusDir)
	return dir
}
2019-07-03 15:03:37 +00:00
func ( d * DataManager ) storageWalStatusFile ( walSeq string ) string {
2019-11-08 09:10:56 +00:00
return path . Join ( d . storageWalStatusDir ( ) , walSeq )
}
func ( d * DataManager ) storageWalDataDir ( ) string {
return path . Join ( d . basePath , storageWalsDataDir )
2019-07-03 15:03:37 +00:00
}
func ( d * DataManager ) storageWalDataFile ( walFileID string ) string {
2019-11-08 09:10:56 +00:00
return path . Join ( d . storageWalDataDir ( ) , walFileID )
2019-07-03 15:03:37 +00:00
}
// storageDataDir returns the object storage directory holding the data
// files: the base path joined with the package level storageDataDir value.
func (d *DataManager) storageDataDir() string {
	dir := path.Join(d.basePath, storageDataDir)
	return dir
}
2019-10-29 12:23:42 +00:00
func ( d * DataManager ) dataStatusPath ( sequence * sequence . Sequence ) string {
2019-07-03 15:03:37 +00:00
return fmt . Sprintf ( "%s/%s.status" , d . storageDataDir ( ) , sequence )
}
2019-10-29 12:23:42 +00:00
func ( d * DataManager ) DataTypeDir ( dataType string ) string {
return fmt . Sprintf ( "%s/%s" , d . storageDataDir ( ) , dataType )
2019-07-03 15:03:37 +00:00
}
2019-10-29 12:23:42 +00:00
// DataFileBasePath returns the extension-less object storage path of the
// data file called name inside the directory of dataType:
// "<data type dir>/<name>".
func (d *DataManager) DataFileBasePath(dataType, name string) string {
	return d.DataTypeDir(dataType) + "/" + name
}
// DataFileIndexPath returns the object storage path of the index of a data
// file: the data file base path followed by ".index".
func (d *DataManager) DataFileIndexPath(dataType, name string) string {
	return d.DataFileBasePath(dataType, name) + ".index"
}
func ( d * DataManager ) DataFilePath ( dataType , name string ) string {
return fmt . Sprintf ( "%s.data" , d . DataFileBasePath ( dataType , name ) )
2019-07-03 15:03:37 +00:00
}
// etcdWalKey returns the etcd key of the wal with sequence walSeq, i.e.
// walSeq joined to etcdWalsDir.
func etcdWalKey(walSeq string) string {
	key := path.Join(etcdWalsDir, walSeq)
	return key
}
2019-07-17 15:13:35 +00:00
// SetMaintenanceMode enables or disables the maintenance mode of the
// datamanager. Run checks the flag when it starts, so this method must be
// called before invoking Run.
func (d *DataManager) SetMaintenanceMode(maintenanceMode bool) {
	d.maintenanceMode = maintenanceMode
}
2019-07-17 15:09:36 +00:00
// deleteEtcd deletes the datamanager etcd data: wals, wals data, wal
// sequence, last committed storage wal sequence, checkpoint sequence, change
// groups and change groups min revision. The keys used for locking and the
// ping key are not part of the deleted set.
//
// It stops at the first failing deletion and returns that error.
func (d *DataManager) deleteEtcd(ctx context.Context) error {
	// Directory-like entries carry a trailing "/" so that sibling keys
	// sharing the same name prefix (e.g. walsdata vs wals) are not matched.
	for _, keyPrefix := range []string{
		etcdWalsDir + "/",
		etcdWalsDataKey,
		etcdWalSeqKey,
		etcdLastCommittedStorageWalSeqKey,
		etcdCheckpointSeqKey,
		etcdChangeGroupsDir + "/",
		etcdChangeGroupMinRevisionKey,
	} {
		if err := d.e.DeletePrefix(ctx, keyPrefix); err != nil {
			return err
		}
	}

	return nil
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) Run ( ctx context . Context , readyCh chan struct { } ) error {
2019-07-17 15:13:35 +00:00
if ! d . maintenanceMode {
for {
err := d . InitEtcd ( ctx , nil )
if err == nil {
break
}
d . log . Errorf ( "failed to initialize etcd: %+v" , err )
2019-07-25 13:53:26 +00:00
sleepCh := time . NewTimer ( 1 * time . Second ) . C
select {
case <- ctx . Done ( ) :
return nil
case <- sleepCh :
}
2019-04-26 14:00:03 +00:00
}
2019-07-17 15:13:35 +00:00
readyCh <- struct { } { }
2019-04-26 14:00:03 +00:00
2019-07-17 15:13:35 +00:00
go d . watcherLoop ( ctx )
go d . syncLoop ( ctx )
go d . checkpointLoop ( ctx )
2019-10-29 12:23:42 +00:00
go d . checkpointCleanLoop ( ctx )
2019-11-08 09:10:56 +00:00
go d . etcdWalCleanerLoop ( ctx )
go d . storageWalCleanerLoop ( ctx )
2019-07-17 15:13:35 +00:00
go d . compactChangeGroupsLoop ( ctx )
go d . etcdPingerLoop ( ctx )
} else {
d . log . Infof ( "datamanager starting in maintenance mode" )
readyCh <- struct { } { }
}
2019-04-26 14:00:03 +00:00
2019-07-02 12:27:51 +00:00
<- ctx . Done ( )
2019-07-17 15:13:35 +00:00
d . log . Infof ( "datamanager exiting" )
2019-07-02 12:27:51 +00:00
return nil
2019-04-26 14:00:03 +00:00
}