2019-02-22 07:45:59 +00:00
// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
// See the License for the specific language governing permissions and
// limitations under the License.
2019-04-26 14:00:03 +00:00
package datamanager
2019-02-22 07:45:59 +00:00
import (
"bytes"
"context"
"encoding/json"
2019-04-29 08:12:03 +00:00
"fmt"
2019-02-22 07:45:59 +00:00
"io"
"io/ioutil"
"path"
"strings"
"time"
2019-07-01 09:40:20 +00:00
"agola.io/agola/internal/etcd"
2019-11-08 15:25:53 +00:00
"agola.io/agola/internal/objectstorage"
2019-07-01 09:40:20 +00:00
"agola.io/agola/internal/sequence"
2019-11-06 12:29:42 +00:00
"agola.io/agola/internal/util"
2019-02-22 07:45:59 +00:00
2019-05-21 13:17:53 +00:00
uuid "github.com/satori/go.uuid"
2019-02-22 07:45:59 +00:00
etcdclientv3 "go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/clientv3/concurrency"
etcdclientv3rpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
"go.etcd.io/etcd/mvcc/mvccpb"
2019-05-23 09:23:14 +00:00
errors "golang.org/x/xerrors"
2019-02-22 07:45:59 +00:00
)
// ActionType is the kind of operation carried by an Action.
type ActionType string
// Known action types.
const (
// ActionTypePut marks an action that carries data for an entry; ReadObject
// serves such actions straight from the wal.
ActionTypePut ActionType = "put"
// ActionTypeDelete marks an action of type "delete".
ActionTypeDelete ActionType = "delete"
)
type Action struct {
ActionType ActionType
2019-04-01 10:54:43 +00:00
DataType string
ID string
2019-02-22 07:45:59 +00:00
Data [ ] byte
}
// WalHeader is the JSON content of a wal ".committed" status file in the
// object storage: sync writes it and ReadWal decodes it.
type WalHeader struct {
// WalDataFileID is the ID of the wal data file holding the wal actions.
WalDataFileID string
// PreviousWalSequence is copied from the etcd WalData entry of the same wal.
PreviousWalSequence string
}
// WalStatus is the state of a wal as saved in its etcd WalData entry.
type WalStatus string

const (
	// WalStatusCommitted represents a wal written to the objectstorage.
	WalStatusCommitted WalStatus = "committed"

	// WalStatusCommittedStorage represents the .committed marker file written
	// to the objectstorage.
	WalStatusCommittedStorage WalStatus = "committed_storage"

	// WalStatusCheckpointed means that all the wal actions have been executed
	// on the objectstorage.
	WalStatusCheckpointed WalStatus = "checkpointed"
)
// WalsData is the global wals state saved (JSON encoded) in etcd under
// etcdWalsDataKey.
type WalsData struct {
// LastCommittedWalSequence is the sequence of the last wal committed by
// WriteWalAdditionalOps.
LastCommittedWalSequence string
// Revision is not serialized; WriteWalAdditionalOps fills it with the etcd
// mod revision of the key the value was read from.
Revision int64 ` json:"-" `
}
type WalData struct {
WalDataFileID string
WalStatus WalStatus
WalSequence string
PreviousWalSequence string
2019-04-26 14:00:03 +00:00
// internal values not saved
Revision int64 ` json:"-" `
2019-02-22 07:45:59 +00:00
}
// ChangeGroupsUpdateToken captures the revisions of a set of change groups at a
// point in time. WriteWalAdditionalOps uses it to build the etcd comparisons
// that make a wal commit fail when one of the change groups changed in the
// meantime.
type ChangeGroupsUpdateToken struct {
// CurRevision is the revision at which the token was generated.
CurRevision int64 ` json:"cur_revision" `
// ChangeGroupsRevisions holds the revision of every requested change group.
ChangeGroupsRevisions changeGroupsRevisions ` json:"change_groups_revisions" `
}
// changeGroupsRevisions maps a change group name to its revision. Watch reports
// a deleted change group with revision 0.
type changeGroupsRevisions map [ string ] int64
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) GetChangeGroupsUpdateToken ( cgNames [ ] string ) ( * ChangeGroupsUpdateToken , error ) {
d . changes . Lock ( )
defer d . changes . Unlock ( )
if ! d . changes . initialized {
2019-03-28 15:01:08 +00:00
return nil , errors . Errorf ( "wal changes not ready" )
}
2019-04-26 14:00:03 +00:00
revision := d . changes . curRevision ( )
cgr := d . changes . getChangeGroups ( cgNames )
2019-03-28 15:01:08 +00:00
return & ChangeGroupsUpdateToken { CurRevision : revision , ChangeGroupsRevisions : cgr } , nil
2019-02-22 07:45:59 +00:00
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) ReadObject ( dataType , id string , cgNames [ ] string ) ( io . ReadCloser , * ChangeGroupsUpdateToken , error ) {
d . changes . Lock ( )
if ! d . changes . initialized {
d . changes . Unlock ( )
2019-03-28 15:01:08 +00:00
return nil , nil , errors . Errorf ( "wal changes not ready" )
}
2019-04-26 14:00:03 +00:00
walseq , ok := d . changes . getPut ( dataType , id )
revision := d . changes . curRevision ( )
cgr := d . changes . getChangeGroups ( cgNames )
actions := d . changes . actions [ walseq ]
d . changes . Unlock ( )
2019-02-22 07:45:59 +00:00
cgt := & ChangeGroupsUpdateToken { CurRevision : revision , ChangeGroupsRevisions : cgr }
if ok {
for _ , action := range actions {
2019-04-01 10:54:43 +00:00
if action . ActionType == ActionTypePut {
2019-04-26 14:00:03 +00:00
if action . DataType == dataType && action . ID == id {
d . log . Debugf ( "reading datatype %q, id %q from wal: %q" , dataType , id )
2019-04-01 10:54:43 +00:00
return ioutil . NopCloser ( bytes . NewReader ( action . Data ) ) , cgt , nil
}
2019-02-22 07:45:59 +00:00
}
}
2019-11-06 12:29:42 +00:00
return nil , nil , util . NewErrNotExist ( errors . Errorf ( "no datatype %q, id %q in wal %s" , dataType , id , walseq ) )
2019-02-22 07:45:59 +00:00
}
2019-04-26 14:00:03 +00:00
f , err := d . Read ( dataType , id )
return ioutil . NopCloser ( f ) , cgt , err
2019-02-22 07:45:59 +00:00
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) HasOSTWal ( walseq string ) ( bool , error ) {
_ , err := d . ost . Stat ( d . storageWalStatusFile ( walseq ) + ".committed" )
2019-11-06 12:29:42 +00:00
if objectstorage . IsNotExist ( err ) {
2019-02-22 07:45:59 +00:00
return false , nil
}
if err != nil {
return false , err
}
return true , nil
}
2019-11-08 08:44:22 +00:00
func ( d * DataManager ) ReadWal ( walseq string ) ( * WalHeader , error ) {
walFilef , err := d . ost . ReadObject ( d . storageWalStatusFile ( walseq ) + ".committed" )
if err != nil {
return nil , err
}
defer walFilef . Close ( )
dec := json . NewDecoder ( walFilef )
var header * WalHeader
if err = dec . Decode ( & header ) ; err != nil {
return nil , err
}
return header , nil
2019-02-22 07:45:59 +00:00
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) ReadWalData ( walFileID string ) ( io . ReadCloser , error ) {
return d . ost . ReadObject ( d . storageWalDataFile ( walFileID ) )
2019-02-22 07:45:59 +00:00
}
// WalFile is an element emitted by ListOSTWals: either the sequence of a wal
// found in the object storage or the error that stopped the listing.
type WalFile struct {
	// WalSequence is the wal sequence taken from the status file name.
	WalSequence string
	// Err is set when listing the object storage failed.
	Err error
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) ListOSTWals ( start string ) <- chan * WalFile {
2019-02-22 07:45:59 +00:00
walCh := make ( chan * WalFile , 1 )
go func ( ) {
doneCh := make ( chan struct { } )
defer close ( doneCh )
defer close ( walCh )
curWal := & WalFile { }
var startPath string
if start != "" {
2019-04-26 14:00:03 +00:00
startPath = d . storageWalStatusFile ( start )
2019-02-22 07:45:59 +00:00
}
2019-11-08 09:10:56 +00:00
for object := range d . ost . List ( d . storageWalStatusDir ( ) + "/" , startPath , true , doneCh ) {
2019-02-22 07:45:59 +00:00
if object . Err != nil {
walCh <- & WalFile {
Err : object . Err ,
}
return
}
name := path . Base ( object . Path )
ext := path . Ext ( name )
2019-11-05 16:45:55 +00:00
// accept only ".committed" files (skip old files that had ".checkpointed" extensions)
if ext != ".committed" {
continue
}
2019-02-22 07:45:59 +00:00
walSequence := strings . TrimSuffix ( name , ext )
2019-11-05 16:45:55 +00:00
2019-02-22 07:45:59 +00:00
// wal file refers to another wal, so return the current one
if curWal . WalSequence != walSequence {
if curWal . WalSequence != "" {
2019-11-05 16:45:55 +00:00
walCh <- curWal
2019-02-22 07:45:59 +00:00
}
curWal = & WalFile {
WalSequence : walSequence ,
}
}
}
2019-11-05 16:45:55 +00:00
2019-02-22 07:45:59 +00:00
if curWal . WalSequence != "" {
walCh <- curWal
}
} ( )
return walCh
}
// ListEtcdWalsElement is an element emitted by ListEtcdWals.
type ListEtcdWalsElement struct {
// WalData is the wal data decoded from an etcd entry.
WalData * WalData
// Err is the listing error or the error hit while decoding the entry.
Err error
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) ListEtcdWals ( ctx context . Context , revision int64 ) <- chan * ListEtcdWalsElement {
2019-02-22 07:45:59 +00:00
walCh := make ( chan * ListEtcdWalsElement , 1 )
go func ( ) {
defer close ( walCh )
var continuation * etcd . ListPagedContinuation
for {
2019-04-26 14:00:03 +00:00
listResp , err := d . e . ListPaged ( ctx , etcdWalsDir , revision , 10 , continuation )
2019-02-22 07:45:59 +00:00
if err != nil {
walCh <- & ListEtcdWalsElement {
Err : err ,
}
return
}
resp := listResp . Resp
continuation = listResp . Continuation
for _ , kv := range resp . Kvs {
var walData * WalData
err := json . Unmarshal ( kv . Value , & walData )
walCh <- & ListEtcdWalsElement {
WalData : walData ,
Err : err ,
}
}
if ! listResp . HasMore {
break
}
}
} ( )
return walCh
}
2019-04-29 08:13:13 +00:00
// ListEtcdChangeGroups returns the change groups saved in etcd at the provided
// revision, mapping every change group name (the last element of its key) to
// the mod revision of its key.
func (d *DataManager) ListEtcdChangeGroups(ctx context.Context, revision int64) (changeGroupsRevisions, error) {
	resp, err := d.e.List(ctx, etcdChangeGroupsDir, "", revision)
	if err != nil {
		return nil, err
	}

	revisions := make(changeGroupsRevisions)
	for _, kv := range resp.Kvs {
		name := path.Base(string(kv.Key))
		revisions[name] = kv.ModRevision
	}
	return revisions, nil
}
2019-02-22 07:45:59 +00:00
// FirstAvailableWalData returns the first (the one with smaller sequence) wal
// and returns it (or nil if not available) and the etcd revision at the time of
// the operation
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) FirstAvailableWalData ( ctx context . Context ) ( * WalData , int64 , error ) {
2019-02-22 07:45:59 +00:00
// list waldata and just get the first if available
2019-04-26 14:00:03 +00:00
listResp , err := d . e . ListPaged ( ctx , etcdWalsDir , 0 , 1 , nil )
2019-02-22 07:45:59 +00:00
if err != nil {
return nil , 0 , err
}
resp := listResp . Resp
revision := resp . Header . Revision
if len ( resp . Kvs ) == 0 {
return nil , revision , nil
}
var walData * WalData
if err := json . Unmarshal ( resp . Kvs [ 0 ] . Value , & walData ) ; err != nil {
return nil , 0 , err
}
return walData , revision , nil
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) LastCommittedStorageWal ( ctx context . Context ) ( string , int64 , error ) {
resp , err := d . e . Get ( ctx , etcdLastCommittedStorageWalSeqKey , 0 )
2019-02-22 07:45:59 +00:00
if err != nil && err != etcd . ErrKeyNotFound {
return "" , 0 , err
}
if err == etcd . ErrKeyNotFound {
return "" , 0 , errors . Errorf ( "no last committedstorage wal on etcd" )
}
lastCommittedStorageWal := string ( resp . Kvs [ 0 ] . Value )
revision := resp . Header . Revision
return lastCommittedStorageWal , revision , nil
}
// WatchElement is an element emitted by Watch for an etcd watch response.
type WatchElement struct {
// Revision is the revision found in the watch response header.
Revision int64
// WalData is set when the response contains a put of a wal key.
WalData * WalData
// ChangeGroupsRevisions maps every change group touched by the response to
// the mod revision of its key, or to 0 when the key was deleted.
ChangeGroupsRevisions changeGroupsRevisions
// Err is set when the watch was canceled or an event could not be handled.
Err error
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) Watch ( ctx context . Context , revision int64 ) <- chan * WatchElement {
2019-02-22 07:45:59 +00:00
walCh := make ( chan * WatchElement , 1 )
// TODO(sgotti) if the etcd cluster goes down, watch won't return an error but
// wait until it comes back. We have to find a way to detect when the cluster
// is down and report an error so our clients can react (i.e. a readdb could
// mark itself as not in sync)
wctx := etcdclientv3 . WithRequireLeader ( ctx )
2019-04-26 14:00:03 +00:00
wch := d . e . Watch ( wctx , etcdWalBaseDir + "/" , revision )
2019-02-22 07:45:59 +00:00
go func ( ) {
defer close ( walCh )
for wresp := range wch {
we := & WatchElement { ChangeGroupsRevisions : make ( changeGroupsRevisions ) }
2019-03-27 19:40:23 +00:00
send := false
2019-02-22 07:45:59 +00:00
if wresp . Canceled {
err := wresp . Err ( )
switch err {
case etcdclientv3rpc . ErrCompacted :
we . Err = ErrCompacted
default :
we . Err = err
}
walCh <- we
return
}
we . Revision = wresp . Header . Revision
for _ , ev := range wresp . Events {
key := string ( ev . Kv . Key )
switch {
case strings . HasPrefix ( key , etcdWalsDir + "/" ) :
2019-03-27 19:40:23 +00:00
send = true
2019-02-22 07:45:59 +00:00
switch ev . Type {
case mvccpb . PUT :
var walData * WalData
if err := json . Unmarshal ( ev . Kv . Value , & walData ) ; err != nil {
we . Err = wresp . Err ( )
walCh <- we
return
}
we . WalData = walData
}
case strings . HasPrefix ( key , etcdChangeGroupsDir + "/" ) :
2019-03-27 19:40:23 +00:00
send = true
2019-02-22 07:45:59 +00:00
switch ev . Type {
case mvccpb . PUT :
changeGroup := path . Base ( string ( ev . Kv . Key ) )
we . ChangeGroupsRevisions [ changeGroup ] = ev . Kv . ModRevision
case mvccpb . DELETE :
changeGroup := path . Base ( string ( ev . Kv . Key ) )
we . ChangeGroupsRevisions [ changeGroup ] = 0
}
2019-03-27 19:40:23 +00:00
case key == etcdPingKey :
send = true
2019-02-22 07:45:59 +00:00
default :
continue
}
}
2019-03-27 19:40:23 +00:00
if send {
walCh <- we
}
2019-02-22 07:45:59 +00:00
}
} ( )
return walCh
}
// WriteWal writes the provided actions in a wal file. The wal will be marked as
// "committed" on etcd if the provided group changes aren't changed in the
// meantime or a optimistic concurrency error will be returned and the wal won't
// be committed
//
// TODO(sgotti) save inside the wal file also the previous committed wal to
2019-04-27 13:16:48 +00:00
// handle possible objectstorage list operation eventual consistency gaps (list
// won't report a wal at seq X but a wal at X+n, if this kind of eventual
// consistency ever exists)
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) WriteWal ( ctx context . Context , actions [ ] * Action , cgt * ChangeGroupsUpdateToken ) ( * ChangeGroupsUpdateToken , error ) {
return d . WriteWalAdditionalOps ( ctx , actions , cgt , nil , nil )
2019-02-22 07:45:59 +00:00
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) WriteWalAdditionalOps ( ctx context . Context , actions [ ] * Action , cgt * ChangeGroupsUpdateToken , cmp [ ] etcdclientv3 . Cmp , then [ ] etcdclientv3 . Op ) ( * ChangeGroupsUpdateToken , error ) {
2019-04-29 08:12:03 +00:00
// check changegroups name
if cgt != nil {
for cgName := range cgt . ChangeGroupsRevisions {
if strings . Contains ( cgName , "/" ) {
return nil , fmt . Errorf ( ` changegroup name %q must not contain "/" ` , cgName )
}
if len ( cgName ) > maxChangegroupNameLength {
return nil , fmt . Errorf ( "changegroup name %q too long" , cgName )
}
}
}
2019-02-22 07:45:59 +00:00
if len ( actions ) == 0 {
return nil , errors . Errorf ( "cannot write wal: actions is empty" )
}
2019-04-26 14:00:03 +00:00
walSequence , err := sequence . IncSequence ( ctx , d . e , etcdWalSeqKey )
2019-02-22 07:45:59 +00:00
if err != nil {
return nil , err
}
2019-04-26 14:00:03 +00:00
resp , err := d . e . Get ( ctx , etcdWalsDataKey , 0 )
2019-02-22 07:45:59 +00:00
if err != nil {
return nil , err
}
var walsData WalsData
if err := json . Unmarshal ( resp . Kvs [ 0 ] . Value , & walsData ) ; err != nil {
return nil , err
}
walsData . Revision = resp . Kvs [ 0 ] . ModRevision
walDataFileID := uuid . NewV4 ( ) . String ( )
2019-04-26 14:00:03 +00:00
walDataFilePath := d . storageWalDataFile ( walDataFileID )
2019-02-22 07:45:59 +00:00
walKey := etcdWalKey ( walSequence . String ( ) )
var buf bytes . Buffer
for _ , action := range actions {
actionj , err := json . Marshal ( action )
if err != nil {
return nil , err
}
if _ , err := buf . Write ( actionj ) ; err != nil {
return nil , err
}
}
2019-05-02 07:49:55 +00:00
if err := d . ost . WriteObject ( walDataFilePath , bytes . NewReader ( buf . Bytes ( ) ) , int64 ( buf . Len ( ) ) , true ) ; err != nil {
2019-02-22 07:45:59 +00:00
return nil , err
}
2019-04-26 14:00:03 +00:00
d . log . Debugf ( "wrote wal file: %s" , walDataFilePath )
2019-02-22 07:45:59 +00:00
walData := & WalData {
2019-07-18 13:16:10 +00:00
WalSequence : walSequence . String ( ) ,
WalDataFileID : walDataFileID ,
WalStatus : WalStatusCommitted ,
PreviousWalSequence : walsData . LastCommittedWalSequence ,
2019-02-22 07:45:59 +00:00
}
2019-07-18 13:16:10 +00:00
walsData . LastCommittedWalSequence = walSequence . String ( )
2019-02-22 07:45:59 +00:00
walsDataj , err := json . Marshal ( walsData )
if err != nil {
return nil , err
}
walDataj , err := json . Marshal ( walData )
if err != nil {
return nil , err
}
if cmp == nil {
cmp = [ ] etcdclientv3 . Cmp { }
}
if then == nil {
then = [ ] etcdclientv3 . Op { }
}
getWalsData := etcdclientv3 . OpGet ( etcdWalsDataKey )
getWal := etcdclientv3 . OpGet ( walKey )
if cgt != nil {
for cgName , cgRev := range cgt . ChangeGroupsRevisions {
cgKey := path . Join ( etcdChangeGroupsDir , cgName )
if cgRev > 0 {
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( cgKey ) , "=" , cgRev ) )
} else {
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . CreateRevision ( cgKey ) , "=" , 0 ) )
}
then = append ( then , etcdclientv3 . OpPut ( cgKey , "" ) )
}
if cgt . CurRevision > 0 {
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( etcdChangeGroupMinRevisionKey ) , "<" , cgt . CurRevision + etcdChangeGroupMinRevisionRange ) )
}
}
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( etcdWalsDataKey ) , "=" , walsData . Revision ) )
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . Version ( walKey ) , "=" , 0 ) )
then = append ( then , etcdclientv3 . OpPut ( etcdWalsDataKey , string ( walsDataj ) ) )
then = append ( then , etcdclientv3 . OpPut ( walKey , string ( walDataj ) ) )
// This will only succeed if no one else have concurrently updated the walsData
// TODO(sgotti) retry if it failed due to concurrency errors
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ... ) . Then ( then ... ) . Else ( getWalsData , getWal )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return nil , etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
walsDataRev := tresp . Responses [ 0 ] . GetResponseRange ( ) . Kvs [ 0 ] . ModRevision
walDataCreateRev := tresp . Responses [ 0 ] . GetResponseRange ( ) . Kvs [ 0 ] . CreateRevision
// TODO(sgotti) If the tx failed due to walsdata already updated we could retry
if walsDataRev == walsData . Revision && walDataCreateRev == 0 {
return nil , errors . Errorf ( "failed to write committed wal: wals groups already updated" )
}
return nil , ErrConcurrency
}
ncgt := & ChangeGroupsUpdateToken { CurRevision : tresp . Header . Revision , ChangeGroupsRevisions : make ( changeGroupsRevisions ) }
if cgt != nil {
for cgName := range cgt . ChangeGroupsRevisions {
ncgt . ChangeGroupsRevisions [ cgName ] = tresp . Header . Revision
}
}
// try to commit storage right now
2019-04-26 14:00:03 +00:00
if err := d . sync ( ctx ) ; err != nil {
d . log . Errorf ( "wal sync error: %+v" , err )
2019-02-22 07:45:59 +00:00
}
return ncgt , nil
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) syncLoop ( ctx context . Context ) {
2019-02-22 07:45:59 +00:00
for {
2019-04-26 14:00:03 +00:00
d . log . Debugf ( "syncer" )
if err := d . sync ( ctx ) ; err != nil {
d . log . Errorf ( "syncer error: %+v" , err )
2019-02-22 07:45:59 +00:00
}
2019-11-08 09:10:56 +00:00
sleepCh := time . NewTimer ( DefaultSyncInterval ) . C
2019-02-22 07:45:59 +00:00
select {
case <- ctx . Done ( ) :
return
2019-07-25 13:53:26 +00:00
case <- sleepCh :
2019-02-22 07:45:59 +00:00
}
}
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) sync ( ctx context . Context ) error {
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdSyncLockKey )
2019-07-10 08:20:03 +00:00
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
2019-02-22 07:45:59 +00:00
if err := m . Lock ( ctx ) ; err != nil {
return err
}
2019-07-02 12:27:51 +00:00
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
2019-02-22 07:45:59 +00:00
2019-04-26 14:00:03 +00:00
resp , err := d . e . List ( ctx , etcdWalsDir + "/" , "" , 0 )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
for _ , kv := range resp . Kvs {
var walData WalData
if err := json . Unmarshal ( kv . Value , & walData ) ; err != nil {
return err
}
// wals must be committed and checkpointed in order.
// TODO(sgotti) this could be optimized by parallelizing writes of wals that don't have common change groups
switch walData . WalStatus {
case WalStatusCommitted :
2019-04-26 14:00:03 +00:00
walFilePath := d . storageWalStatusFile ( walData . WalSequence )
2019-07-18 11:34:10 +00:00
d . log . Debugf ( "syncing committed wal %q to storage" , walData . WalSequence )
2019-02-22 07:45:59 +00:00
header := & WalHeader {
WalDataFileID : walData . WalDataFileID ,
PreviousWalSequence : walData . PreviousWalSequence ,
}
headerj , err := json . Marshal ( header )
if err != nil {
return err
}
walFileCommittedPath := walFilePath + ".committed"
2019-05-02 07:49:55 +00:00
if err := d . ost . WriteObject ( walFileCommittedPath , bytes . NewReader ( headerj ) , int64 ( len ( headerj ) ) , true ) ; err != nil {
2019-02-22 07:45:59 +00:00
return err
}
2019-04-26 14:00:03 +00:00
d . log . Debugf ( "updating wal to state %q" , WalStatusCommittedStorage )
2019-02-22 07:45:59 +00:00
walData . WalStatus = WalStatusCommittedStorage
walDataj , err := json . Marshal ( walData )
if err != nil {
return err
}
cmp := [ ] etcdclientv3 . Cmp { }
then := [ ] etcdclientv3 . Op { }
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( string ( kv . Key ) ) , "=" , kv . ModRevision ) )
then = append ( then , etcdclientv3 . OpPut ( string ( kv . Key ) , string ( walDataj ) ) )
then = append ( then , etcdclientv3 . OpPut ( string ( etcdLastCommittedStorageWalSeqKey ) , string ( walData . WalSequence ) ) )
// This will only succeed if the no one else have concurrently updated the wal keys in etcd
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ... ) . Then ( then ... )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
return errors . Errorf ( "failed to write committedstorage wal: concurrent update" )
}
}
}
return nil
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) checkpointLoop ( ctx context . Context ) {
2019-02-22 07:45:59 +00:00
for {
2019-04-26 14:00:03 +00:00
d . log . Debugf ( "checkpointer" )
2019-07-18 12:58:42 +00:00
if err := d . checkpoint ( ctx , false ) ; err != nil {
2019-04-26 14:00:03 +00:00
d . log . Errorf ( "checkpoint error: %v" , err )
2019-02-22 07:45:59 +00:00
}
2019-07-25 13:53:26 +00:00
sleepCh := time . NewTimer ( d . checkpointInterval ) . C
2019-02-22 07:45:59 +00:00
select {
case <- ctx . Done ( ) :
return
2019-07-25 13:53:26 +00:00
case <- sleepCh :
2019-02-22 07:45:59 +00:00
}
}
}
2019-07-18 12:58:42 +00:00
func ( d * DataManager ) checkpoint ( ctx context . Context , force bool ) error {
2019-04-26 14:00:03 +00:00
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdCheckpointLockKey )
2019-07-10 08:20:03 +00:00
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
2019-02-22 07:45:59 +00:00
if err := m . Lock ( ctx ) ; err != nil {
return err
}
2019-07-02 12:27:51 +00:00
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
2019-02-22 07:45:59 +00:00
2019-04-26 14:00:03 +00:00
resp , err := d . e . List ( ctx , etcdWalsDir + "/" , "" , 0 )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
2019-04-26 14:00:03 +00:00
walsData := [ ] * WalData { }
2019-02-22 07:45:59 +00:00
for _ , kv := range resp . Kvs {
2019-04-26 14:00:03 +00:00
var walData * WalData
2019-02-22 07:45:59 +00:00
if err := json . Unmarshal ( kv . Value , & walData ) ; err != nil {
return err
}
2019-04-26 14:00:03 +00:00
walData . Revision = kv . ModRevision
2019-02-22 07:45:59 +00:00
if walData . WalStatus == WalStatusCommitted {
2019-04-26 14:00:03 +00:00
d . log . Warnf ( "wal %s not yet committed storage" , walData . WalSequence )
2019-02-22 07:45:59 +00:00
break
}
if walData . WalStatus == WalStatusCheckpointed {
continue
}
2019-04-26 14:00:03 +00:00
walsData = append ( walsData , walData )
}
2019-07-18 12:58:42 +00:00
if ! force && len ( walsData ) < d . minCheckpointWalsNum {
return nil
}
if len ( walsData ) == 0 {
2019-04-26 14:00:03 +00:00
return nil
}
2019-02-22 07:45:59 +00:00
2019-06-03 14:17:27 +00:00
if err := d . writeDataSnapshot ( ctx , walsData ) ; err != nil {
2019-05-23 09:23:14 +00:00
return errors . Errorf ( "checkpoint function error: %w" , err )
2019-04-26 14:00:03 +00:00
}
2019-02-22 07:45:59 +00:00
2019-04-26 14:00:03 +00:00
for _ , walData := range walsData {
d . log . Debugf ( "updating wal to state %q" , WalStatusCheckpointed )
2019-02-22 07:45:59 +00:00
walData . WalStatus = WalStatusCheckpointed
walDataj , err := json . Marshal ( walData )
if err != nil {
return err
}
2019-04-26 14:00:03 +00:00
walKey := etcdWalKey ( walData . WalSequence )
if _ , err := d . e . AtomicPut ( ctx , walKey , walDataj , walData . Revision , nil ) ; err != nil {
2019-02-22 07:45:59 +00:00
return err
}
}
return nil
}
2019-10-29 12:23:42 +00:00
// checkpointCleanLoop runs checkpointClean every d.checkpointCleanInterval
// until ctx is done. Errors are only logged.
func (d *DataManager) checkpointCleanLoop(ctx context.Context) {
	for {
		d.log.Debugf("checkpointCleanLoop")
		if err := d.checkpointClean(ctx); err != nil {
			d.log.Errorf("checkpointClean error: %v", err)
		}

		timer := time.NewTimer(d.checkpointCleanInterval)
		select {
		case <-ctx.Done():
			// release the timer instead of leaving it pending until it fires
			timer.Stop()
			return
		case <-timer.C:
		}
	}
}
// checkpointClean calls CleanOldCheckpoints while holding the etcd checkpoint
// lock, the same lock taken by checkpoint.
func (d *DataManager) checkpointClean(ctx context.Context) error {
	session, err := concurrency.NewSession(d.e.Client(), concurrency.WithTTL(5), concurrency.WithContext(ctx))
	if err != nil {
		return err
	}
	defer session.Close()

	lock := concurrency.NewMutex(session, etcdCheckpointLockKey)

	// TODO(sgotti) find a way to use a trylock so we'll just return if already
	// locked. Currently multiple task updaters will enqueue and start when another
	// finishes (unuseful and consume resources)
	if err := lock.Lock(ctx); err != nil {
		return err
	}
	defer func() { _ = lock.Unlock(ctx) }()

	return d.CleanOldCheckpoints(ctx)
}
2019-11-08 09:10:56 +00:00
func ( d * DataManager ) etcdWalCleanerLoop ( ctx context . Context ) {
2019-02-22 07:45:59 +00:00
for {
2019-11-08 09:10:56 +00:00
d . log . Debugf ( "etcdwalcleaner" )
if err := d . etcdWalCleaner ( ctx ) ; err != nil {
d . log . Errorf ( "etcdwalcleaner error: %v" , err )
2019-02-22 07:45:59 +00:00
}
2019-11-08 09:10:56 +00:00
sleepCh := time . NewTimer ( DefaultEtcdWalCleanInterval ) . C
2019-02-22 07:45:59 +00:00
select {
case <- ctx . Done ( ) :
return
2019-07-25 13:53:26 +00:00
case <- sleepCh :
2019-02-22 07:45:59 +00:00
}
}
}
2019-11-08 09:10:56 +00:00
// etcdWalCleaner will clean already checkpointed wals from etcd
2019-02-22 07:45:59 +00:00
// it must always keep at least one wal that is needed for resync operations
// from clients
2019-11-08 09:10:56 +00:00
func ( d * DataManager ) etcdWalCleaner ( ctx context . Context ) error {
2019-04-26 14:00:03 +00:00
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdWalCleanerLockKey )
2019-07-10 08:20:03 +00:00
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
2019-02-22 07:45:59 +00:00
if err := m . Lock ( ctx ) ; err != nil {
return err
}
2019-07-02 12:27:51 +00:00
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
2019-02-22 07:45:59 +00:00
2019-04-26 14:00:03 +00:00
resp , err := d . e . List ( ctx , etcdWalsDir + "/" , "" , 0 )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
2019-04-26 14:00:03 +00:00
if len ( resp . Kvs ) <= d . etcdWalsKeepNum {
2019-02-22 07:45:59 +00:00
return nil
}
2019-04-26 14:00:03 +00:00
removeCount := len ( resp . Kvs ) - d . etcdWalsKeepNum
2019-02-22 07:45:59 +00:00
for _ , kv := range resp . Kvs {
var walData WalData
if err := json . Unmarshal ( kv . Value , & walData ) ; err != nil {
return err
}
if walData . WalStatus != WalStatusCheckpointed {
break
}
// TODO(sgotti) check that the objectstorage returns the wal actions as checkpointed.
// With eventual consistent object storages like S3 we shouldn't remove a wal
// file from etcd (and so from the cache) until we are sure there're no
// eventual consistency issues. The difficult part is how to check them and be
// sure that no objects with old data will be returned? Is it enough to read
// it back or the result could just be luckily correct but another client may
// arrive to a differnt S3 server that is not yet in sync?
2019-04-26 14:00:03 +00:00
d . log . Infof ( "removing wal %q from etcd" , walData . WalSequence )
if _ , err := d . e . AtomicDelete ( ctx , string ( kv . Key ) , kv . ModRevision ) ; err != nil {
2019-02-22 07:45:59 +00:00
return err
}
removeCount --
if removeCount == 0 {
return nil
}
}
return nil
}
2019-11-08 09:10:56 +00:00
// storageWalCleanerLoop runs storageWalCleaner every
// DefaultStorageWalCleanInterval until ctx is done. Errors are only logged.
func (d *DataManager) storageWalCleanerLoop(ctx context.Context) {
	for {
		d.log.Debugf("storagewalcleaner")
		if err := d.storageWalCleaner(ctx); err != nil {
			d.log.Errorf("storagewalcleaner error: %v", err)
		}

		timer := time.NewTimer(DefaultStorageWalCleanInterval)
		select {
		case <-ctx.Done():
			// release the timer instead of leaving it pending until it fires
			timer.Stop()
			return
		case <-timer.C:
		}
	}
}
// storageWalCleaner will clean unneeded wals from the storage
func ( d * DataManager ) storageWalCleaner ( ctx context . Context ) error {
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdStorageWalCleanerLockKey )
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
if err := m . Lock ( ctx ) ; err != nil {
return err
}
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
firstDataStatus , err := d . GetFirstDataStatus ( )
if err != nil {
return err
}
firstWalSequence := firstDataStatus . WalSequence
// get the first wal in etcd (in any state) and use it's wal sequence if
// it's lesser than the first data status wal sequence
resp , err := d . e . List ( ctx , etcdWalsDir + "/" , "" , 0 )
if err != nil {
return err
}
if len ( resp . Kvs ) == 0 {
return errors . Errorf ( "no wals in etcd" )
}
var walData WalData
if err := json . Unmarshal ( resp . Kvs [ 0 ] . Value , & walData ) ; err != nil {
return err
}
if walData . WalSequence < firstWalSequence {
firstWalSequence = walData . WalSequence
}
doneCh := make ( chan struct { } )
defer close ( doneCh )
for object := range d . ost . List ( d . storageWalStatusDir ( ) + "/" , "" , true , doneCh ) {
if object . Err != nil {
return err
}
name := path . Base ( object . Path )
ext := path . Ext ( name )
walSequence := strings . TrimSuffix ( name , ext )
// handle committed status file and related data file
if ext == ".committed" {
if walSequence >= firstWalSequence {
break
}
header , err := d . ReadWal ( walSequence )
if err != nil {
return err
}
// first remove wal data file
walStatusFilePath := d . storageWalDataFile ( header . WalDataFileID )
d . log . Infof ( "removing %q" , walStatusFilePath )
if err := d . ost . DeleteObject ( walStatusFilePath ) ; err != nil {
2019-11-06 12:29:42 +00:00
if ! objectstorage . IsNotExist ( err ) {
2019-11-08 09:10:56 +00:00
return err
}
}
// then remove wal status files
d . log . Infof ( "removing %q" , object . Path )
if err := d . ost . DeleteObject ( object . Path ) ; err != nil {
2019-11-06 12:29:42 +00:00
if ! objectstorage . IsNotExist ( err ) {
2019-11-08 09:10:56 +00:00
return err
}
}
}
// handle old checkpointed status file
// TODO(sgotti) remove this in future versions since .checkpointed files are not created anymore
if ext == ".checkpointed" {
d . log . Infof ( "removing %q" , object . Path )
if err := d . ost . DeleteObject ( object . Path ) ; err != nil {
2019-11-06 12:29:42 +00:00
if ! objectstorage . IsNotExist ( err ) {
2019-11-08 09:10:56 +00:00
return err
}
}
}
}
return nil
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) compactChangeGroupsLoop ( ctx context . Context ) {
2019-02-22 07:45:59 +00:00
for {
2019-04-26 14:00:03 +00:00
if err := d . compactChangeGroups ( ctx ) ; err != nil {
d . log . Errorf ( "err: %+v" , err )
2019-02-22 07:45:59 +00:00
}
2019-11-08 09:10:56 +00:00
sleepCh := time . NewTimer ( DefaultCompactChangeGroupsInterval ) . C
2019-02-22 07:45:59 +00:00
select {
case <- ctx . Done ( ) :
return
2019-07-25 13:53:26 +00:00
case <- sleepCh :
2019-02-22 07:45:59 +00:00
}
}
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) compactChangeGroups ( ctx context . Context ) error {
2019-07-10 08:20:03 +00:00
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdCompactChangeGroupsLockKey )
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
if err := m . Lock ( ctx ) ; err != nil {
return err
}
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
2019-04-26 14:00:03 +00:00
resp , err := d . e . Client ( ) . Get ( ctx , etcdChangeGroupMinRevisionKey )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
2019-07-10 08:20:03 +00:00
if len ( resp . Kvs ) == 0 {
return errors . Errorf ( "no change group min revision key in etcd" )
}
2019-02-22 07:45:59 +00:00
revision := resp . Kvs [ 0 ] . ModRevision
// first update minrevision
cmp := etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( etcdChangeGroupMinRevisionKey ) , "=" , revision )
then := etcdclientv3 . OpPut ( etcdChangeGroupMinRevisionKey , "" )
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ) . Then ( then )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
return errors . Errorf ( "failed to update change group min revision key due to concurrent update" )
}
revision = tresp . Header . Revision
// then remove all the groups keys with modrevision < minrevision
2019-04-26 14:00:03 +00:00
resp , err = d . e . List ( ctx , etcdChangeGroupsDir , "" , 0 )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
for _ , kv := range resp . Kvs {
if kv . ModRevision < revision - etcdChangeGroupMinRevisionRange {
cmp := etcdclientv3 . Compare ( etcdclientv3 . ModRevision ( string ( kv . Key ) ) , "=" , kv . ModRevision )
then := etcdclientv3 . OpDelete ( string ( kv . Key ) )
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ) . Then ( then )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
2019-04-26 14:00:03 +00:00
d . log . Errorf ( "failed to update change group min revision key due to concurrent update" )
2019-02-22 07:45:59 +00:00
}
}
}
return nil
}
// etcdPingerLoop periodically updates a key.
// This is used by watchers to inform the client of the current revision
// this is needed since if other users are updating other unwatched keys on
// etcd we won't be notified, not updating the known revisions and thus all the
// walWrites will fails since the provided changegrouptoken will have an old
// revision
// TODO(sgotti) use upcoming etcd 3.4 watch RequestProgress???
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) etcdPingerLoop ( ctx context . Context ) {
2019-02-22 07:45:59 +00:00
for {
2019-04-26 14:00:03 +00:00
if err := d . etcdPinger ( ctx ) ; err != nil {
d . log . Errorf ( "err: %+v" , err )
2019-02-22 07:45:59 +00:00
}
2019-11-08 09:10:56 +00:00
sleepCh := time . NewTimer ( DefaultEtcdPingerInterval ) . C
2019-02-22 07:45:59 +00:00
select {
case <- ctx . Done ( ) :
return
2019-07-25 13:53:26 +00:00
case <- sleepCh :
2019-02-22 07:45:59 +00:00
}
}
}
2019-04-26 14:00:03 +00:00
func ( d * DataManager ) etcdPinger ( ctx context . Context ) error {
if _ , err := d . e . Put ( ctx , etcdPingKey , [ ] byte { } , nil ) ; err != nil {
2019-02-22 07:45:59 +00:00
return err
}
return nil
}
2019-07-18 12:54:07 +00:00
func ( d * DataManager ) InitEtcd ( ctx context . Context , dataStatus * DataStatus ) error {
2019-11-05 16:48:11 +00:00
writeWal := func ( wal * WalFile , prevWalSequence string ) error {
2019-04-26 14:00:03 +00:00
walFile , err := d . ost . ReadObject ( d . storageWalStatusFile ( wal . WalSequence ) + ".committed" )
2019-02-22 07:45:59 +00:00
if err != nil {
return err
}
dec := json . NewDecoder ( walFile )
var header * WalHeader
if err = dec . Decode ( & header ) ; err != nil && err != io . EOF {
walFile . Close ( )
return err
}
walFile . Close ( )
2019-11-05 16:48:11 +00:00
if prevWalSequence != "" {
if header . PreviousWalSequence != "" && header . PreviousWalSequence != prevWalSequence {
return errors . Errorf ( "wal %q previousWalSequence %q is different than expected walSequence %q" , wal . WalSequence , header . PreviousWalSequence , prevWalSequence )
}
}
2019-02-22 07:45:59 +00:00
walData := & WalData {
2019-07-18 13:16:10 +00:00
WalSequence : wal . WalSequence ,
WalDataFileID : header . WalDataFileID ,
WalStatus : WalStatusCommittedStorage ,
PreviousWalSequence : header . PreviousWalSequence ,
2019-02-22 07:45:59 +00:00
}
walDataj , err := json . Marshal ( walData )
if err != nil {
return err
}
cmp := [ ] etcdclientv3 . Cmp { }
then := [ ] etcdclientv3 . Op { }
// only add if it doesn't exist
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . CreateRevision ( etcdWalKey ( wal . WalSequence ) ) , "=" , 0 ) )
then = append ( then , etcdclientv3 . OpPut ( etcdWalKey ( wal . WalSequence ) , string ( walDataj ) ) )
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ... ) . Then ( then ... )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
return errors . Errorf ( "failed to sync etcd: wal %q already written" , wal . WalSequence )
}
return nil
}
2019-07-10 08:20:03 +00:00
session , err := concurrency . NewSession ( d . e . Client ( ) , concurrency . WithTTL ( 5 ) , concurrency . WithContext ( ctx ) )
if err != nil {
return err
}
defer session . Close ( )
m := concurrency . NewMutex ( session , etcdInitEtcdLockKey )
// TODO(sgotti) find a way to use a trylock so we'll just return if already
// locked. Currently multiple task updaters will enqueue and start when another
// finishes (unuseful and consume resources)
if err := m . Lock ( ctx ) ; err != nil {
return err
}
defer func ( ) { _ = m . Unlock ( ctx ) } ( )
2019-07-17 15:05:35 +00:00
mustInit := false
_ , err = d . e . Get ( ctx , etcdWalsDataKey , 0 )
if err != nil {
if err != etcd . ErrKeyNotFound {
return err
}
mustInit = true
}
if mustInit {
d . log . Infof ( "no data found in etcd, initializing" )
// delete all wals from etcd
if err := d . deleteEtcd ( ctx ) ; err != nil {
return err
}
}
// Always create changegroup min revision if it doesn't exists
2019-02-22 07:45:59 +00:00
cmp := [ ] etcdclientv3 . Cmp { }
then := [ ] etcdclientv3 . Op { }
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . CreateRevision ( etcdChangeGroupMinRevisionKey ) , "=" , 0 ) )
then = append ( then , etcdclientv3 . OpPut ( etcdChangeGroupMinRevisionKey , "" ) )
2019-04-26 14:00:03 +00:00
txn := d . e . Client ( ) . Txn ( ctx ) . If ( cmp ... ) . Then ( then ... )
2019-02-22 07:45:59 +00:00
if _ , err := txn . Commit ( ) ; err != nil {
return etcd . FromEtcdError ( err )
}
2019-07-17 15:05:35 +00:00
if ! mustInit {
2019-02-22 07:45:59 +00:00
return nil
}
// walsdata not found in etcd
2019-07-18 13:02:11 +00:00
var firstWal string
2019-07-18 12:54:07 +00:00
if dataStatus != nil {
firstWal = dataStatus . WalSequence
} else {
dataStatus , err = d . GetLastDataStatus ( )
2019-11-06 12:29:42 +00:00
if err != nil && ! errors . Is ( err , ErrNoDataStatus ) {
2019-07-18 12:54:07 +00:00
return err
}
// set the first wal to import in etcd if there's a snapshot. In this way we'll
// ignore older wals (or wals left after an import)
if err == nil {
firstWal = dataStatus . WalSequence
}
2019-07-18 13:02:11 +00:00
}
2019-04-27 13:16:48 +00:00
// if there're some wals in the objectstorage this means etcd has been reset.
2019-02-22 07:45:59 +00:00
// So take all the wals in committed or checkpointed state starting from the
// first not checkpointed wal and put them in etcd
lastCommittedStorageWalSequence := ""
2019-11-05 16:48:11 +00:00
previousWalSequence := ""
2019-02-22 07:45:59 +00:00
wroteWals := 0
2019-04-26 14:00:03 +00:00
for wal := range d . ListOSTWals ( "" ) {
2019-07-18 12:54:28 +00:00
// if there're wals in ost but not a datastatus return an error
if dataStatus == nil {
return errors . Errorf ( "no datastatus in etcd but some wals are present, this shouldn't happen" )
}
2019-04-26 14:00:03 +00:00
d . log . Debugf ( "wal: %s" , wal )
2019-02-22 07:45:59 +00:00
if wal . Err != nil {
return wal . Err
}
2019-07-18 13:02:11 +00:00
if wal . WalSequence < firstWal {
continue
}
2019-02-22 07:45:59 +00:00
lastCommittedStorageWalSequence = wal . WalSequence
2019-11-05 16:48:11 +00:00
if err := writeWal ( wal , previousWalSequence ) ; err != nil {
2019-02-22 07:45:59 +00:00
return err
}
2019-11-05 16:48:11 +00:00
previousWalSequence = wal . WalSequence
2019-02-22 07:45:59 +00:00
wroteWals ++
}
2019-07-18 11:34:10 +00:00
// insert an empty wal and make it already committedstorage
walSequence , err := sequence . IncSequence ( ctx , d . e , etcdWalSeqKey )
if err != nil {
return err
}
walDataFileID := uuid . NewV4 ( ) . String ( )
walDataFilePath := d . storageWalDataFile ( walDataFileID )
walKey := etcdWalKey ( walSequence . String ( ) )
if err := d . ost . WriteObject ( walDataFilePath , bytes . NewReader ( [ ] byte { } ) , 0 , true ) ; err != nil {
return err
}
d . log . Debugf ( "wrote wal file: %s" , walDataFilePath )
walFilePath := d . storageWalStatusFile ( walSequence . String ( ) )
d . log . Infof ( "syncing committed wal %q to storage" , walSequence . String ( ) )
header := & WalHeader {
WalDataFileID : walDataFileID ,
PreviousWalSequence : lastCommittedStorageWalSequence ,
}
headerj , err := json . Marshal ( header )
if err != nil {
return err
}
walFileCommittedPath := walFilePath + ".committed"
if err := d . ost . WriteObject ( walFileCommittedPath , bytes . NewReader ( headerj ) , int64 ( len ( headerj ) ) , true ) ; err != nil {
return err
}
walData := & WalData {
2019-07-18 13:16:10 +00:00
WalSequence : walSequence . String ( ) ,
WalDataFileID : walDataFileID ,
WalStatus : WalStatusCommittedStorage ,
PreviousWalSequence : lastCommittedStorageWalSequence ,
2019-07-18 11:34:10 +00:00
}
2019-07-18 13:16:10 +00:00
lastCommittedStorageWalSequence = walSequence . String ( )
2019-02-22 07:45:59 +00:00
walsData := & WalsData {
LastCommittedWalSequence : lastCommittedStorageWalSequence ,
}
2019-07-18 11:34:10 +00:00
walDataj , err := json . Marshal ( walData )
if err != nil {
return err
}
2019-02-22 07:45:59 +00:00
walsDataj , err := json . Marshal ( walsData )
if err != nil {
return err
}
// save walsdata and lastcommittedstoragewalseq only after writing all the
// wals in etcd
// in this way if something fails while adding wals to etcd it'll be retried
// since waldata doesn't exists
cmp = [ ] etcdclientv3 . Cmp { }
then = [ ] etcdclientv3 . Op { }
cmp = append ( cmp , etcdclientv3 . Compare ( etcdclientv3 . CreateRevision ( etcdWalsDataKey ) , "=" , 0 ) )
then = append ( then , etcdclientv3 . OpPut ( etcdWalsDataKey , string ( walsDataj ) ) )
then = append ( then , etcdclientv3 . OpPut ( etcdLastCommittedStorageWalSeqKey , lastCommittedStorageWalSequence ) )
2019-07-18 11:34:10 +00:00
then = append ( then , etcdclientv3 . OpPut ( walKey , string ( walDataj ) ) )
2019-04-26 14:00:03 +00:00
txn = d . e . Client ( ) . Txn ( ctx ) . If ( cmp ... ) . Then ( then ... )
2019-02-22 07:45:59 +00:00
tresp , err := txn . Commit ( )
if err != nil {
return etcd . FromEtcdError ( err )
}
if ! tresp . Succeeded {
2019-07-17 15:05:35 +00:00
return errors . Errorf ( "failed to sync etcd: walsdata already written" )
2019-02-22 07:45:59 +00:00
}
2019-07-18 11:34:10 +00:00
// force a checkpoint
if err := d . checkpoint ( ctx , true ) ; err != nil {
return err
}
2019-02-22 07:45:59 +00:00
return nil
}