enhancement: automatic cleanup of old repos/branches

This commit is contained in:
alessandro.pinna 2021-11-22 14:46:06 +01:00
parent 40bc118a1b
commit 9251a2a046
5 changed files with 375 additions and 0 deletions

View File

@ -162,6 +162,9 @@ type Gitserver struct {
Web Web `yaml:"web"` Web Web `yaml:"web"`
Etcd Etcd `yaml:"etcd"` Etcd Etcd `yaml:"etcd"`
ObjectStorage ObjectStorage `yaml:"objectStorage"` ObjectStorage ObjectStorage `yaml:"objectStorage"`
RepositoryCleanupInterval time.Duration `yaml:"repositoryCleanupInterval"`
RepositoryRefsExpireInterval time.Duration `yaml:"repositoryRefsExpireInterval"`
} }
type Web struct { type Web struct {
@ -261,6 +264,10 @@ var defaultConfig = Config{
}, },
ActiveTasksLimit: 2, ActiveTasksLimit: 2,
}, },
Gitserver: Gitserver{
RepositoryCleanupInterval: 24 * time.Hour,
RepositoryRefsExpireInterval: 30 * 24 * time.Hour,
},
} }
func Parse(configFile string, componentsNames []string) (*Config, error) { func Parse(configFile string, componentsNames []string) (*Config, error) {

View File

@ -0,0 +1,201 @@
// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gitserver
import (
"context"
"errors"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"agola.io/agola/internal/services/config"
"agola.io/agola/internal/util"
)
const (
// branchName is the branch whose presence or absence TestRepoCleaner checks
// after the cleanup ran.
// NOTE(review): the test never creates this branch explicitly, so it presumably
// relies on `git init` naming the initial branch "master" — confirm this holds
// when init.defaultBranch is configured differently.
branchName = "master"
// tagName is the tag created by createTag and looked up by TestRepoCleaner.
tagName = "v1.0"
)
// createTag prepares the tag used by TestRepoCleaner.
//
// Despite its name it does more than tagging: it creates the branch "test",
// checks it out, adds an empty commit with GIT_COMMITTER_DATE set to
// committerTime and finally tags that commit as tagName.
//
// The committer date is appended to git.Env, so it stays in effect for every
// later command run through the same git handle. Any failing git command
// aborts the calling test.
func createTag(t *testing.T, ctx context.Context, git *util.Git, committerTime time.Time) {
	// Report failures at the caller's line instead of inside this helper.
	t.Helper()

	if _, err := git.Output(ctx, nil, "branch", "test"); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if _, err := git.Output(ctx, nil, "checkout", "test"); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// The date must be in the environment before the commit is created.
	git.Env = append(git.Env, "GIT_COMMITTER_DATE="+committerTime.Format(time.RFC3339))

	if _, err := git.Output(ctx, nil, "commit", "--allow-empty", "-m", "root commit"); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if _, err := git.Output(ctx, nil, "tag", tagName, "-m", "tag test"); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
}
// createBranch adds an empty commit on the currently checked out branch with
// GIT_COMMITTER_DATE set to committerTime.
//
// The committer date is appended to git.Env, so it stays in effect for every
// later command run through the same git handle. A failing git command aborts
// the calling test.
func createBranch(t *testing.T, ctx context.Context, git *util.Git, committerTime time.Time) {
	// Report failures at the caller's line instead of inside this helper.
	t.Helper()

	git.Env = append(git.Env, "GIT_COMMITTER_DATE="+committerTime.Format(time.RFC3339))

	if _, err := git.Output(ctx, nil, "commit", "--allow-empty", "-m", "'root commit'"); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
}
// TestRepoCleaner checks that scanRepos removes refs whose last commit is
// older than RepositoryRefsExpireInterval and removes the whole repository
// directory once no branch and no tag is left.
//
// Every case builds a fresh repository under <tmp>/gitserver/user01/repo01,
// creates one commit on the initial branch and one tagged commit on a second
// branch (each with either an old or a current committer date), runs a single
// scan and then inspects the remaining refs.
func TestRepoCleaner(t *testing.T) {
	tests := []struct {
		name          string
		branchOldTime bool
		tagOldTime    bool
	}{
		{
			name:          "test delete branch",
			branchOldTime: true,
			tagOldTime:    false,
		},
		{
			name:          "test delete tag",
			branchOldTime: false,
			tagOldTime:    true,
		},
		{
			name:          "test delete repository dir",
			branchOldTime: true,
			tagOldTime:    true,
		},
	}

	// Far enough in the past to exceed the expire interval configured below.
	oldCommitterTime := time.Date(2015, time.January, 15, 1, 1, 1, 1, time.UTC)

	// committerTimeFor returns the expired timestamp or the current time.
	committerTimeFor := func(old bool) time.Time {
		if old {
			return oldCommitterTime
		}
		return time.Now()
	}

	// contains reports whether want is one of names.
	contains := func(names []string, want string) bool {
		for _, n := range names {
			if n == want {
				return true
			}
		}
		return false
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			dir, err := ioutil.TempDir("", "agola")
			if err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			defer os.RemoveAll(dir)

			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			gitDataDir := filepath.Join(dir, "gitserver")

			// Named gsConfig so the imported config package is not shadowed.
			gsConfig := &config.Gitserver{
				DataDir:                      gitDataDir,
				RepositoryCleanupInterval:    10 * time.Second,
				RepositoryRefsExpireInterval: 24 * time.Hour,
			}
			gs, err := NewGitserver(ctx, logger, gsConfig)
			if err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			userDirRepo := filepath.Join(gitDataDir, "user01", "repo01")
			if err := os.MkdirAll(userDirRepo, os.ModePerm); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			git := &util.Git{GitDir: userDirRepo}
			if _, err := git.Output(ctx, nil, "init"); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			// core.bare is unset while the commits are created and turned
			// back on before the scan.
			if _, err := git.Output(ctx, nil, "config", "--unset", "core.bare"); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			if _, err := git.Output(ctx, nil, "config", "user.email", "user01@example.com"); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			if _, err := git.Output(ctx, nil, "config", "user.name", "user01"); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			createBranch(t, ctx, git, committerTimeFor(tt.branchOldTime))
			createTag(t, ctx, git, committerTimeFor(tt.tagOldTime))

			if _, err := git.Output(ctx, nil, "config", "--bool", "core.bare", "true"); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			if err := gs.scanRepos(ctx); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			// With every ref expired the repository directory itself must be gone.
			if tt.branchOldTime && tt.tagOldTime {
				// os.Stat instead of os.Open: nothing to close if the directory
				// unexpectedly still exists.
				if _, err := os.Stat(userDirRepo); !errors.Is(err, os.ErrNotExist) {
					t.Fatalf("got %v error, want error: %v", err, os.ErrNotExist)
				}
				return
			}

			branches, err := gs.getBranches(git, ctx)
			if err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			branchFound := contains(branches, branchName)
			if tt.branchOldTime && branchFound {
				t.Fatalf("expected branch %s deleted", branchName)
			}
			if !tt.branchOldTime && !branchFound {
				t.Fatalf("expected branch %s", branchName)
			}

			tags, err := gs.getTags(git, ctx)
			if err != nil {
				t.Fatalf("unexpected err: %v", err)
			}
			tagFound := contains(tags, tagName)
			if tt.tagOldTime && tagFound {
				t.Fatalf("expected tag %s deleted", tagName)
			}
			if !tt.tagOldTime && !tagFound {
				t.Fatalf("expected tag %s", tagName)
			}
		})
	}
}

View File

@ -179,6 +179,9 @@ func (s *Gitserver) Run(ctx context.Context) error {
} }
}() }()
// TODO: a lock is needed here; without it the repo cleaner can race with a push that happens while it is running
go s.repoCleanerLoop(ctx)
select { select {
case <-ctx.Done(): case <-ctx.Done():
log.Infof("gitserver exiting") log.Infof("gitserver exiting")

View File

@ -0,0 +1,163 @@
package gitserver
import (
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"time"
"agola.io/agola/internal/util"
)
// repoCleanerLoop runs scanRepos over and over until ctx is done.
//
// Before every scan, including the first one, it waits for
// s.c.RepositoryCleanupInterval. A scan error is logged and does not stop
// the loop.
func (s *Gitserver) repoCleanerLoop(ctx context.Context) {
	for {
		nextScan := time.After(s.c.RepositoryCleanupInterval)

		select {
		case <-nextScan:
			// Interval elapsed: fall through to the scan below.
		case <-ctx.Done():
			log.Info("repoCleaner exiting")
			return
		}

		if err := s.scanRepos(ctx); err != nil {
			log.Errorf("scanRepos error: %v", err)
		}
	}
}
// scanRepos walks the <DataDir>/<user>/<repo> directory layout and runs
// scanRepo on every repository directory it finds. Non-directory entries are
// ignored at both levels.
//
// It returns an error when the data dir itself cannot be listed or when ctx is
// done before the scan completes. A user directory that cannot be listed and a
// repository whose scan fails are logged and skipped, so one bad entry does not
// prevent the remaining repositories from being cleaned.
func (s *Gitserver) scanRepos(ctx context.Context) error {
	log.Info("repoCleaner scanRepos start")

	usersDir, err := ioutil.ReadDir(s.c.DataDir)
	if err != nil {
		return err
	}

	for _, u := range usersDir {
		if !u.IsDir() {
			continue
		}

		userDir := filepath.Join(s.c.DataDir, u.Name())
		reposDir, err := ioutil.ReadDir(userDir)
		if err != nil {
			// Best effort: report the problem instead of silently treating
			// the directory as empty, then move on to the next user.
			log.Errorf("failed to read dir %q: %v", userDir, err)
			continue
		}

		for _, r := range reposDir {
			if !r.IsDir() {
				continue
			}

			// Stop right away on shutdown instead of starting a git command
			// with an already finished context for every remaining repo.
			if err := ctx.Err(); err != nil {
				return err
			}

			if err := s.scanRepo(ctx, filepath.Join(userDir, r.Name())); err != nil {
				log.Errorf("scanRepo error: %v", err)
			}
		}
	}

	log.Info("repoCleaner scanRepos end")
	return nil
}
// scanRepo cleans up the git repository stored at repoDir.
//
// A branch or tag is deleted when the committer time of the commit it points
// to is at least s.c.RepositoryRefsExpireInterval in the past. Afterwards
// `git prune` is run and the refs are listed again; if neither a branch nor a
// tag is left, the whole repository directory is removed.
//
// The first failing step aborts the scan of this repository and is returned
// wrapped with a description of the step.
func (s *Gitserver) scanRepo(ctx context.Context, repoDir string) error {
	git := &util.Git{GitDir: repoDir}

	branches, err := s.getBranches(git, ctx)
	if err != nil {
		return fmt.Errorf("failed to get git branches: %w", err)
	}
	for _, b := range branches {
		committerTime, err := s.getLastCommiterTime(ctx, git, "refs/heads/"+b)
		if err != nil {
			return fmt.Errorf("failed to get last commit time: %w", err)
		}
		if time.Since(committerTime) < s.c.RepositoryRefsExpireInterval {
			continue
		}
		if err := s.deleteBranch(ctx, git, b); err != nil {
			return fmt.Errorf("failed to delete git branch: %w", err)
		}
	}

	tags, err := s.getTags(git, ctx)
	if err != nil {
		return fmt.Errorf("failed to get git tags: %w", err)
	}
	for _, tag := range tags {
		committerTime, err := s.getLastCommiterTime(ctx, git, "refs/tags/"+tag)
		if err != nil {
			return fmt.Errorf("failed to get last commit time: %w", err)
		}
		if time.Since(committerTime) < s.c.RepositoryRefsExpireInterval {
			continue
		}
		if err := s.deleteTag(ctx, git, tag); err != nil {
			return fmt.Errorf("failed to delete git tag: %w", err)
		}
	}

	if _, err := git.Output(ctx, nil, "prune"); err != nil {
		return fmt.Errorf("git prune failed: %w", err)
	}

	// List the refs again rather than tracking deletions: the repository
	// itself decides whether anything is left.
	remainingBranches, err := s.getBranches(git, ctx)
	if err != nil {
		return fmt.Errorf("failed to get git branches: %w", err)
	}
	remainingTags, err := s.getTags(git, ctx)
	if err != nil {
		return fmt.Errorf("failed to get git tags: %w", err)
	}

	if len(remainingBranches) == 0 && len(remainingTags) == 0 {
		log.Infof("deleting repo: %s", repoDir)
		if err := s.deleteRepo(ctx, repoDir); err != nil {
			return fmt.Errorf("failed to delete repository: %w", err)
		}
	}

	return nil
}
// getBranches lists the refs below refs/heads/ of the given repository via
// `git for-each-ref`, formatted as short ref names.
// On failure the returned slice is nil.
func (s *Gitserver) getBranches(git *util.Git, ctx context.Context) ([]string, error) {
	names, err := git.OutputLines(ctx, nil, "for-each-ref", "--format=%(refname:short)", "refs/heads/")
	if err != nil {
		names = nil
	}
	return names, err
}
// getTags lists the refs below refs/tags/ of the given repository via
// `git for-each-ref`, formatted as short ref names.
// On failure the returned slice is nil.
func (s *Gitserver) getTags(git *util.Git, ctx context.Context) ([]string, error) {
	names, err := git.OutputLines(ctx, nil, "for-each-ref", "--format=%(refname:short)", "refs/tags/")
	if err != nil {
		names = nil
	}
	return names, err
}
// getLastCommiterTime returns the committer date of the most recent commit
// reachable from ref.
//
// It runs `git log -1 --format=%cI <ref>` and parses the single output line as
// an RFC 3339 timestamp. An error is returned when the command fails, when it
// does not print exactly one line, or when that line cannot be parsed; the
// returned time is the zero value in all of those cases.
func (s *Gitserver) getLastCommiterTime(ctx context.Context, git *util.Git, ref string) (time.Time, error) {
	var zero time.Time

	lines, err := git.OutputLines(ctx, nil, "log", "-1", "--format=%cI", ref)
	switch {
	case err != nil:
		return zero, err
	case len(lines) != 1:
		return zero, errors.New("git log error: must return one line")
	}

	parsed, err := time.Parse(time.RFC3339, lines[0])
	if err != nil {
		return zero, err
	}
	return parsed, nil
}
// deleteBranch removes branch from the repository with `git branch -D`.
// The command output is discarded; only the error is reported.
func (s *Gitserver) deleteBranch(ctx context.Context, git *util.Git, branch string) error {
	if _, err := git.Output(ctx, nil, "branch", "-D", branch); err != nil {
		return err
	}
	return nil
}
// deleteTag removes tag from the repository with `git tag -d`.
// The command output is discarded; only the error is reported.
func (s *Gitserver) deleteTag(ctx context.Context, git *util.Git, tag string) error {
	if _, err := git.Output(ctx, nil, "tag", "-d", tag); err != nil {
		return err
	}
	return nil
}
// deleteRepo removes repoDir and everything below it from disk.
// ctx is accepted for symmetry with the other helpers but is not used.
func (s *Gitserver) deleteRepo(ctx context.Context, repoDir string) error {
	err := os.RemoveAll(repoDir)
	return err
}

View File

@ -293,6 +293,7 @@ func setup(ctx context.Context, t *testing.T, dir string) (*testutil.TestEmbedde
Etcd: config.Etcd{ Etcd: config.Etcd{
Endpoints: "", Endpoints: "",
}, },
RepositoryCleanupInterval: 24 * time.Hour,
}, },
} }