cleanup.go (472 lines of code) (raw):
package main
import (
"bytes"
"errors"
"fmt"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/dustin/go-humanize"
"github.com/fsouza/go-dockerclient"
"gitlab.com/ayufan/golang-cli-helpers"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers/cli"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers/docker"
"os"
"path"
"path/filepath"
"strings"
"syscall"
"time"
)
// Fixed tunables used throughout the cleanup tool.
const (
	// dockerAPIVersion is the Docker remote API version requested when
	// connecting to the daemon.
	dockerAPIVersion = "1.18"

	// objectPastTimeDivisor is the unit in which time elapsed past an
	// object's TTL is expressed when computing its score.
	objectPastTimeDivisor = time.Second

	// danglingImageBonus is added to the score of an expired image that has
	// no repository tags.
	danglingImageBonus = 1000

	// spaceAllFree is the largest value a uint64 can hold.
	spaceAllFree uint64 = ^uint64(0)
)

// version and revision make up the application version string; both default
// to "dev".
var (
	version  = "dev"
	revision = "dev"
)
var (
	// internalImages holds glob patterns (filepath.Match syntax) that are
	// compared against image repo tags; matching images are skipped when
	// choosing what to delete.
	internalImages = []string{
		"gitlab/gitlab-runner:*",
		"quay.io/gitlab-runner:*",
		"quay.io/gitlab-runner-*:*",
	}

	// diskSpaceImage is the image used for the helper container that runs
	// `stat` to measure disk space through the Docker daemon.
	diskSpaceImage = "alpine"
)
// opts holds the tool's runtime options together with their defaults. The
// struct tags describe the command-line flag name, help text and environment
// variable for each option.
var opts = struct {
	MonitorPath            string        `long:"check-path" description:"Path to monitor when verifying disk space" env:"CHECK_PATH"`
	LowFreeSpace           string        `long:"low-free-space" description:"When to trigger cleanup cycle" env:"LOW_FREE_SPACE"`
	ExpectedFreeSpace      string        `long:"expected-free-space" description:"How much free space to cleanup" env:"EXPECTED_FREE_SPACE"`
	LowFreeFilesCount      uint64        `long:"low-files-count" description:"Trigger cleanup cycle if number of i-nodes runs below this value" env:"LOW_FREE_FILES_COUNT"`
	ExpectedFreeFilesCount uint64        `long:"expected-files-count" description:"How much free i-nodes to recycle" env:"EXPECTED_FREE_FILES_COUNT"`
	UseDf                  bool          `long:"use-df" description:"Use 'df' to check disk space instead of docker container" env:"USE_DF"`
	CheckInterval          time.Duration `long:"check-interval" description:"How often to check disk space?" env:"CHECK_INTERVAL"`
	RetryInterval          time.Duration `long:"retry-interval" description:"How long to wait before trying again?" env:"RETRY_INTERVAL"`
	DefaultTTL             time.Duration `long:"ttl" description:"Default minimum TTL for caches and images" env:"DEFAULT_TTL"`
}{
	MonitorPath:            "/",
	LowFreeSpace:           "1GB",
	ExpectedFreeSpace:      "2GB",
	LowFreeFilesCount:      128 * 1024,
	ExpectedFreeFilesCount: 256 * 1024,
	UseDf:                  false,
	CheckInterval:          10 * time.Second,
	RetryInterval:          30 * time.Second,
	DefaultTTL:             1 * time.Minute,
}
// DiskSpace is a snapshot of a filesystem's free and total capacity, counted
// both in bytes and in files.
type DiskSpace struct {
	BytesFree  uint64 // bytes currently free
	BytesTotal uint64 // total size in bytes
	FilesFree  uint64 // file entries currently free
	FilesTotal uint64 // total number of file entries
}
// DockerClient is the set of Docker operations the cleanup logic in this file
// calls. The functions below accept this interface rather than a concrete
// client; runCleanupTool supplies a *CustomDockerClient.
type DockerClient interface {
Ping() error
ListImages(opts docker.ListImagesOptions) ([]docker.APIImages, error)
ListContainers(opts docker.ListContainersOptions) ([]docker.APIContainers, error)
RemoveImage(name string) error
RemoveContainer(opts docker.RemoveContainerOptions) error
InspectContainer(id string) (*docker.Container, error)
// DiskSpace reports free and total bytes and files for the filesystem at path.
DiskSpace(path string) (DiskSpace, error)
}
// CustomDockerClient embeds *docker.Client and adds the disk space
// measurement methods defined in this file (DiskSpace and its helpers).
type CustomDockerClient struct {
*docker.Client
}
// ObjectTTL records when a tracked object was last marked as used and the
// point in time its score is measured from.
type ObjectTTL struct {
// Used is the time of the most recent mark call.
Used time.Time
// TTL is Used plus the duration that was passed to mark.
TTL time.Time
}
// mark stamps the object as used right now and sets its TTL to the current
// time plus ttl.
func (u *ObjectTTL) mark(ttl time.Duration) {
	now := time.Now()
	u.Used, u.TTL = now, now.Add(ttl)
}
// score returns how far the current time is past the object's TTL, expressed
// in objectPastTimeDivisor units. The result is negative while the TTL lies
// in the future.
func (u *ObjectTTL) score() int64 {
	overdue := time.Since(u.TTL)
	return int64(overdue / objectPastTimeDivisor)
}
// ImageInfo pairs a Docker image listing entry with its usage/TTL record.
type ImageInfo struct {
docker.APIImages
ObjectTTL
}
// score returns the embedded ObjectTTL score, increased by
// danglingImageBonus when that score is positive and the image has no repo
// tags.
func (i *ImageInfo) score() int64 {
	base := i.ObjectTTL.score()
	if base <= 0 || len(i.RepoTags) != 0 {
		return base
	}
	return base + danglingImageBonus
}
// CacheInfo pairs a Docker container listing entry for a cache container
// with its usage/TTL record.
type CacheInfo struct {
docker.APIContainers
ObjectTTL
}
// Package-level state shared by the cleanup cycle.
var (
	// dockerCredentials is filled from command-line flags and used when
	// connecting to the Docker daemon.
	dockerCredentials docker_helpers.DockerCredentials

	// imagesUsed tracks known images by image ID.
	imagesUsed = make(map[string]ImageInfo)

	// cachesUsed tracks known cache containers by container ID.
	cachesUsed = make(map[string]CacheInfo)
)
// diskSpaceLocally measures the filesystem containing path with a statfs
// system call made by this process. Byte counts are the statfs Bavail and
// Blocks fields multiplied by Bsize; file counts come from Ffree and Files.
// On failure the zero DiskSpace and the statfs error are returned.
func (c *CustomDockerClient) diskSpaceLocally(path string) (ds DiskSpace, err error) {
	var fs syscall.Statfs_t
	if err = syscall.Statfs(path, &fs); err != nil {
		return ds, err
	}

	blockSize := uint64(fs.Bsize)
	ds.BytesFree = fs.Bavail * blockSize
	ds.BytesTotal = fs.Blocks * blockSize
	ds.FilesFree = fs.Ffree
	ds.FilesTotal = fs.Files
	return ds, nil
}
// diskSpaceRemotely measures the filesystem containing path by running
// `stat -f` in a short-lived container created from diskSpaceImage and
// parsing the last line of its output.
//
// The image is pulled first when InspectImage returns an error for it. The
// helper container is force-removed when the function returns.
//
// A non-nil error is returned when any Docker call fails, when the container
// exits with a non-zero status, or when the output cannot be parsed. In all
// of those cases ds is the zero value and must not be used: reporting zero
// free space without an error would make callers start deleting images and
// caches.
func (c *CustomDockerClient) diskSpaceRemotely(path string) (ds DiskSpace, err error) {
	if _, err = c.InspectImage(diskSpaceImage); err != nil {
		logrus.Debugln("Pulling", diskSpaceImage, "...")
		err = c.PullImage(docker.PullImageOptions{
			Repository: diskSpaceImage,
		}, docker.AuthConfiguration{})
		if err != nil {
			return ds, err
		}
	}

	// The format prints: free blocks, total blocks, block size, free file
	// nodes, total file nodes.
	container, err := c.CreateContainer(docker.CreateContainerOptions{
		Config: &docker.Config{
			Image:        diskSpaceImage,
			Entrypoint:   []string{"/bin/stat"},
			Cmd:          []string{"-f", "-c%a %b %s %d %c", path},
			AttachStdout: true,
		},
	})
	if err != nil {
		return ds, err
	}
	// Best-effort cleanup; a failure to remove the helper container does not
	// affect the measurement.
	defer c.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})

	if err = c.StartContainer(container.ID, nil); err != nil {
		return ds, err
	}

	exitCode, err := c.WaitContainer(container.ID)
	if err != nil {
		return ds, err
	}
	if exitCode != 0 {
		// Previously this path returned a zero DiskSpace with a nil error.
		return ds, fmt.Errorf("disk space check of %q exited with status %d", path, exitCode)
	}

	var output bytes.Buffer
	err = c.Logs(docker.LogsOptions{
		Container:    container.ID,
		OutputStream: &output,
		Stdout:       true,
		Tail:         "1",
	})
	if err != nil {
		return ds, err
	}

	var freeBlocks, totalBlocks, blockSize, freeFiles, totalFiles uint64
	_, err = fmt.Fscanln(&output, &freeBlocks, &totalBlocks, &blockSize, &freeFiles, &totalFiles)
	if err != nil {
		return ds, err
	}

	ds = DiskSpace{
		BytesFree:  freeBlocks * blockSize,
		BytesTotal: totalBlocks * blockSize,
		FilesFree:  freeFiles,
		FilesTotal: totalFiles,
	}
	return ds, nil
}
// DiskSpace reports free and total bytes and files for the filesystem at
// path. With opts.UseDf set the measurement is taken by this process;
// otherwise it is taken inside a helper container.
func (c *CustomDockerClient) DiskSpace(path string) (DiskSpace, error) {
	if !opts.UseDf {
		return c.diskSpaceRemotely(path)
	}
	return c.diskSpaceLocally(path)
}
// isInternalImage reports whether any repo tag of image matches one of the
// internalImages glob patterns. Pattern errors from filepath.Match are
// ignored and count as a non-match.
func isInternalImage(image docker.APIImages) bool {
	for _, pattern := range internalImages {
		for _, repoTag := range image.RepoTags {
			ok, _ := filepath.Match(pattern, repoTag)
			if ok {
				return true
			}
		}
	}
	return false
}
// removeImage asks the client to remove image by ID, logs the outcome, and
// returns the client's error (nil on success).
func removeImage(client DockerClient, image docker.APIImages) error {
	removeErr := client.RemoveImage(image.ID)
	if removeErr != nil {
		logrus.Warningln("Failed to remove image", image.ID, image.RepoTags, strings.TrimSpace(removeErr.Error()))
		return removeErr
	}
	logrus.Infoln("Removed image", image.ID, image.RepoTags)
	return nil
}
// removeCache force-removes the cache container together with its volumes,
// logs the outcome, and returns the client's error (nil on success).
func removeCache(client DockerClient, cache docker.APIContainers) error {
	request := docker.RemoveContainerOptions{
		ID:            cache.ID,
		RemoveVolumes: true,
		Force:         true,
	}
	removeErr := client.RemoveContainer(request)
	if removeErr != nil {
		logrus.Warningln("Failed to remove cache", cache.ID, cache.Names, strings.TrimSpace(removeErr.Error()))
		return removeErr
	}
	logrus.Infoln("Removed cache", cache.ID, cache.Names)
	return nil
}
// handleDockerImageID marks the tracked image with the given ID as used and
// then does the same for each ancestor reachable through ParentID. The walk
// stops at the first ID that is not present in imagesUsed or at an image
// with an empty ParentID.
func handleDockerImageID(client DockerClient, id string) {
	current := id
	for {
		logrus.Debugln("handleDockerImageID", current)
		info, tracked := imagesUsed[current]
		if !tracked {
			return
		}

		// Map values are copies, so the refreshed entry has to be stored back.
		info.mark(opts.DefaultTTL)
		imagesUsed[current] = info

		if info.ParentID == "" {
			return
		}
		current = info.ParentID
	}
}
// isCacheContainer reports whether at least one of names contains all of the
// substrings "runner-", "-project-", "-concurrent-" and "-cache-". The order
// in which they appear within a name is irrelevant.
func isCacheContainer(names ...string) bool {
	markers := [...]string{"runner-", "-project-", "-concurrent-", "-cache-"}

nextName:
	for _, candidate := range names {
		for _, marker := range markers {
			if !strings.Contains(candidate, marker) {
				continue nextName
			}
		}
		return true
	}
	return false
}
// handleDockerContainer marks everything the given container depends on as
// used: its image (and the image's ancestors), and, for non-cache containers,
// every container referenced through VolumesFrom or Links, recursively.
//
// When the container itself is a cache container, its cachesUsed entry (if
// any) is refreshed and its own references are not followed.
func handleDockerContainer(client DockerClient, container *docker.Container) {
	logrus.Debugln("handleDockerContainer", container.Name, container.ID, container.Image, container.State.Running)
	handleDockerImageID(client, container.Image)

	if isCacheContainer(container.Name) {
		if cache, ok := cachesUsed[container.ID]; ok {
			// Map values are copies, so the refreshed entry is stored back.
			cache.mark(opts.DefaultTTL)
			cachesUsed[container.ID] = cache
		}
		return
	}

	// Without a host configuration there are no references to follow.
	hostConfig := container.HostConfig
	if hostConfig == nil {
		return
	}

	for _, source := range hostConfig.VolumesFrom {
		// Entries may carry an access mode suffix ("name:ro" / "name:rw");
		// only the container name can be inspected.
		handleDockerContainerID(client, strings.SplitN(source, ":", 2)[0])
	}
	for _, link := range hostConfig.Links {
		// Links have the form "container:alias". SplitN always yields at
		// least one element, so indexing [0] is safe.
		handleDockerContainerID(client, strings.SplitN(link, ":", 2)[0])
	}
}
// handleDockerContainerID inspects the container with the given ID and hands
// the result to handleDockerContainer. An inspect failure is logged as a
// warning and otherwise ignored.
func handleDockerContainerID(client DockerClient, containerID string) {
	inspected, inspectErr := client.InspectContainer(containerID)
	if inspectErr == nil {
		handleDockerContainer(client, inspected)
		return
	}
	logrus.Warningln("Failed to inspect container", containerID, inspectErr)
}
// updateImages rebuilds imagesUsed from the daemon's full image list
// (including intermediate layers). Images already tracked keep their TTL
// record; newly seen images are logged and marked as used now. Images no
// longer listed drop out of the map. imagesUsed is left untouched when the
// listing fails.
func updateImages(client DockerClient) error {
	listed, err := client.ListImages(docker.ListImagesOptions{All: true})
	if err != nil {
		return err
	}

	tracked := make(map[string]ImageInfo, len(listed))
	for _, img := range listed {
		info := ImageInfo{APIImages: img}
		if known, seen := imagesUsed[img.ID]; seen {
			info.ObjectTTL = known.ObjectTTL
		} else {
			logrus.Infoln("Detected a new image", img.ID, img.RepoTags)
			info.mark(opts.DefaultTTL)
		}
		tracked[img.ID] = info
	}

	imagesUsed = tracked
	return nil
}
// updateContainers rebuilds cachesUsed from the daemon's container list
// (running or not) and then marks everything referenced by the remaining,
// non-cache containers as used.
//
// Cache containers already tracked keep their TTL record; newly seen ones
// are logged and marked as used now. cachesUsed is replaced before the
// non-cache containers are processed, because processing them refreshes
// entries in that map.
func updateContainers(client DockerClient) error {
	listed, err := client.ListContainers(docker.ListContainersOptions{All: true})
	if err != nil {
		return err
	}

	// First pass: split the listing into caches and everything else.
	caches := make(map[string]CacheInfo)
	var otherIDs []string
	for _, ctr := range listed {
		if !isCacheContainer(ctr.Names...) {
			otherIDs = append(otherIDs, ctr.ID)
			continue
		}

		info := CacheInfo{APIContainers: ctr}
		if known, seen := cachesUsed[ctr.ID]; seen {
			info.ObjectTTL = known.ObjectTTL
		} else {
			logrus.Infoln("Detected a new cache", ctr.ID, ctr.Names)
			info.mark(opts.DefaultTTL)
		}
		caches[ctr.ID] = info
	}
	cachesUsed = caches

	// Second pass: follow the non-cache containers to mark images and caches.
	for _, id := range otherIDs {
		handleDockerContainerID(client, id)
	}
	return nil
}
// doFreeSpace removes images and cache containers, one per iteration, until
// the monitored path has more than freeSpace bytes and more than freeFiles
// files free.
//
// Each iteration picks the candidate with the highest score among tracked,
// non-internal images and tracked cache containers; only candidates with a
// non-negative score are eligible. On equal scores the earlier image wins
// over later images, and images win over caches. The chosen candidate is
// dropped from the local list whether or not its removal succeeded, so it is
// not retried within this call.
//
// It returns the listing or disk space error if one occurs, an error when no
// eligible candidate is left, and otherwise the result of the last removal
// attempt (nil if nothing had to be removed).
func doFreeSpace(client DockerClient, freeSpace, freeFiles uint64) error {
	images, err := client.ListImages(docker.ListImagesOptions{})
	if err != nil {
		logrus.Warningln("Failed to list images:", err)
		return err
	}
	containers, err := client.ListContainers(docker.ListContainersOptions{All: true})
	if err != nil {
		logrus.Warningln("Failed to list containers:", err)
		return err
	}

	var lastError error
	for {
		usage, err := client.DiskSpace(opts.MonitorPath)
		if err != nil {
			return err
		}
		if usage.BytesFree > freeSpace && usage.FilesFree > freeFiles {
			return lastError
		}

		// At most one of imageAt / cacheAt is non-negative after selection.
		topScore := int64(-1)
		imageAt, cacheAt := -1, -1

		for i := range images {
			if isInternalImage(images[i]) {
				continue
			}
			info, tracked := imagesUsed[images[i].ID]
			if !tracked {
				continue
			}
			if s := info.score(); s > topScore {
				topScore, imageAt, cacheAt = s, i, -1
			}
		}

		for i := range containers {
			if !isCacheContainer(containers[i].Names...) {
				continue
			}
			info, tracked := cachesUsed[containers[i].ID]
			if !tracked {
				continue
			}
			if s := info.score(); s > topScore {
				topScore, imageAt, cacheAt = s, -1, i
			}
		}

		logrus.Debugln("doFreeCycle", topScore, imageAt, cacheAt)

		switch {
		case imageAt >= 0:
			lastError = removeImage(client, images[imageAt])
			images = append(images[:imageAt], images[imageAt+1:]...)
		case cacheAt >= 0:
			lastError = removeCache(client, containers[cacheAt])
			containers = append(containers[:cacheAt], containers[cacheAt+1:]...)
		default:
			return errors.New("no images or caches to delete")
		}
	}
}
// doCycle runs one monitoring pass: it refreshes the image and cache
// bookkeeping, measures the monitored path, and starts a cleanup when free
// bytes are below lowFreeSpace or free files are below lowFreeFiles.
//
// A cleanup tries to bring the path above freeSpace free bytes and freeFiles
// free files. The function returns nil when nothing had to be freed, the
// first bookkeeping or measurement error otherwise, or the error reported by
// the cleanup itself.
func doCycle(client DockerClient, lowFreeSpace, freeSpace, lowFreeFiles, freeFiles uint64) error {
	if err := updateImages(client); err != nil {
		logrus.Warningln("Failed to update images:", err)
		return err
	}
	if err := updateContainers(client); err != nil {
		logrus.Warningln("Failed to update caches:", err)
		return err
	}

	before, err := client.DiskSpace(opts.MonitorPath)
	if err != nil {
		logrus.Warningln("Failed to verify disk space:", err)
		return err
	}

	lowOnBytes := before.BytesFree < lowFreeSpace
	lowOnFiles := before.FilesFree < lowFreeFiles
	if !lowOnBytes && !lowOnFiles {
		logrus.Debugln("Nothing to free. Current free disk space", humanize.Bytes(before.BytesFree),
			"is above the lower bound", humanize.Bytes(lowFreeSpace))
		logrus.Debugln("Nothing to free. Current free files count", before.FilesFree,
			"is above the lower bound", lowFreeFiles)
		return nil
	}

	if lowOnBytes {
		logrus.Warningln("Freeing disk space. The disk space is below the lower bound:", humanize.Bytes(before.BytesFree),
			"trying to free up to:", humanize.Bytes(freeSpace))
	}
	// The file count is compared against the file threshold; it was
	// previously compared against the byte threshold by mistake.
	if lowOnFiles {
		logrus.Warningln("Freeing files count. The free file count is below the lower bound:", before.FilesFree,
			"trying to free up to:", freeFiles)
	}

	freeSpaceErr := doFreeSpace(client, freeSpace, freeFiles)
	if freeSpaceErr != nil {
		logrus.Warningln("Failed to free disk space:", freeSpaceErr)
	}

	// gained clamps at zero: the values are unsigned, so a plain subtraction
	// would wrap around if free space shrank while the cleanup was running.
	gained := func(after, before uint64) uint64 {
		if after > before {
			return after - before
		}
		return 0
	}

	after, err := client.DiskSpace(opts.MonitorPath)
	if err == nil {
		logrus.Infoln("Freed",
			"bytes:", humanize.Bytes(gained(after.BytesFree, before.BytesFree)),
			"files:", gained(after.FilesFree, before.FilesFree))
	}
	return freeSpaceErr
}
// runCleanupTool is the application action. It parses the configured byte
// thresholds (exiting on a parse error) and then loops forever: it connects
// or reconnects to the Docker daemon whenever there is no client or the
// current one fails to answer a ping, runs one cleanup cycle, and sleeps for
// opts.CheckInterval after a successful cycle or opts.RetryInterval after a
// failed cycle or connection attempt.
func runCleanupTool(c *cli.Context) {
	lowFreeSpace, err := humanize.ParseBytes(opts.LowFreeSpace)
	if err != nil {
		logrus.Fatalln(err)
	}
	expectedFreeSpace, err := humanize.ParseBytes(opts.ExpectedFreeSpace)
	if err != nil {
		logrus.Fatalln(err)
	}

	var dockerClient DockerClient
	logrus.Infoln("Watching disk space...")

	for {
		healthy := dockerClient != nil && dockerClient.Ping() == nil
		if !healthy {
			dockerClient = nil
			conn, connErr := docker_helpers.Connect(dockerCredentials, dockerAPIVersion)
			if connErr != nil {
				logrus.Warningln("Failed to connect to daemon:", connErr)
				time.Sleep(opts.RetryInterval)
				continue
			}
			dockerClient = &CustomDockerClient{Client: conn}
		}

		cycleErr := doCycle(dockerClient, lowFreeSpace, expectedFreeSpace, opts.LowFreeFilesCount, opts.ExpectedFreeFilesCount)
		pause := opts.CheckInterval
		if cycleErr != nil {
			pause = opts.RetryInterval
		}
		time.Sleep(pause)
	}
}
// main builds the command-line application (name, usage, version, author,
// log-level options, and flags generated from dockerCredentials and opts),
// then runs it with runCleanupTool as the action. An error returned by the
// application is fatal.
func main() {
	app := cli.NewApp()
	app.Name = path.Base(os.Args[0])
	app.Usage = "a GitLab Runner Docker Image Cleanup Tool"
	app.Version = fmt.Sprintf("%s (%s)", version, revision)
	// The name contains a non-ASCII letter; it was previously stored
	// mis-encoded. Keep this file UTF-8 encoded.
	app.Author = "Kamil Trzciński"
	app.Email = "ayufan@ayufan.eu"

	cli_helpers.SetupLogLevelOptions(app)
	app.Flags = append(app.Flags, clihelpers.GetFlagsFromStruct(&dockerCredentials, "docker")...)
	app.Flags = append(app.Flags, clihelpers.GetFlagsFromStruct(&opts)...)
	app.Action = runCleanupTool

	if err := app.Run(os.Args); err != nil {
		logrus.Fatal(err)
	}
}