diff --git a/integration/image/remove_unix_test.go b/integration/image/remove_unix_test.go new file mode 100644 index 0000000000..984ce5c9d7 --- /dev/null +++ b/integration/image/remove_unix_test.go @@ -0,0 +1,88 @@ +// +build !windows + +package image // import "github.com/docker/docker/integration/image" + +import ( + "context" + "io" + "io/ioutil" + "os" + "strconv" + "syscall" + "testing" + "unsafe" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/internal/test/daemon" + "github.com/docker/docker/internal/test/fakecontext" + "gotest.tools/assert" + "gotest.tools/skip" +) + +// This is a regression test for #38488 +// It ensures that orphan layers can be found and cleaned up +// after unsuccessful image removal +func TestRemoveImageGarbageCollector(t *testing.T) { + // This test uses very platform specific way to prevent + // daemon for remove image layer. + skip.If(t, testEnv.DaemonInfo.OSType != "linux") + skip.If(t, os.Getenv("DOCKER_ENGINE_GOARCH") != "amd64") + + // Create daemon with overlay2 graphdriver because vfs uses disk differently + // and this test case would not work with it. + d := daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService) + d.Start(t) + defer d.Stop(t) + + ctx := context.Background() + client := d.NewClientT(t) + i := d.ImageService() + + img := "test-garbage-collector" + + // Build a image with multiple layers + dockerfile := `FROM busybox + RUN echo echo Running... > /run.sh` + source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile)) + defer source.Close() + resp, err := client.ImageBuild(ctx, + source.AsTarReader(t), + types.ImageBuildOptions{ + Remove: true, + ForceRemove: true, + Tags: []string{img}, + }) + assert.NilError(t, err) + _, err = io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + assert.NilError(t, err) + image, _, err := client.ImageInspectWithRaw(ctx, img) + assert.NilError(t, err) + + // Mark latest image layer to immutable + data := image.GraphDriver.Data + file, _ := os.Open(data["UpperDir"]) + attr := 0x00000010 + fsflags := uintptr(0x40086602) + argp := uintptr(unsafe.Pointer(&attr)) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + + // Try to remove the image, it should generate error + // but marking layer back to mutable before checking errors (so we don't break CI server) + _, err = client.ImageRemove(ctx, img, types.ImageRemoveOptions{}) + attr = 0x00000000 + argp = uintptr(unsafe.Pointer(&attr)) + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + assert.ErrorContains(t, err, "permission denied") + + // Verify that layer remaining on disk + dir, _ := os.Stat(data["UpperDir"]) + assert.Equal(t, "true", strconv.FormatBool(dir.IsDir())) + + // Run imageService.Cleanup() and make sure that layer was removed from disk + i.Cleanup() + dir, err = os.Stat(data["UpperDir"]) + assert.ErrorContains(t, err, "no such file or directory") +} diff --git a/internal/test/daemon/daemon.go b/internal/test/daemon/daemon.go index f835011d04..4705d2f4ee 100644 --- a/internal/test/daemon/daemon.go +++ b/internal/test/daemon/daemon.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/events" "github.com/docker/docker/client" + "github.com/docker/docker/daemon/images" "github.com/docker/docker/internal/test" "github.com/docker/docker/internal/test/request" "github.com/docker/docker/opts" @@ -71,6 +72,7 @@ type Daemon struct { init bool dockerdBinary string log logT + imageService *images.ImageService // swarm related field swarmListenAddr string @@ -700,3 +702,8 @@ func cleanupRaftDir(t testingT, rootPath string) { t.Logf("error removing %v: %v", walDir, err) } } + +// ImageService returns the Daemon's ImageService +func (d *Daemon) ImageService() *images.ImageService { + return d.imageService +} diff --git a/internal/test/daemon/ops.go b/internal/test/daemon/ops.go index da5f44a7e5..9f869dee0f 100644 --- a/internal/test/daemon/ops.go +++ b/internal/test/daemon/ops.go @@ -70,3 +70,10 @@ func WithEnvironment(e environment.Execution) func(*Daemon) { } } } + +// WithStorageDriver sets store driver option +func WithStorageDriver(driver string) func(d *Daemon) { + return func(d *Daemon) { + d.storageDriver = driver + } +} diff --git a/internal/test/daemon/ops_unix.go b/internal/test/daemon/ops_unix.go new file mode 100644 index 0000000000..41011e87c5 --- /dev/null +++ b/internal/test/daemon/ops_unix.go @@ -0,0 +1,34 @@ +// +build !windows + +package daemon + +import ( + "path/filepath" + "runtime" + + "github.com/docker/docker/daemon/images" + "github.com/docker/docker/layer" + + // register graph drivers + _ "github.com/docker/docker/daemon/graphdriver/register" + "github.com/docker/docker/pkg/idtools" +) + +// WithImageService sets imageService options +func WithImageService(d *Daemon) { + layerStores := make(map[string]layer.Store) + os := runtime.GOOS + layerStores[os], _ = layer.NewStoreFromOptions(layer.StoreOptions{ + Root: d.Root, + MetadataStorePathTemplate: filepath.Join(d.RootDir(), "image", "%s", "layerdb"), + GraphDriver: d.storageDriver, + GraphDriverOptions: nil, + IDMapping: &idtools.IdentityMapping{}, + PluginGetter: nil, + ExperimentalEnabled: false, + OS: os, + }) + d.imageService = images.NewImageService(images.ImageServiceConfig{ + LayerStores: layerStores, + }) +} diff --git a/layer/filestore.go b/layer/filestore.go index 208a0c3a85..37a927022a 100644 --- a/layer/filestore.go +++ b/layer/filestore.go @@ -305,6 +305,47 @@ func (fms *fileMetadataStore) GetMountParent(mount string) (ChainID, error) { return ChainID(dgst), nil } +func (fms *fileMetadataStore) getOrphan() ([]roLayer, error) { + var orphanLayers []roLayer + for _, algorithm := range supportedAlgorithms { + fileInfos, err := ioutil.ReadDir(filepath.Join(fms.root, string(algorithm))) + if err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err + } + + for _, fi := range fileInfos { + if fi.IsDir() && strings.Contains(fi.Name(), "-removing") { + nameSplit := strings.Split(fi.Name(), "-") + dgst := digest.NewDigestFromEncoded(algorithm, nameSplit[0]) + if err := dgst.Validate(); err != nil { + logrus.Debugf("Ignoring invalid digest %s:%s", algorithm, nameSplit[0]) + } else { + chainID := ChainID(dgst) + chainFile := filepath.Join(fms.root, string(algorithm), fi.Name(), "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + logrus.WithError(err).WithField("digest", dgst).Error("cannot get cache ID") + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID == "" { + logrus.Errorf("invalid cache id value") + } + + l := &roLayer{ + chainID: chainID, + cacheID: cacheID, + } + orphanLayers = append(orphanLayers, *l) + } + } + } + } + return orphanLayers, nil +} + func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { var ids []ChainID for _, algorithm := range supportedAlgorithms { @@ -346,8 +387,39 @@ func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { return ids, mounts, nil } -func (fms *fileMetadataStore) Remove(layer ChainID) error { - return os.RemoveAll(fms.getLayerDirectory(layer)) +// Remove layerdb folder if that is marked for removal +func (fms *fileMetadataStore) Remove(layer ChainID, cache string) error { + dgst := digest.Digest(layer) + files, err := ioutil.ReadDir(filepath.Join(fms.root, string(dgst.Algorithm()))) + if err != nil { + return err + } + for _, f := range files { + if !strings.HasSuffix(f.Name(), "-removing") || !strings.HasPrefix(f.Name(), dgst.String()) { + continue + } + + // Make sure that we only remove layerdb folder which points to + // requested cacheID + dir := filepath.Join(fms.root, string(dgst.Algorithm()), f.Name()) + chainFile := filepath.Join(dir, "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + logrus.WithError(err).WithField("file", chainFile).Error("cannot get cache ID") + continue + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID != cache { + continue + } + logrus.Debugf("Removing folder: %s", dir) + err = os.RemoveAll(dir) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("name", f.Name()).Error("cannot remove layer") + continue + } + } + return nil } func (fms *fileMetadataStore) RemoveMount(mount string) error { diff --git a/layer/layer_store.go b/layer/layer_store.go index 81730e9d92..7b8c011f4c 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -5,6 +5,8 @@ import ( "fmt" "io" "io/ioutil" + "os" + "path/filepath" "sync" "github.com/docker/distribution" @@ -415,11 +417,24 @@ func (ls *layerStore) Map() map[ChainID]Layer { } func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error { + // Rename layer digest folder first so we detect orphan layer(s) + // if ls.driver.Remove fails + dir := ls.store.getLayerDirectory(layer.chainID) + for { + dgst := digest.Digest(layer.chainID) + tmpID := fmt.Sprintf("%s-%s-removing", dgst.Hex(), stringid.GenerateRandomID()) + dir := filepath.Join(ls.store.root, string(dgst.Algorithm()), tmpID) + err := os.Rename(ls.store.getLayerDirectory(layer.chainID), dir) + if os.IsExist(err) { + continue + } + break + } err := ls.driver.Remove(layer.cacheID) if err != nil { return err } - err = ls.store.Remove(layer.chainID) + err = os.RemoveAll(dir) if err != nil { return err } @@ -452,12 +467,14 @@ func (ls *layerStore) releaseLayer(l *roLayer) ([]Metadata, error) { if l.hasReferences() { panic("cannot delete referenced layer") } + // Remove layer from layer map first so it is not considered to exist + // when if ls.deleteLayer fails. + delete(ls.layerMap, l.chainID) + var metadata Metadata if err := ls.deleteLayer(l, &metadata); err != nil { return nil, err } - - delete(ls.layerMap, l.chainID) removed = append(removed, metadata) if l.parent == nil { @@ -743,6 +760,23 @@ func (ls *layerStore) assembleTarTo(graphID string, metadata io.ReadCloser, size } func (ls *layerStore) Cleanup() error { + orphanLayers, err := ls.store.getOrphan() + if err != nil { + logrus.Errorf("Cannot get orphan layers: %v", err) + } + logrus.Debugf("found %v orphan layers", len(orphanLayers)) + for _, orphan := range orphanLayers { + logrus.Debugf("removing orphan layer, chain ID: %v , cache ID: %v", orphan.chainID, orphan.cacheID) + err = ls.driver.Remove(orphan.cacheID) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("cache-id", orphan.cacheID).Error("cannot remove orphan layer") + continue + } + err = ls.store.Remove(orphan.chainID, orphan.cacheID) + if err != nil { + logrus.WithError(err).WithField("chain-id", orphan.chainID).Error("cannot remove orphan layer metadata") + } + } return ls.driver.Cleanup() }