diff --git a/pkg/container/docker_run.go b/pkg/container/docker_run.go
index 2747c30..5334bfe 100644
--- a/pkg/container/docker_run.go
+++ b/pkg/container/docker_run.go
@@ -576,7 +576,6 @@ func (cr *containerReference) exec(cmd []string, env map[string]string, user, wo
 	}
 }
 
-// nolint: gocyclo
 func (cr *containerReference) copyDir(dstPath string, srcPath string, useGitIgnore bool) common.Executor {
 	return func(ctx context.Context) error {
 		logger := common.Logger(ctx)
@@ -585,8 +584,17 @@ func (cr *containerReference) copyDir(dstPath string, srcPath string, useGitIgno
 			return err
 		}
 		log.Debugf("Writing tarball %s from %s", tarFile.Name(), srcPath)
-		defer tarFile.Close()
-		defer os.Remove(tarFile.Name())
+		defer func(tarFile *os.File) {
+			name := tarFile.Name()
+			err := tarFile.Close()
+			if err != nil {
+				logger.Error(err)
+			}
+			err = os.Remove(name)
+			if err != nil {
+				logger.Error(err)
+			}
+		}(tarFile)
 
 		tw := tar.NewWriter(tarFile)
 		srcPrefix := filepath.Dir(srcPath)
@@ -605,69 +613,17 @@ func (cr *containerReference) copyDir(dstPath string, srcPath string, useGitIgno
 			ignorer = gitignore.NewMatcher(ps)
 		}
 
-		err = filepath.Walk(srcPath, func(file string, fi os.FileInfo, err error) error {
-			if err != nil {
-				return err
-			}
+		fc := &fileCollector{
+			Fs:        &defaultFs{},
+			Ignorer:   ignorer,
+			SrcPath:   srcPath,
+			SrcPrefix: srcPrefix,
+			Handler: &tarCollector{
+				TarWriter: tw,
+			},
+		}
 
-			sansPrefix := strings.TrimPrefix(file, srcPrefix)
-			split := strings.Split(sansPrefix, string(filepath.Separator))
-			if ignorer != nil && ignorer.Match(split, fi.IsDir()) {
-				if fi.IsDir() {
-					return filepath.SkipDir
-				}
-				return nil
-			}
-
-			// return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update)
-			linkName := fi.Name()
-			if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
-				linkName, err = os.Readlink(file)
-				if err != nil {
-					return errors.WithMessagef(err, "unable to readlink %s", file)
-				}
-			} else if !fi.Mode().IsRegular() {
-				return nil
-			}
-
-			// create a new dir/file header
-			header, err := tar.FileInfoHeader(fi, linkName)
-			if err != nil {
-				return err
-			}
-
-			// update the name to correctly reflect the desired destination when untaring
-			header.Name = filepath.ToSlash(sansPrefix)
-			header.Mode = int64(fi.Mode())
-			header.ModTime = fi.ModTime()
-
-			// write the header
-			if err := tw.WriteHeader(header); err != nil {
-				return err
-			}
-
-			// symlinks don't need to be copied
-			if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
-				return nil
-			}
-
-			// open files for taring
-			f, err := os.Open(file)
-			if err != nil {
-				return err
-			}
-
-			// copy file data into tar writer
-			if _, err := io.Copy(tw, f); err != nil {
-				return err
-			}
-
-			// manually close here after each file operation; deferring would cause each file close
-			// to wait until all operations have completed.
-			f.Close()
-
-			return nil
-		})
+		err = filepath.Walk(srcPath, fc.collectFiles(ctx, []string{}))
 		if err != nil {
 			return err
 		}
diff --git a/pkg/container/file_collector.go b/pkg/container/file_collector.go
new file mode 100644
index 0000000..1e5a8af
--- /dev/null
+++ b/pkg/container/file_collector.go
@@ -0,0 +1,208 @@
+package container
+
+import (
+	"archive/tar"
+	"context"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	git "github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing/filemode"
+	"github.com/go-git/go-git/v5/plumbing/format/gitignore"
+	"github.com/go-git/go-git/v5/plumbing/format/index"
+	"github.com/pkg/errors"
+)
+
+// fileCollectorHandler receives every file and symlink a fileCollector selects.
+type fileCollectorHandler interface {
+	WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error
+}
+
+// tarCollector is a fileCollectorHandler that appends entries to a tar archive.
+type tarCollector struct {
+	TarWriter *tar.Writer
+}
+
+// WriteFile writes a tar header named path for fi and, when f is non-nil,
+// copies f into the archive as the entry body. collectFiles passes a nil f
+// for symlinks, whose target is carried in linkName.
+func (tc tarCollector) WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error {
+	// create a new dir/file header
+	header, err := tar.FileInfoHeader(fi, linkName)
+	if err != nil {
+		return err
+	}
+
+	// update the name to correctly reflect the desired destination when untarring
+	header.Name = path
+	header.Mode = int64(fi.Mode())
+	header.ModTime = fi.ModTime()
+
+	// write the header
+	if err := tc.TarWriter.WriteHeader(header); err != nil {
+		return err
+	}
+
+	// this is a symlink, no reader provided
+	if f == nil {
+		return nil
+	}
+
+	// copy file data into tar writer
+	if _, err := io.Copy(tc.TarWriter, f); err != nil {
+		return err
+	}
+	return nil
+}
+
+// fileCollector walks SrcPath and passes every selected entry to Handler.
+// Paths matched by Ignorer are skipped unless the git index tracks them.
+type fileCollector struct {
+	Ignorer   gitignore.Matcher
+	SrcPath   string
+	SrcPrefix string
+	Fs        fileCollectorFs
+	Handler   fileCollectorHandler
+}
+
+// fileCollectorFs is the filesystem access a fileCollector needs; it exists
+// so tests can substitute an in-memory filesystem for the real one.
+type fileCollectorFs interface {
+	Walk(root string, fn filepath.WalkFunc) error
+	OpenGitIndex(path string) (*index.Index, error)
+	Open(path string) (io.ReadCloser, error)
+	Readlink(path string) (string, error)
+}
+
+// defaultFs implements fileCollectorFs on top of the host filesystem.
+type defaultFs struct {
+}
+
+// Walk delegates to filepath.Walk.
+func (*defaultFs) Walk(root string, fn filepath.WalkFunc) error {
+	return filepath.Walk(root, fn)
+}
+
+// OpenGitIndex returns the index of the git repository opened at path.
+func (*defaultFs) OpenGitIndex(path string) (*index.Index, error) {
+	r, err := git.PlainOpen(path)
+	if err != nil {
+		return nil, err
+	}
+	i, err := r.Storer.Index()
+	if err != nil {
+		return nil, err
+	}
+	return i, nil
+}
+
+// Open delegates to os.Open.
+func (*defaultFs) Open(path string) (io.ReadCloser, error) {
+	return os.Open(path)
+}
+
+// Readlink delegates to os.Readlink.
+func (*defaultFs) Readlink(path string) (string, error) {
+	return os.Readlink(path)
+}
+
+// collectFiles returns a filepath.WalkFunc that hands every regular file and
+// symlink below the walked root to fc.Handler. Entries matched by fc.Ignorer
+// are skipped unless the git index tracks them, and git submodules are walked
+// recursively with their own index. submodulePath holds the path components
+// of the submodule being walked and is empty for the top-level repository.
+// nolint: gocyclo
+func (fc *fileCollector) collectFiles(ctx context.Context, submodulePath []string) filepath.WalkFunc {
+	// Best effort: without a readable git index only fc.Ignorer decides.
+	i, _ := fc.Fs.OpenGitIndex(path.Join(fc.SrcPath, path.Join(submodulePath...)))
+	return func(file string, fi os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if ctx != nil {
+			select {
+			case <-ctx.Done():
+				return fmt.Errorf("copy cancelled: %w", ctx.Err())
+			default:
+			}
+		}
+
+		sansPrefix := strings.TrimPrefix(file, fc.SrcPrefix)
+		split := strings.Split(sansPrefix, string(filepath.Separator))
+		// The root folders should be skipped, submodules only have the last path component set to "." by filepath.Walk
+		if fi.IsDir() && len(split) > 0 && split[len(split)-1] == "." {
+			return nil
+		}
+		var entry *index.Entry
+		if i != nil {
+			entry, err = i.Entry(strings.Join(split[len(submodulePath):], "/"))
+		} else {
+			err = index.ErrEntryNotFound
+		}
+		// Untracked entries matched by the ignorer are dropped; an ignored
+		// directory is still descended into when the index tracks files below it.
+		if err != nil && fc.Ignorer != nil && fc.Ignorer.Match(split, fi.IsDir()) {
+			if fi.IsDir() {
+				if i != nil {
+					ms, globErr := i.Glob(strings.Join(append(split[len(submodulePath):], "**"), "/"))
+					if globErr != nil || len(ms) == 0 {
+						return filepath.SkipDir
+					}
+				} else {
+					return filepath.SkipDir
+				}
+			} else {
+				return nil
+			}
+		}
+		if err == nil && entry.Mode == filemode.Submodule {
+			// Walk the submodule at its real location through fc.Fs, so the
+			// recursion uses the same filesystem as the outer walk.
+			err = fc.Fs.Walk(file, fc.collectFiles(ctx, split))
+			if err != nil {
+				return err
+			}
+			return filepath.SkipDir
+		}
+		path := filepath.ToSlash(sansPrefix)
+
+		// return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update)
+		if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
+			linkName, err := fc.Fs.Readlink(file)
+			if err != nil {
+				return errors.WithMessagef(err, "unable to readlink %s", file)
+			}
+			return fc.Handler.WriteFile(path, fi, linkName, nil)
+		} else if !fi.Mode().IsRegular() {
+			return nil
+		}
+
+		// open file
+		f, err := fc.Fs.Open(file)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		if ctx != nil {
+			// make io.Copy cancellable by closing the file
+			cpctx, cpfinish := context.WithCancel(ctx)
+			defer cpfinish()
+			go func() {
+				// cpctx is a child of ctx, so it is done both on cancellation
+				// and on return; only a cancelled ctx must abort the copy.
+				<-cpctx.Done()
+				if ctx.Err() != nil {
+					f.Close()
+				}
+			}()
+		}
+
+		return fc.Handler.WriteFile(path, fi, "", f)
+	}
+}
diff --git a/pkg/container/file_collector_test.go b/pkg/container/file_collector_test.go
new file mode 100644
index 0000000..86b8003
--- /dev/null
+++ b/pkg/container/file_collector_test.go
@@ -0,0 +1,117 @@
+package container
+
+import (
+	"archive/tar"
+	"context"
+	"io"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/go-git/go-billy/v5"
+	"github.com/go-git/go-billy/v5/memfs"
+	git "github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing/cache"
+	"github.com/go-git/go-git/v5/plumbing/format/gitignore"
+	"github.com/go-git/go-git/v5/plumbing/format/index"
+	"github.com/go-git/go-git/v5/storage/filesystem"
+	"github.com/stretchr/testify/assert"
+)
+
+type memoryFs struct {
+	billy.Filesystem
+}
+
+func (mfs *memoryFs) walk(root string, fn filepath.WalkFunc) error {
+	dir, err := mfs.ReadDir(root)
+	if err != nil {
+		return err
+	}
+	for i := 0; i < len(dir); i++ {
+		filename := filepath.Join(root, dir[i].Name())
+		err = fn(filename, dir[i], nil)
+		if dir[i].IsDir() {
+			if err == filepath.SkipDir {
+				err = nil
+			} else if err := mfs.walk(filename, fn); err != nil {
+				return err
+			}
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (mfs *memoryFs) Walk(root string, fn filepath.WalkFunc) error {
+	stat, err := mfs.Lstat(root)
+	if err != nil {
+		return err
+	}
+	err = fn(strings.Join([]string{root, "."}, string(filepath.Separator)), stat, nil)
+	if err != nil {
+		return err
+	}
+	return mfs.walk(root, fn)
+}
+
+func (mfs *memoryFs) OpenGitIndex(path string) (*index.Index, error) {
+	f, _ := mfs.Filesystem.Chroot(filepath.Join(path, ".git"))
+	storage := filesystem.NewStorage(f, cache.NewObjectLRUDefault())
+	i, err := storage.Index()
+	if err != nil {
+		return nil, err
+	}
+	return i, nil
+}
+
+func (mfs *memoryFs) Open(path string) (io.ReadCloser, error) {
+	return mfs.Filesystem.Open(path)
+}
+
+func (mfs *memoryFs) Readlink(path string) (string, error) {
+	return mfs.Filesystem.Readlink(path)
+}
+
+func TestIgnoredTrackedfile(t *testing.T) {
+	fs := memfs.New()
+	_ = fs.MkdirAll("mygitrepo/.git", 0777)
+	dotgit, _ := fs.Chroot("mygitrepo/.git")
+	worktree, _ := fs.Chroot("mygitrepo")
+	repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
+	f, _ := worktree.Create(".gitignore")
+	_, _ = f.Write([]byte(".*\n"))
+	f.Close()
+	// This file shouldn't be in the tar
+	f, _ = worktree.Create(".env")
+	_, _ = f.Write([]byte("test=val1\n"))
+	f.Close()
+	w, _ := repo.Worktree()
+	// .gitignore is in the tar after adding it to the index
+	_, _ = w.Add(".gitignore")
+
+	tmpTar, _ := fs.Create("temp.tar")
+	tw := tar.NewWriter(tmpTar)
+	ps, _ := gitignore.ReadPatterns(worktree, []string{})
+	ignorer := gitignore.NewMatcher(ps)
+	fc := &fileCollector{
+		Fs:        &memoryFs{Filesystem: fs},
+		Ignorer:   ignorer,
+		SrcPath:   "mygitrepo",
+		SrcPrefix: "mygitrepo" + string(filepath.Separator),
+		Handler: &tarCollector{
+			TarWriter: tw,
+		},
+	}
+	err := fc.Fs.Walk("mygitrepo", fc.collectFiles(context.Background(), []string{}))
+	assert.NoError(t, err, "successfully collect files")
+	tw.Close()
+	_, _ = tmpTar.Seek(0, io.SeekStart)
+	tr := tar.NewReader(tmpTar)
+	h, err := tr.Next()
+	assert.NoError(t, err, "tar must not be empty")
+	assert.Equal(t, ".gitignore", h.Name)
+	_, err = tr.Next()
+	assert.ErrorIs(t, err, io.EOF, "tar must only contain one element")
+}