From 1003675b8683f5a3da3e3f32e044b326c79abf54 Mon Sep 17 00:00:00 2001 From: Harry Brundage Date: Thu, 9 May 2024 10:50:50 -0400 Subject: [PATCH 1/4] Update nix flake vendor hash -- we added new deps that need to be included --- default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default.nix b/default.nix index 8fe1aeaf..9953d21b 100644 --- a/default.nix +++ b/default.nix @@ -17,7 +17,7 @@ buildGoModule rec { version = "0.7.11"; src = ./.; proxyVendor = true; # Fixes: cannot query module due to -mod=vendor running make install - vendorHash = "sha256-AeCAM/MQ/zb+C8MNnGe2Dv/vWoSpea4AhE49X/vLNHg="; + vendorHash = "sha256-3XVou4NW28QxIe1t0gawbCoyl/3X22PoQdJOjcG0nDM="; outputs = [ "out" "client" "server" "migrations" ]; From 545589a28af5a6fa1e724f3eff46d1797a221261 Mon Sep 17 00:00:00 2001 From: Harry Brundage Date: Sun, 5 May 2024 22:31:46 -0400 Subject: [PATCH 2/4] Add a test and benchmark for the hardlink dir walker --- Makefile | 4 ++ test/hardlink_dir_benchmark_test.go | 45 +++++++++++++ test/shared_test.go | 98 ++++++++++++++++++++++++++++- 3 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 test/hardlink_dir_benchmark_test.go diff --git a/Makefile b/Makefile index 17bf233f..be1c6a0d 100644 --- a/Makefile +++ b/Makefile @@ -108,6 +108,10 @@ test-fuzz: export DL_SKIP_SSL_VERIFICATION=1 test-fuzz: reset-db go run cmd/fuzz-test/main.go --host $(GRPC_HOST) --iterations 1000 --projects 5 +bench: export DB_URI = postgres://$(DB_USER):$(DB_PASS)@$(DB_HOST):5432/dl_tests +bench: migrate + cd test && go test -bench . -run=^# + reset-db: migrate psql $(DB_URI) -c "truncate dl.objects; truncate dl.contents; truncate dl.projects; truncate dl.cache_versions;" diff --git a/test/hardlink_dir_benchmark_test.go b/test/hardlink_dir_benchmark_test.go new file mode 100644 index 00000000..2b1e4804 --- /dev/null +++ b/test/hardlink_dir_benchmark_test.go @@ -0,0 +1,45 @@ +package test + +import ( + "fmt" + "os" + "path" + "testing" + + "github.com/gadget-inc/dateilager/internal/files" + "github.com/stretchr/testify/require" +) + +func TestHardlinkDir(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err, "os.Getwd() failed") + + bigDir := path.Join(wd, "../js/node_modules") + tmpDir := emptyTmpDir(t) + defer os.RemoveAll(tmpDir) + + copyDir := path.Join(tmpDir, "node_modules") + err = files.HardlinkDir(bigDir, copyDir) + require.NoError(t, err, "HardlinkDir failed") + + err = CompareDirectories(bigDir, copyDir) + require.NoError(t, err, "compareDirectories %s vs %s failed", bigDir, tmpDir) +} + +func BenchmarkHardlinkDir(b *testing.B) { + wd, err := os.Getwd() + if err != nil { + b.Error(err) + } + + bigDir := path.Join(wd, "../js/node_modules") + tmpDir := emptyTmpDir(b) + defer os.RemoveAll(tmpDir) + + for n := 0; n < b.N; n++ { + err := files.HardlinkDir(bigDir, path.Join(tmpDir, "node_modules", fmt.Sprintf("%d", n))) + if err != nil { + b.Error(err) + } + } +} diff --git a/test/shared_test.go b/test/shared_test.go index a7b184e6..d0c6ae41 100644 --- a/test/shared_test.go +++ b/test/shared_test.go @@ -4,6 +4,7 @@ import ( "archive/tar" "bytes" "context" + "crypto/sha256" "fmt" "io" "io/fs" @@ -275,9 +276,11 @@ func writeFile(t *testing.T, dir string, path string, content string) { require.NoError(t, err, "write file %v", path) } -func emptyTmpDir(t *testing.T) string { +func emptyTmpDir(t testing.TB) string { dir, err := os.MkdirTemp("", "dateilager_tests_") - require.NoError(t, err, "create temp dir") + if err != nil { + t.Fatal(err) + } return dir } @@ -764,3 +767,94 @@ func formatPtr(p *int64) string { } return fmt.Sprint(*p) } + +// CompareDirectories compares the contents and permissions of two directories recursively. +func CompareDirectories(dir1, dir2 string) error { + files1 := make(map[string]os.FileInfo) + err := filepath.Walk(dir1, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + relPath, err := filepath.Rel(dir1, path) + if err != nil { + return err + } + files1[relPath] = info + return nil + }) + if err != nil { + return err + } + + err = filepath.Walk(dir2, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + relPath, err := filepath.Rel(dir2, path) + if err != nil { + return err + } + if origInfo, ok := files1[relPath]; ok { + if info.IsDir() && origInfo.IsDir() { + // Only compare directories for existence, not content + delete(files1, relPath) + return nil + } + if !compareFileInfo(origInfo, info) { + return fmt.Errorf("file permissions differ for %s: %o vs %o", relPath, origInfo.Mode()&os.ModePerm, info.Mode()&os.ModePerm) + } + if equal, err := compareFileContents(filepath.Join(dir1, relPath), filepath.Join(dir2, relPath)); err != nil { + return fmt.Errorf("error comparing contents of %s: %v", relPath, err) + } else if !equal { + return fmt.Errorf("contents differ for %s", relPath) + } + delete(files1, relPath) + } else { + return fmt.Errorf("extra file found in directory 2: %s", path) + } + return nil + }) + if err != nil { + return err + } + for file := range files1 { + err = fmt.Errorf("file missing in directory 2: %s", file) + } + return err +} + +// compareFileInfo compares the permissions and other metadata of two files. +func compareFileInfo(info1, info2 os.FileInfo) bool { + return (info1.Mode() & os.ModePerm) == (info2.Mode() & os.ModePerm) +} + +// compareFileContents compares the contents of two files. +func compareFileContents(file1, file2 string) (bool, error) { + f1, err := os.Open(file1) + if err != nil { + return false, err + } + defer f1.Close() + f2, err := os.Open(file2) + if err != nil { + return false, err + } + defer f2.Close() + + if info1, _ := f1.Stat(); info1.IsDir() { + return true, nil // Skip directories in content comparison + } + if info2, _ := f2.Stat(); info2.IsDir() { + return true, nil // Skip directories in content comparison + } + + hash1, hash2 := sha256.New(), sha256.New() + if _, err := io.Copy(hash1, f1); err != nil { + return false, err + } + if _, err := io.Copy(hash2, f2); err != nil { + return false, err + } + + return bytes.Equal(hash1.Sum(nil), hash2.Sum(nil)), nil +} From b5ba5046c4d6e7015a29f821a8447c6121b1b6c1 Mon Sep 17 00:00:00 2001 From: Harry Brundage Date: Sun, 5 May 2024 23:04:59 -0400 Subject: [PATCH 3/4] Don't MkdirAll for every directory in the hardlink tree We know `fs.Walk` does a traversal that gives us parents before children, so we don't need to re-make all the grandparents every time, with the exception of the root, which we make special. --- internal/files/writer.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/internal/files/writer.go b/internal/files/writer.go index 4f8a6b00..b479cdeb 100644 --- a/internal/files/writer.go +++ b/internal/files/writer.go @@ -201,15 +201,28 @@ func HardlinkDir(olddir, newdir string) error { } } + rootInfo, err := os.Lstat(olddir) + if err != nil { + return fmt.Errorf("cannot stat olddir %v: %w", olddir, err) + } + + err = os.MkdirAll(newdir, rootInfo.Mode()) + if err != nil { + return fmt.Errorf("cannot create new root dir %v: %w", olddir, err) + } + return filepath.Walk(olddir, func(oldpath string, info fs.FileInfo, err error) error { if err != nil { return fmt.Errorf("failed to walk dir: %v, %w", info, err) } + if oldpath == olddir { + return nil + } newpath := filepath.Join(newdir, strings.TrimPrefix(oldpath, olddir)) if info.IsDir() { - err := os.MkdirAll(newpath, info.Mode()) + err := os.Mkdir(newpath, info.Mode()) if err != nil { return fmt.Errorf("cannot create dir %v: %w", newpath, err) } From 3b51048931a25ed739839776eefb1f34d2f16223 Mon Sep 17 00:00:00 2001 From: Harry Brundage Date: Sun, 5 May 2024 23:54:14 -0400 Subject: [PATCH 4/4] use filepath.WalkDir to avoid stat-ing files unnecessarily --- internal/files/writer.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/internal/files/writer.go b/internal/files/writer.go index b479cdeb..07abe8ab 100644 --- a/internal/files/writer.go +++ b/internal/files/writer.go @@ -211,9 +211,9 @@ func HardlinkDir(olddir, newdir string) error { return fmt.Errorf("cannot create new root dir %v: %w", olddir, err) } - return filepath.Walk(olddir, func(oldpath string, info fs.FileInfo, err error) error { + return filepath.WalkDir(olddir, func(oldpath string, d os.DirEntry, err error) error { if err != nil { - return fmt.Errorf("failed to walk dir: %v, %w", info, err) + return fmt.Errorf("failed to walk dir: %v, %w", oldpath, err) } if oldpath == olddir { return nil @@ -221,8 +221,12 @@ func HardlinkDir(olddir, newdir string) error { newpath := filepath.Join(newdir, strings.TrimPrefix(oldpath, olddir)) - if info.IsDir() { - err := os.Mkdir(newpath, info.Mode()) + if d.IsDir() { + info, err := d.Info() + if err != nil { + return fmt.Errorf("cannot access directory info %v: %w", oldpath, err) + } + err = os.Mkdir(newpath, info.Mode()) if err != nil { return fmt.Errorf("cannot create dir %v: %w", newpath, err) }