From 331bf3e2613d2b1f730f3867dd32bd0dc1ca43a6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 29 Nov 2024 16:55:27 +0100 Subject: [PATCH 1/7] Git fetcher: Calculate a fingerprint for dirty workdirs This restores evaluation caching for dirty Git workdirs. --- src/libfetchers/git-utils.cc | 18 ++++++++++++++++-- src/libfetchers/git-utils.hh | 7 ++++++- src/libfetchers/git.cc | 30 +++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 74e68fe1281..bd578685763 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -437,7 +437,12 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this { if (!(statusFlags & GIT_STATUS_INDEX_DELETED) && !(statusFlags & GIT_STATUS_WT_DELETED)) - info.files.insert(CanonPath(path)); + info.files.emplace(CanonPath(path), + statusFlags == GIT_STATUS_CURRENT + ? WorkdirInfo::State::Clean + : WorkdirInfo::State::Dirty); + else + info.deletedFiles.insert(CanonPath(path)); if (statusFlags != GIT_STATUS_CURRENT) info.isDirty = true; return 0; @@ -1202,6 +1207,15 @@ ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore } } +template +std::set getKeys(const std::map & c) +{ + std::set res; + for (auto & i : c) + res.insert(i.first); + return res; +} + ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) { auto self = ref(shared_from_this()); @@ -1214,7 +1228,7 @@ ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool export ? makeEmptySourceAccessor() : AllowListSourceAccessor::create( makeFSSourceAccessor(path), - std::set { wd.files }, + std::set { getKeys(wd.files) }, std::move(makeNotAllowedError)).cast(); if (exportIgnore) return make_ref(self, fileAccessor, std::nullopt); diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index f45b5a50425..12cee5db107 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -55,9 +55,14 @@ struct GitRepo in the repo yet. */ std::optional headRev; + enum State { Clean, Dirty }; + /* All files in the working directory that are unchanged, modified or added, but excluding deleted files. */ - std::set files; + std::map files; + + /* The deleted files. */ + std::set deletedFiles; /* The submodules listed in .gitmodules of this workdir. */ std::vector submodules; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index a6883a2d355..eec134980c3 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -685,7 +685,7 @@ struct GitInputScheme : InputScheme if (getSubmodulesAttr(input)) /* Create mountpoints for the submodules. */ for (auto & submodule : repoInfo.workdirInfo.submodules) - repoInfo.workdirInfo.files.insert(submodule.path); + repoInfo.workdirInfo.files.emplace(submodule.path, GitRepo::WorkdirInfo::State::Clean); auto repo = GitRepo::openRepo(repoInfo.url, false, false); @@ -793,10 +793,34 @@ struct GitInputScheme : InputScheme std::optional getFingerprint(ref store, const Input & input) const override { + auto makeFingerprint = [&](const Hash & rev) + { + return rev.gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : ""); + }; + if (auto rev = input.getRev()) - return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : ""); - else + return makeFingerprint(*rev); + else { + auto repoInfo = getRepoInfo(input); + if (repoInfo.isLocal && repoInfo.workdirInfo.headRev) { + /* Calculate a fingerprint that takes into account the + deleted and modified/added files. */ + HashSink hashSink{HashAlgorithm::SHA512}; + for (auto & file : repoInfo.workdirInfo.files) + if (file.second == GitRepo::WorkdirInfo::State::Dirty) { + writeString("modified:", hashSink); + writeString(file.first.abs(), hashSink); + readFile(std::filesystem::path(repoInfo.url) + file.first.abs(), hashSink); + } + for (auto & file : repoInfo.workdirInfo.deletedFiles) { + writeString("deleted:", hashSink); + writeString(file.abs(), hashSink); + } + return makeFingerprint(*repoInfo.workdirInfo.headRev) + + ";d=" + hashSink.finish().first.to_string(HashFormat::Base16, false); + } return std::nullopt; + } } bool isLocked(const Input & input) const override From d044a05197c1d4066ef0c0b67ff0461ee3d5fa6c Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 3 Dec 2024 15:50:39 +0100 Subject: [PATCH 2/7] Don't fingerprint dirty repos with submodules for now Fixes nixpkgsLibTests. --- src/libfetchers/git.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index eec134980c3..d47e731f14e 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -15,6 +15,7 @@ #include "finally.hh" #include "fetch-settings.hh" #include "json-utils.hh" +#include "archive.hh" #include #include @@ -802,7 +803,7 @@ struct GitInputScheme : InputScheme return makeFingerprint(*rev); else { auto repoInfo = getRepoInfo(input); - if (repoInfo.isLocal && repoInfo.workdirInfo.headRev) { + if (repoInfo.isLocal && repoInfo.workdirInfo.headRev && repoInfo.workdirInfo.submodules.empty()) { /* Calculate a fingerprint that takes into account the deleted and modified/added files. */ HashSink hashSink{HashAlgorithm::SHA512}; @@ -810,7 +811,7 @@ struct GitInputScheme : InputScheme if (file.second == GitRepo::WorkdirInfo::State::Dirty) { writeString("modified:", hashSink); writeString(file.first.abs(), hashSink); - readFile(std::filesystem::path(repoInfo.url) + file.first.abs(), hashSink); + dumpPath(repoInfo.url + "/" + file.first.abs(), hashSink); } for (auto & file : repoInfo.workdirInfo.deletedFiles) { writeString("deleted:", hashSink); From f469bc2ae4a221d0397775b2663ba90a2dff0b9c Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 4 Dec 2024 13:17:31 +0100 Subject: [PATCH 3/7] Cache result of Input::getFingerprint() The fingerprint calculation can be expensive (especially for dirty Git trees) so we need to cache it. --- src/libfetchers/fetchers.cc | 14 +++++++++++--- src/libfetchers/fetchers.hh | 5 +++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index e15a460d0e4..b105c252a30 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -113,7 +113,15 @@ Input Input::fromAttrs(const Settings & settings, Attrs && attrs) std::optional Input::getFingerprint(ref store) const { - return scheme ? scheme->getFingerprint(store, *this) : std::nullopt; + if (!scheme) return std::nullopt; + + if (cachedFingerprint) return *cachedFingerprint; + + auto fingerprint = scheme->getFingerprint(store, *this); + + cachedFingerprint = fingerprint; + + return fingerprint; } ParsedURL Input::toURL() const @@ -307,7 +315,7 @@ std::pair, Input> Input::getAccessorUnchecked(ref sto auto accessor = makeStorePathAccessor(store, storePath); - accessor->fingerprint = scheme->getFingerprint(store, *this); + accessor->fingerprint = getFingerprint(store); return {accessor, *this}; } catch (Error & e) { @@ -318,7 +326,7 @@ std::pair, Input> Input::getAccessorUnchecked(ref sto auto [accessor, result] = scheme->getAccessor(store, *this); assert(!accessor->fingerprint); - accessor->fingerprint = scheme->getFingerprint(store, result); + accessor->fingerprint = result.getFingerprint(store); return {accessor, std::move(result)}; } diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index ff04d5551ad..841a4404163 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -46,6 +46,11 @@ struct Input */ std::optional parent; + /** + * Cached result of getFingerprint(). + */ + mutable std::optional> cachedFingerprint; + public: /** * Create an `Input` from a URL. From 7ba933e989b9baf56c2b542c7788b38f6d9ccb50 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 4 Dec 2024 13:56:03 +0100 Subject: [PATCH 4/7] Cache calls to GitRepo::getWorkdirInfo() A command like `nix flake metadata` was causing about 4 calls to getWorkdirInfo(), which is slow for large repos (even when they're not dirty). --- src/libfetchers/git-utils.cc | 14 ++++++++++++++ src/libfetchers/git-utils.hh | 2 ++ src/libfetchers/git.cc | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index bd578685763..15fa540b3ac 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -5,6 +5,7 @@ #include "signals.hh" #include "users.hh" #include "fs-sink.hh" +#include "sync.hh" #include #include @@ -1276,4 +1277,17 @@ ref getTarballCache() return GitRepo::openRepo(repoDir, true, true); } +GitRepo::WorkdirInfo GitRepo::getCachedWorkdirInfo(const std::filesystem::path & path) +{ + static Sync> _cache; + { + auto cache(_cache.lock()); + auto i = cache->find(path); + if (i != cache->end()) return i->second; + } + auto workdirInfo = GitRepo::openRepo(path)->getWorkdirInfo(); + _cache.lock()->emplace(path, workdirInfo); + return workdirInfo; +} + } diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 12cee5db107..2db64e43823 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -70,6 +70,8 @@ struct GitRepo virtual WorkdirInfo getWorkdirInfo() = 0; + static WorkdirInfo getCachedWorkdirInfo(const std::filesystem::path & path); + /* Get the ref that HEAD points to. */ virtual std::optional getWorkdirRef() = 0; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index d47e731f14e..a584542d147 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -431,7 +431,7 @@ struct GitInputScheme : InputScheme // If this is a local directory and no ref or revision is // given, then allow the use of an unclean working tree. if (!input.getRef() && !input.getRev() && repoInfo.isLocal) - repoInfo.workdirInfo = GitRepo::openRepo(repoInfo.url)->getWorkdirInfo(); + repoInfo.workdirInfo = GitRepo::getCachedWorkdirInfo(repoInfo.url); return repoInfo; } From b9f60faab50f79c9d5fd4d11d1bf06feeb62edc5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 4 Dec 2024 14:51:41 +0100 Subject: [PATCH 5/7] Fix macOS build --- src/libfetchers/git-utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 15fa540b3ac..a41546c44eb 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1279,7 +1279,7 @@ ref getTarballCache() GitRepo::WorkdirInfo GitRepo::getCachedWorkdirInfo(const std::filesystem::path & path) { - static Sync> _cache; + static Sync> _cache; { auto cache(_cache.lock()); auto i = cache->find(path); From 33852ead6b01b58cd66a474b6615595244dc56d5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 4 Dec 2024 15:31:19 +0100 Subject: [PATCH 6/7] Optimisation --- src/libfetchers/git-utils.cc | 21 ++++++--------------- src/libfetchers/git-utils.hh | 7 ++++--- src/libfetchers/git.cc | 13 ++++++------- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index a41546c44eb..b54416b1062 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -438,11 +438,11 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this { if (!(statusFlags & GIT_STATUS_INDEX_DELETED) && !(statusFlags & GIT_STATUS_WT_DELETED)) - info.files.emplace(CanonPath(path), - statusFlags == GIT_STATUS_CURRENT - ? WorkdirInfo::State::Clean - : WorkdirInfo::State::Dirty); - else + { + info.files.insert(CanonPath(path)); + if (statusFlags != GIT_STATUS_CURRENT) + info.dirtyFiles.insert(CanonPath(path)); + } else info.deletedFiles.insert(CanonPath(path)); if (statusFlags != GIT_STATUS_CURRENT) info.isDirty = true; @@ -1208,15 +1208,6 @@ ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore } } -template -std::set getKeys(const std::map & c) -{ - std::set res; - for (auto & i : c) - res.insert(i.first); - return res; -} - ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) { auto self = ref(shared_from_this()); @@ -1229,7 +1220,7 @@ ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool export ? makeEmptySourceAccessor() : AllowListSourceAccessor::create( makeFSSourceAccessor(path), - std::set { getKeys(wd.files) }, + std::set { wd.files }, std::move(makeNotAllowedError)).cast(); if (exportIgnore) return make_ref(self, fileAccessor, std::nullopt); diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 2db64e43823..ff115143fc7 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -55,11 +55,12 @@ struct GitRepo in the repo yet. */ std::optional headRev; - enum State { Clean, Dirty }; - /* All files in the working directory that are unchanged, modified or added, but excluding deleted files. */ - std::map files; + std::set files; + + /* All modified or added files. */ + std::set dirtyFiles; /* The deleted files. */ std::set deletedFiles; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index a584542d147..c73f537652d 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -686,7 +686,7 @@ struct GitInputScheme : InputScheme if (getSubmodulesAttr(input)) /* Create mountpoints for the submodules. */ for (auto & submodule : repoInfo.workdirInfo.submodules) - repoInfo.workdirInfo.files.emplace(submodule.path, GitRepo::WorkdirInfo::State::Clean); + repoInfo.workdirInfo.files.insert(submodule.path); auto repo = GitRepo::openRepo(repoInfo.url, false, false); @@ -807,12 +807,11 @@ struct GitInputScheme : InputScheme /* Calculate a fingerprint that takes into account the deleted and modified/added files. */ HashSink hashSink{HashAlgorithm::SHA512}; - for (auto & file : repoInfo.workdirInfo.files) - if (file.second == GitRepo::WorkdirInfo::State::Dirty) { - writeString("modified:", hashSink); - writeString(file.first.abs(), hashSink); - dumpPath(repoInfo.url + "/" + file.first.abs(), hashSink); - } + for (auto & file : repoInfo.workdirInfo.dirtyFiles) { + writeString("modified:", hashSink); + writeString(file.abs(), hashSink); + dumpPath(repoInfo.url + "/" + file.abs(), hashSink); + } for (auto & file : repoInfo.workdirInfo.deletedFiles) { writeString("deleted:", hashSink); writeString(file.abs(), hashSink); From 757ea706449925636a99dfbcb09a09c62b1de319 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 17 Dec 2024 13:13:39 +0100 Subject: [PATCH 7/7] Add a test --- tests/functional/flakes/flakes.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/flakes/flakes.sh b/tests/functional/flakes/flakes.sh index de37ae1b781..6c466a0c7cb 100755 --- a/tests/functional/flakes/flakes.sh +++ b/tests/functional/flakes/flakes.sh @@ -77,6 +77,7 @@ hash1=$(echo "$json" | jq -r .revision) echo foo > "$flake1Dir/foo" git -C "$flake1Dir" add $flake1Dir/foo [[ $(nix flake metadata flake1 --json --refresh | jq -r .dirtyRevision) == "$hash1-dirty" ]] +[[ "$(nix flake metadata flake1 --json | jq -r .fingerprint)" != null ]] echo -n '# foo' >> "$flake1Dir/flake.nix" flake1OriginalCommit=$(git -C "$flake1Dir" rev-parse HEAD)