-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
libgit2, GitRepo: Write (thin) packfiles #11330
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
5dd6c4f
libgit2, GitRepo: Write thin packfiles
roberth d0f8a92
Make tarball cache more interruptible
roberth 97ff2ed
Sync tarball cache within tarball cache Activity
roberth fb8d3ed
fixup: sync -> flush
roberth 57c4830
fixup: Release odb
roberth c1fe354
libgit2: Add libgit2-packbuilder-callback-interruptible.patch
roberth 976f539
Make Repo::flush interruptible
roberth 459d026
fix Windows build
roberth File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,282 @@ | ||
commit 9bacade4a3ef4b6b26e2c02f549eef0e9eb9eaa2 | ||
Author: Robert Hensing <robert@roberthensing.nl> | ||
Date: Sun Aug 18 20:20:36 2024 +0200 | ||
|
||
Add unoptimized git_mempack_write_thin_pack | ||
|
||
diff --git a/include/git2/sys/mempack.h b/include/git2/sys/mempack.h | ||
index 17da590a3..3688bdd50 100644 | ||
--- a/include/git2/sys/mempack.h | ||
+++ b/include/git2/sys/mempack.h | ||
@@ -44,6 +44,29 @@ GIT_BEGIN_DECL | ||
*/ | ||
GIT_EXTERN(int) git_mempack_new(git_odb_backend **out); | ||
|
||
+/** | ||
+ * Write a thin packfile with the objects in the memory store. | ||
+ * | ||
+ * A thin packfile is a packfile that does not contain its transitive closure of | ||
+ * references. This is useful for efficiently distributing additions to a | ||
+ * repository over the network, but also finds use in the efficient bulk | ||
+ * addition of objects to a repository, locally. | ||
+ * | ||
+ * This operation performs the (shallow) insert operations into the | ||
+ * `git_packbuilder`, but does not write the packfile to disk; | ||
+ * see `git_packbuilder_write_buf`. | ||
+ * | ||
+ * It also does not reset the memory store; see `git_mempack_reset`. | ||
+ * | ||
+ * @note This function may or may not write trees and blobs that are not | ||
+ * referenced by commits. Currently everything is written, but this | ||
+ * behavior may change in the future as the packer is optimized. | ||
+ * | ||
+ * @param backend The mempack backend | ||
+ * @param pb The packbuilder to use to write the packfile | ||
+ * @return 0 on success; otherwise the error code reported while iterating | ||
+ *         the memory store or while inserting an object into the packbuilder | ||
+ */ | ||
+GIT_EXTERN(int) git_mempack_write_thin_pack(git_odb_backend *backend, git_packbuilder *pb); | ||
+ | ||
/** | ||
* Dump all the queued in-memory writes to a packfile. | ||
* | ||
diff --git a/src/libgit2/odb_mempack.c b/src/libgit2/odb_mempack.c | ||
index 6f27f45f8..0b61e2b66 100644 | ||
--- a/src/libgit2/odb_mempack.c | ||
+++ b/src/libgit2/odb_mempack.c | ||
@@ -132,6 +132,35 @@ cleanup: | ||
return err; | ||
} | ||
|
||
+int git_mempack_write_thin_pack(git_odb_backend *backend, git_packbuilder *pb) | ||
+{ | ||
+	struct memory_packer_db *db = (struct memory_packer_db *)backend; | ||
+	const git_oid *id; | ||
+	size_t pos = 0; | ||
+	int error; | ||
+ | ||
+	/* | ||
+	 * Queue every object id held in the memory store into the packbuilder; | ||
+	 * no filtering or ordering is applied here. | ||
+	 * | ||
+	 * TODO: Implement the recency heuristics, see | ||
+	 * https://git-scm.com/docs/pack-heuristics/en | ||
+	 * For this it probably makes sense to only write what's referenced | ||
+	 * through commits; the public documentation already allows for that. | ||
+	 */ | ||
+	while ((error = git_oidmap_iterate(NULL, db->objects, &pos, &id)) == 0) { | ||
+		error = git_packbuilder_insert(pb, id, NULL); | ||
+		if (error != 0) | ||
+			return error; | ||
+	} | ||
+ | ||
+	/* GIT_ITEROVER means the map was exhausted normally; anything else is a failure. */ | ||
+	return (error == GIT_ITEROVER) ? 0 : error; | ||
+} | ||
+ | ||
int git_mempack_dump( | ||
git_buf *pack, | ||
git_repository *repo, | ||
diff --git a/tests/libgit2/mempack/thinpack.c b/tests/libgit2/mempack/thinpack.c | ||
new file mode 100644 | ||
index 000000000..604a4dda2 | ||
--- /dev/null | ||
+++ b/tests/libgit2/mempack/thinpack.c | ||
@@ -0,0 +1,196 @@ | ||
+#include "clar_libgit2.h" | ||
+#include "git2/indexer.h" | ||
+#include "git2/odb_backend.h" | ||
+#include "git2/tree.h" | ||
+#include "git2/types.h" | ||
+#include "git2/sys/mempack.h" | ||
+#include "git2/sys/odb_backend.h" | ||
+#include "util.h" | ||
+ | ||
+static git_repository *_repo; | ||
+static git_odb_backend * _mempack_backend; | ||
+ | ||
+/* | ||
+ Suite setup: create the sandbox repository and attach a mempack backend to | ||
+ its object database. | ||
+*/ | ||
+void test_mempack_thinpack__initialize(void) | ||
+{ | ||
+	git_odb *repo_odb; | ||
+ | ||
+	_repo = cl_git_sandbox_init_new("mempack_thinpack_repo"); | ||
+ | ||
+	/* Create the in-memory backend, then register it on the repository's odb (priority 999). */ | ||
+	cl_git_pass(git_mempack_new(&_mempack_backend)); | ||
+	cl_git_pass(git_repository_odb(&repo_odb, _repo)); | ||
+	cl_git_pass(git_odb_add_backend(repo_odb, _mempack_backend, 999)); | ||
+ | ||
+	/* Release the odb reference obtained from git_repository_odb(). */ | ||
+	git_odb_free(repo_odb); | ||
+} | ||
+ | ||
+/* | ||
+ Suite teardown: remove the sandbox created by the initialize hook. | ||
+ clar only discovers hooks named test_<suite>__<name>; without the "test_" | ||
+ prefix this function was never registered, so the sandbox was never cleaned up. | ||
+*/ | ||
+void test_mempack_thinpack__cleanup(void) | ||
+{ | ||
+	cl_git_sandbox_cleanup(); | ||
+} | ||
+ | ||
+/* | ||
+ Generating a packfile for an unchanged repo works and produces an empty packfile. | ||
+ Even if we allow this scenario to be detected, it shouldn't misbehave if the | ||
+ application is unaware of it. | ||
+ | ||
+ The pack header checked here is: 4-byte signature "PACK", 4-byte big-endian | ||
+ version, 4-byte big-endian object count. | ||
+*/ | ||
+void test_mempack_thinpack__empty(void) | ||
+{ | ||
+	git_packbuilder *pb; | ||
+	const unsigned char *hdr; | ||
+	int version; | ||
+	int n; | ||
+	git_buf buf = GIT_BUF_INIT; | ||
+ | ||
+	cl_git_pass(git_packbuilder_new(&pb, _repo)); | ||
+ | ||
+	cl_git_pass(git_mempack_write_thin_pack(_mempack_backend, pb)); | ||
+	cl_git_pass(git_packbuilder_write_buf(&buf, pb)); | ||
+	cl_assert_in_range(12, buf.size, 1024 /* empty packfile is >0 bytes, but certainly not that big */); | ||
+ | ||
+	/* | ||
+	 * buf.ptr is plain char, whose signedness is implementation-defined. | ||
+	 * Decode through unsigned char so bytes >= 0x80 cannot sign-extend | ||
+	 * into the shifted value. | ||
+	 */ | ||
+	hdr = (const unsigned char *)buf.ptr; | ||
+	cl_assert(hdr[0] == 'P'); | ||
+	cl_assert(hdr[1] == 'A'); | ||
+	cl_assert(hdr[2] == 'C'); | ||
+	cl_assert(hdr[3] == 'K'); | ||
+	version = (int)(((unsigned int)hdr[4] << 24) | ((unsigned int)hdr[5] << 16) | ((unsigned int)hdr[6] << 8) | (unsigned int)hdr[7]); | ||
+	/* Subject to change. https://git-scm.com/docs/pack-format: Git currently accepts version number 2 or 3 but generates version 2 only.*/ | ||
+	cl_assert_equal_i(2, version); | ||
+	n = (int)(((unsigned int)hdr[8] << 24) | ((unsigned int)hdr[9] << 16) | ((unsigned int)hdr[10] << 8) | (unsigned int)hdr[11]); | ||
+	cl_assert_equal_i(0, n); | ||
+	git_buf_dispose(&buf); | ||
+ | ||
+	git_packbuilder_free(pb); | ||
+} | ||
+ | ||
+#define LIT_LEN(x) x, sizeof(x) - 1 | ||
+ | ||
+/* | ||
+ Pack every object currently held by the mempack backend, index the resulting | ||
+ pack into pack_dir, and reset the backend. | ||
+ | ||
+ Returns a heap-allocated copy of the name reported by git_indexer_name(); | ||
+ the caller must free() it. Any failure aborts the running test through the | ||
+ clar assertion macros. | ||
+*/ | ||
+static char *flush_mempack_to_pack(const char *pack_dir) | ||
+{ | ||
+	git_packbuilder *pb; | ||
+	git_indexer *indexer; | ||
+	git_indexer_progress stats; | ||
+	git_buf buf = GIT_BUF_INIT; | ||
+	char *pack_name; | ||
+ | ||
+	cl_git_pass(git_packbuilder_new(&pb, _repo)); | ||
+	cl_git_pass(git_mempack_write_thin_pack(_mempack_backend, pb)); | ||
+	cl_git_pass(git_packbuilder_write_buf(&buf, pb)); | ||
+	cl_git_pass(git_indexer_new(&indexer, pack_dir, 0, NULL, NULL)); | ||
+	cl_git_pass(git_indexer_append(indexer, buf.ptr, buf.size, &stats)); | ||
+	cl_git_pass(git_indexer_commit(indexer, &stats)); | ||
+ | ||
+	/* The name is owned by the indexer, so copy it before the indexer is freed. */ | ||
+	pack_name = strdup(git_indexer_name(indexer)); | ||
+	cl_assert(pack_name); | ||
+ | ||
+	git_buf_dispose(&buf); | ||
+	cl_git_pass(git_mempack_reset(_mempack_backend)); | ||
+	git_indexer_free(indexer); | ||
+	git_packbuilder_free(pb); | ||
+ | ||
+	return pack_name; | ||
+} | ||
+ | ||
+/* | ||
+ Check that git_mempack_write_thin_pack produces a thin packfile. | ||
+*/ | ||
+void test_mempack_thinpack__thin(void) | ||
+{ | ||
+	/* Outline: | ||
+	   - Create tree 1 | ||
+	   - Flush to packfile A | ||
+	   - Create tree 2 | ||
+	   - Flush to packfile B | ||
+ | ||
+	   Tree 2 has a new blob and a reference to a blob from tree 1. | ||
+ | ||
+	   Expectation: | ||
+	   - Packfile B is thin and does not contain the objects from packfile A | ||
+	*/ | ||
+ | ||
+	git_oid oid_blob_1; | ||
+	git_oid oid_blob_2; | ||
+	git_oid oid_blob_3; | ||
+	git_oid oid_tree_1; | ||
+	git_oid oid_tree_2; | ||
+	git_treebuilder *tb; | ||
+ | ||
+	char pack_dir_path[1024]; | ||
+	char sbuf[1024]; | ||
+	int len; | ||
+	const char *repo_path; | ||
+	char *pack_name_1; | ||
+	char *pack_name_2; | ||
+	git_str pack_path_1 = GIT_STR_INIT; | ||
+	git_str pack_path_2 = GIT_STR_INIT; | ||
+	git_odb_backend *pack_odb_backend_1; | ||
+	git_odb_backend *pack_odb_backend_2; | ||
+ | ||
+	repo_path = git_repository_path(_repo); | ||
+ | ||
+	/* snprintf returns the length it wanted to write; a value >= the buffer size means truncation. */ | ||
+	len = snprintf(pack_dir_path, sizeof(pack_dir_path), "%s/objects/pack", repo_path); | ||
+	cl_assert(len >= 0 && (size_t)len < sizeof(pack_dir_path)); | ||
+ | ||
+	/* Create tree 1 */ | ||
+ | ||
+	cl_git_pass(git_blob_create_from_buffer(&oid_blob_1, _repo, LIT_LEN("thinpack blob 1"))); | ||
+	cl_git_pass(git_blob_create_from_buffer(&oid_blob_2, _repo, LIT_LEN("thinpack blob 2"))); | ||
+ | ||
+	cl_git_pass(git_treebuilder_new(&tb, _repo, NULL)); | ||
+	cl_git_pass(git_treebuilder_insert(NULL, tb, "blob1", &oid_blob_1, GIT_FILEMODE_BLOB)); | ||
+	cl_git_pass(git_treebuilder_insert(NULL, tb, "blob2", &oid_blob_2, GIT_FILEMODE_BLOB)); | ||
+	cl_git_pass(git_treebuilder_write(&oid_tree_1, tb)); | ||
+ | ||
+	/* Flush to packfile A */ | ||
+ | ||
+	pack_name_1 = flush_mempack_to_pack(pack_dir_path); | ||
+ | ||
+	/* Create tree 2 */ | ||
+ | ||
+	cl_git_pass(git_treebuilder_clear(tb)); | ||
+	/* blob 1 is not new: it is created again and referenced by tree 2 to test that just | ||
+	   "declaring" an existing object doesn't cause its inclusion in the next thin packfile. | ||
+	   It must only be included if new. */ | ||
+	cl_git_pass(git_blob_create_from_buffer(&oid_blob_1, _repo, LIT_LEN("thinpack blob 1"))); | ||
+	cl_git_pass(git_blob_create_from_buffer(&oid_blob_3, _repo, LIT_LEN("thinpack blob 3"))); | ||
+	cl_git_pass(git_treebuilder_insert(NULL, tb, "blob1", &oid_blob_1, GIT_FILEMODE_BLOB)); | ||
+	cl_git_pass(git_treebuilder_insert(NULL, tb, "blob3", &oid_blob_3, GIT_FILEMODE_BLOB)); | ||
+	cl_git_pass(git_treebuilder_write(&oid_tree_2, tb)); | ||
+ | ||
+	/* Flush to packfile B */ | ||
+ | ||
+	pack_name_2 = flush_mempack_to_pack(pack_dir_path); | ||
+	git_treebuilder_free(tb); | ||
+ | ||
+	/* Assertions */ | ||
+ | ||
+	len = snprintf(sbuf, sizeof(sbuf), "objects/pack/pack-%s.pack", pack_name_1); | ||
+	cl_assert(len >= 0 && (size_t)len < sizeof(sbuf)); | ||
+	cl_git_pass(git_str_joinpath(&pack_path_1, repo_path, sbuf)); | ||
+	len = snprintf(sbuf, sizeof(sbuf), "objects/pack/pack-%s.pack", pack_name_2); | ||
+	cl_assert(len >= 0 && (size_t)len < sizeof(sbuf)); | ||
+	cl_git_pass(git_str_joinpath(&pack_path_2, repo_path, sbuf)); | ||
+ | ||
+	/* If they're the same, something definitely went wrong. */ | ||
+	cl_assert(strcmp(pack_name_1, pack_name_2) != 0); | ||
+ | ||
+	cl_git_pass(git_odb_backend_one_pack(&pack_odb_backend_1, pack_path_1.ptr)); | ||
+	cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_1)); | ||
+	cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_2)); | ||
+	cl_assert(!pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_3)); | ||
+	cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_tree_1)); | ||
+	cl_assert(!pack_odb_backend_1->exists(pack_odb_backend_1, &oid_tree_2)); | ||
+ | ||
+	cl_git_pass(git_odb_backend_one_pack(&pack_odb_backend_2, pack_path_2.ptr)); | ||
+	/* blob 1 is already in the packfile 1, so packfile 2 must not include it, in order to be _thin_. */ | ||
+	cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_1)); | ||
+	cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_2)); | ||
+	cl_assert(pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_3)); | ||
+	cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_tree_1)); | ||
+	cl_assert(pack_odb_backend_2->exists(pack_odb_backend_2, &oid_tree_2)); | ||
+ | ||
+	pack_odb_backend_1->free(pack_odb_backend_1); | ||
+	pack_odb_backend_2->free(pack_odb_backend_2); | ||
+	free(pack_name_1); | ||
+	free(pack_name_2); | ||
+	git_str_dispose(&pack_path_1); | ||
+	git_str_dispose(&pack_path_2); | ||
+} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After reading libgit2 again, this optimization is performed by the later call to `git_packbuilder_write_buf` instead. It is not a `git_mempack_write_thin_pack` responsibility.
I've removed this from the PR I've submitted upstream: libgit2/libgit2#6875.