git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Michael Haggerty <mhagger@alum.mit.edu>,
	Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 16/25] pack-objects: match prune logic for discarding objects
Date: Wed, 15 Oct 2014 18:42:09 -0400	[thread overview]
Message-ID: <20141015224209.GP25630@peff.net> (raw)
In-Reply-To: <20141015223244.GA25368@peff.net>

A recent commit taught git-prune to keep non-recent objects
that are reachable from recent ones. However, pack-objects,
when loosening unreachable objects, tries to optimize out
the write in the case that the object will be immediately
pruned. It now gets this wrong, since its rule does not
reflect the new prune code (and this can be seen by running
t6501 with a strategically placed repack).

Let's teach pack-objects similar logic.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/pack-objects.c     | 39 +++++++++++++++++++
 reachable.c                |  4 +-
 reachable.h                |  2 +
 t/t6501-freshen-objects.sh | 93 +++++++++++++++++++++++++++-------------------
 4 files changed, 98 insertions(+), 40 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 2fe2ab0..4df9499 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -20,6 +20,8 @@
 #include "streaming.h"
 #include "thread-utils.h"
 #include "pack-bitmap.h"
+#include "reachable.h"
+#include "sha1-array.h"
 
 static const char *pack_usage[] = {
 	N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
@@ -2407,6 +2409,15 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
 	return 0;
 }
 
+/*
+ * Store a list of sha1s that should not be discarded
+ * because they are either written too recently, or are
+ * reachable from another object that was.
+ *
+ * This is filled by get_object_list.
+ */
+static struct sha1_array recent_objects;
+
 static int loosened_object_can_be_discarded(const unsigned char *sha1,
 					    unsigned long mtime)
 {
@@ -2414,6 +2425,8 @@ static int loosened_object_can_be_discarded(const unsigned char *sha1,
 		return 0;
 	if (mtime > unpack_unreachable_expiration)
 		return 0;
+	if (sha1_array_lookup(&recent_objects, sha1) >= 0)
+		return 0;
 	return 1;
 }
 
@@ -2470,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 	return 0;
 }
 
+static void record_recent_object(struct object *obj,
+				 const struct name_path *path,
+				 const char *last,
+				 void *data)
+{
+	sha1_array_append(&recent_objects, obj->sha1);
+}
+
+static void record_recent_commit(struct commit *commit, void *data)
+{
+	sha1_array_append(&recent_objects, commit->object.sha1);
+}
+
 static void get_object_list(int ac, const char **av)
 {
 	struct rev_info revs;
@@ -2517,10 +2543,23 @@ static void get_object_list(int ac, const char **av)
 	mark_edges_uninteresting(&revs, show_edge);
 	traverse_commit_list(&revs, show_commit, show_object, NULL);
 
+	if (unpack_unreachable_expiration) {
+		revs.ignore_missing_links = 1;
+		if (add_unseen_recent_objects_to_traversal(&revs,
+				unpack_unreachable_expiration))
+			die("unable to add recent objects");
+		if (prepare_revision_walk(&revs))
+			die("revision walk setup failed");
+		traverse_commit_list(&revs, record_recent_commit,
+				     record_recent_object, NULL);
+	}
+
 	if (keep_unreachable)
 		add_objects_in_unpacked_packs(&revs);
 	if (unpack_unreachable)
 		loosen_unused_packed_objects(&revs);
+
+	sha1_array_clear(&recent_objects);
 }
 
 static int option_parse_index_version(const struct option *opt,
diff --git a/reachable.c b/reachable.c
index 55589a0..0176a88 100644
--- a/reachable.c
+++ b/reachable.c
@@ -183,8 +183,8 @@ static int add_recent_packed(const unsigned char *sha1,
 	return 0;
 }
 
-static int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
-						  unsigned long timestamp)
+int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
+					   unsigned long timestamp)
 {
 	struct recent_data data;
 	int r;
diff --git a/reachable.h b/reachable.h
index 141fe30..d23efc3 100644
--- a/reachable.h
+++ b/reachable.h
@@ -2,6 +2,8 @@
 #define REACHEABLE_H
 
 struct progress;
+extern int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
+						  unsigned long timestamp);
 extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 				   unsigned long mark_recent, struct progress *);
 
diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh
index de941c2..e25c47d 100755
--- a/t/t6501-freshen-objects.sh
+++ b/t/t6501-freshen-objects.sh
@@ -39,50 +39,67 @@ commit () {
 	git commit -m "$1"
 }
 
-test_expect_success 'disable reflogs' '
-	git config core.logallrefupdates false &&
-	rm -rf .git/logs
-'
+maybe_repack () {
+	if test -n "$repack"; then
+		git repack -ad
+	fi
+}
+
+for repack in '' true; do
+	title=${repack:+repack}
+	title=${title:-loose}
+
+	test_expect_success "make repo completely empty ($title)" '
+		rm -rf .git &&
+		git init
+	'
+
+	test_expect_success "disable reflogs ($title)" '
+		git config core.logallrefupdates false &&
+		rm -rf .git/logs
+	'
 
-test_expect_success 'setup basic history' '
-	commit base
-'
+	test_expect_success "setup basic history ($title)" '
+		commit base
+	'
 
-test_expect_success 'create and abandon some objects' '
-	git checkout -b experiment &&
-	commit abandon &&
-	git checkout master &&
-	git branch -D experiment
-'
+	test_expect_success "create and abandon some objects ($title)" '
+		git checkout -b experiment &&
+		commit abandon &&
+		maybe_repack &&
+		git checkout master &&
+		git branch -D experiment
+	'
 
-test_expect_success 'simulate time passing' '
-	find .git/objects -type f |
-	xargs test-chmtime -v -86400
-'
+	test_expect_success "simulate time passing ($title)" '
+		find .git/objects -type f |
+		xargs test-chmtime -v -86400
+	'
 
-test_expect_success 'start writing new commit with old blob' '
-	tree=$(
-		GIT_INDEX_FILE=index.tmp &&
-		export GIT_INDEX_FILE &&
-		git read-tree HEAD &&
-		add unrelated &&
-		add abandon &&
-		git write-tree
-	)
-'
+	test_expect_success "start writing new commit with old blob ($title)" '
+		tree=$(
+			GIT_INDEX_FILE=index.tmp &&
+			export GIT_INDEX_FILE &&
+			git read-tree HEAD &&
+			add unrelated &&
+			add abandon &&
+			git write-tree
+		)
+	'
 
-test_expect_success 'simultaneous gc' '
-	git gc --prune=12.hours.ago
-'
+	test_expect_success "simultaneous gc ($title)" '
+		git gc --prune=12.hours.ago
+	'
 
-test_expect_success 'finish writing out commit' '
-	commit=$(echo foo | git commit-tree -p HEAD $tree) &&
-	git update-ref HEAD $commit
-'
+	test_expect_success "finish writing out commit ($title)" '
+		commit=$(echo foo | git commit-tree -p HEAD $tree) &&
+		git update-ref HEAD $commit
+	'
 
-# "abandon" blob should have been rescued by reference from new tree
-test_expect_success 'repository passes fsck' '
-	git fsck
-'
+	# "abandon" blob should have been rescued by reference from new tree
+	test_expect_success "repository passes fsck ($title)" '
+		git fsck
+	'
+done
 
 test_done
-- 
2.1.2.596.g7379948

  parent reply	other threads:[~2014-10-15 22:42 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-15 22:32 [PATCH v2 0/25] prune-safety Jeff King
2014-10-15 22:33 ` [PATCH v2 01/25] foreach_alt_odb: propagate return value from callback Jeff King
2014-10-15 22:34 ` [PATCH v2 02/25] isxdigit: cast input to unsigned char Jeff King
2014-10-16 17:16   ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 03/25] object_array: factor out slopbuf-freeing logic Jeff King
2014-10-16 17:39   ` Junio C Hamano
2014-10-17  0:33     ` git-bundle rev handling and de-duping Jeff King
2014-10-17 21:03       ` Philip Oakley
2014-10-17 22:41         ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 04/25] object_array: add a "clear" function Jeff King
2014-10-15 22:35 ` [PATCH v2 05/25] clean up name allocation in prepare_revision_walk Jeff King
2014-10-15 22:37 ` [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk Jeff King
2014-10-16 17:53   ` Junio C Hamano
2014-10-15 22:38 ` [PATCH v2 07/25] reachable: reuse revision.c "add all reflogs" code Jeff King
2014-10-15 22:38 ` [PATCH v2 08/25] prune: factor out loose-object directory traversal Jeff King
2014-10-15 22:40 ` [PATCH v2 09/25] reachable: mark index blobs as SEEN Jeff King
2014-10-15 22:40 ` [PATCH v2 10/25] prune-packed: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:40 ` [PATCH v2 11/25] count-objects: do not use xsize_t when counting object size Jeff King
2014-10-15 22:41 ` [PATCH v2 12/25] count-objects: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:41 ` [PATCH v2 13/25] sha1_file: add for_each iterators for loose and packed objects Jeff King
2014-10-15 22:41 ` [PATCH v2 14/25] prune: keep objects reachable from recent objects Jeff King
2014-10-15 22:41 ` [PATCH v2 15/25] pack-objects: refactor unpack-unreachable expiration check Jeff King
2014-10-15 22:42 ` Jeff King [this message]
2014-10-15 22:42 ` [PATCH v2 17/25] write_sha1_file: freshen existing objects Jeff King
2014-10-15 22:42 ` [PATCH v2 18/25] make add_object_array_with_context interface more sane Jeff King
2014-10-15 22:43 ` [PATCH v2 19/25] traverse_commit_list: support pending blobs/trees with paths Jeff King
2014-10-15 22:43 ` [PATCH v2 20/25] rev-list: document --reflog option Jeff King
2014-10-15 22:44 ` [PATCH v2 21/25] rev-list: add --index-objects option Jeff King
2014-10-16 18:41   ` Junio C Hamano
2014-10-17  0:12     ` Jeff King
2014-10-17  0:43       ` Jeff King
2014-10-17  0:44         ` [PATCH v3 22/26] rev-list: add --indexed-objects option Jeff King
2014-10-17  0:44         ` [PATCH v3 23/26] reachable: use revision machinery's --indexed-objects code Jeff King
2014-10-17  0:44         ` [PATCH v3 24/26] pack-objects: use argv_array Jeff King
2014-10-17  0:44         ` [PATCH v3 25/26] repack: pack objects mentioned by the index Jeff King
2014-10-17  0:44         ` [PATCH v3 26/26] pack-objects: double-check options before discarding objects Jeff King
2014-10-15 22:44 ` [PATCH v2 22/25] reachable: use revision machinery's --index-objects code Jeff King
2014-10-15 22:45 ` [PATCH v2 23/25] pack-objects: use argv_array Jeff King
2014-10-15 22:46 ` [PATCH v2 24/25] repack: pack objects mentioned by the index Jeff King
2014-10-15 22:48 ` [PATCH v2 25/25] pack-objects: double-check options before discarding objects Jeff King
2014-10-16 21:07 ` [PATCH v2 0/25] prune-safety Junio C Hamano
2014-10-16 21:10   ` Junio C Hamano
2014-10-16 21:21   ` Jeff King
2014-10-16 21:39     ` Jeff King
2014-10-16 22:18       ` Junio C Hamano
2014-10-17  0:03         ` Jeff King
     [not found]       ` <CAPc5daX0AFv9jDrFyd_OnupW5AfZW9Je_rgzaViX_xxs3SG5zg@mail.gmail.com>
2014-10-17  4:49         ` Jeff King
2014-10-18 12:31       ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141015224209.GP25630@peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mhagger@alum.mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).