From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Michael Haggerty <mhagger@alum.mit.edu>,
Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 08/25] prune: factor out loose-object directory traversal
Date: Wed, 15 Oct 2014 18:38:55 -0400 [thread overview]
Message-ID: <20141015223855.GH25630@peff.net> (raw)
In-Reply-To: <20141015223244.GA25368@peff.net>
Prune has to walk $GIT_DIR/objects/?? in order to find the
set of loose objects to prune. Other parts of the code
(e.g., count-objects) want to do the same. Let's factor it
out into a reusable for_each-style function.

Note that this is not quite a straight code movement. The
original code had strange behavior when it found a file of
the form "[0-9a-f]{2}/.{38}" that did _not_ contain all hex
digits. It executed a "break" from the loop, meaning that we
stopped pruning in that directory (but still pruned other
directories!). This was probably a bug; we do not want to
process the file as an object, but we should keep going
otherwise (and that is how the new code handles it).

We are also a little more careful with loose object
directories which fail to open. The original code silently
ignored any failures, but the new code will complain about
any problems besides ENOENT.

Signed-off-by: Jeff King <peff@peff.net>
---
builtin/prune.c | 87 +++++++++++++++++----------------------------------------
cache.h | 33 ++++++++++++++++++++++
sha1_file.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 143 insertions(+), 61 deletions(-)
diff --git a/builtin/prune.c b/builtin/prune.c
index 144a3bd..763f53e 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath)
return 0;
}
-static int prune_object(const char *fullpath, const unsigned char *sha1)
+static int prune_object(const unsigned char *sha1, const char *fullpath,
+ void *data)
{
struct stat st;
- if (lstat(fullpath, &st))
- return error("Could not stat '%s'", fullpath);
+
+ /*
+ * Do we know about this object?
+ * It must have been reachable
+ */
+ if (lookup_object(sha1))
+ return 0;
+
+ if (lstat(fullpath, &st)) {
+ /* report errors, but do not stop pruning */
+ error("Could not stat '%s'", fullpath);
+ return 0;
+ }
if (st.st_mtime > expire)
return 0;
if (show_only || verbose) {
@@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1)
return 0;
}
-static int prune_dir(int i, struct strbuf *path)
+static int prune_cruft(const char *basename, const char *path, void *data)
{
- size_t baselen = path->len;
- DIR *dir = opendir(path->buf);
- struct dirent *de;
-
- if (!dir)
- return 0;
-
- while ((de = readdir(dir)) != NULL) {
- char name[100];
- unsigned char sha1[20];
-
- if (is_dot_or_dotdot(de->d_name))
- continue;
- if (strlen(de->d_name) == 38) {
- sprintf(name, "%02x", i);
- memcpy(name+2, de->d_name, 39);
- if (get_sha1_hex(name, sha1) < 0)
- break;
-
- /*
- * Do we know about this object?
- * It must have been reachable
- */
- if (lookup_object(sha1))
- continue;
-
- strbuf_addf(path, "/%s", de->d_name);
- prune_object(path->buf, sha1);
- strbuf_setlen(path, baselen);
- continue;
- }
- if (starts_with(de->d_name, "tmp_obj_")) {
- strbuf_addf(path, "/%s", de->d_name);
- prune_tmp_file(path->buf);
- strbuf_setlen(path, baselen);
- continue;
- }
- fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name);
- }
- closedir(dir);
- if (!show_only)
- rmdir(path->buf);
+ if (starts_with(basename, "tmp_obj_"))
+ prune_tmp_file(path);
+ else
+ fprintf(stderr, "bad sha1 file: %s\n", path);
return 0;
}
-static void prune_object_dir(const char *path)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct strbuf buf = STRBUF_INIT;
- size_t baselen;
- int i;
-
- strbuf_addstr(&buf, path);
- strbuf_addch(&buf, '/');
- baselen = buf.len;
-
- for (i = 0; i < 256; i++) {
- strbuf_addf(&buf, "%02x", i);
- prune_dir(i, &buf);
- strbuf_setlen(&buf, baselen);
- }
+ if (!show_only)
+ rmdir(path);
+ return 0;
}
/*
@@ -173,7 +137,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
mark_reachable_objects(&revs, 1, progress);
stop_progress(&progress);
- prune_object_dir(get_object_directory());
+ for_each_loose_file_in_objdir(get_object_directory(), prune_object,
+ prune_cruft, prune_subdir, NULL);
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
remove_temporary_files(get_object_directory());
diff --git a/cache.h b/cache.h
index 13fadb6..8ffefaa 100644
--- a/cache.h
+++ b/cache.h
@@ -1221,6 +1221,39 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+/*
+ * Iterate over the files in the loose-object parts of the object
+ * directory "path", triggering the following callbacks:
+ *
+ * - loose_object is called for each loose object we find.
+ *
+ * - loose_cruft is called for any files that do not appear to be
+ * loose objects. Note that we only look in the loose object
+ * directories "objects/[0-9a-f]{2}/", so we will not report
+ * "objects/foobar" as cruft.
+ *
+ * - loose_subdir is called for each top-level hashed subdirectory
+ * of the object directory (e.g., "$OBJDIR/f0"). It is called
+ * after the objects in the directory are processed.
+ *
+ * Any callback that is NULL will be ignored. Callbacks returning non-zero
+ * will end the iteration.
+ */
+typedef int each_loose_object_fn(const unsigned char *sha1,
+ const char *path,
+ void *data);
+typedef int each_loose_cruft_fn(const char *basename,
+ const char *path,
+ void *data);
+typedef int each_loose_subdir_fn(int nr,
+ const char *path,
+ void *data);
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+
struct object_info {
/* Request */
enum object_type *typep;
diff --git a/sha1_file.c b/sha1_file.c
index fa881bf..a20240b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3265,3 +3265,87 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
+
+static int for_each_file_in_obj_subdir(int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data)
+{
+ size_t baselen = path->len;
+ DIR *dir = opendir(path->buf);
+ struct dirent *de;
+ int r = 0;
+
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+ return error("unable to open %s: %s", path->buf, strerror(errno));
+ }
+
+ while ((de = readdir(dir))) {
+ if (is_dot_or_dotdot(de->d_name))
+ continue;
+
+ strbuf_setlen(path, baselen);
+ strbuf_addf(path, "/%s", de->d_name);
+
+ if (strlen(de->d_name) == 38) {
+ char hex[41];
+ unsigned char sha1[20];
+
+ snprintf(hex, sizeof(hex), "%02x%s",
+ subdir_nr, de->d_name);
+ if (!get_sha1_hex(hex, sha1)) {
+ if (obj_cb) {
+ r = obj_cb(sha1, path->buf, data);
+ if (r)
+ break;
+ }
+ continue;
+ }
+ }
+
+ if (cruft_cb) {
+ r = cruft_cb(de->d_name, path->buf, data);
+ if (r)
+ break;
+ }
+ }
+ strbuf_setlen(path, baselen);
+
+ if (!r && subdir_cb)
+ r = subdir_cb(subdir_nr, path->buf, data);
+
+ closedir(dir);
+ return r;
+}
+
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data)
+{
+ struct strbuf buf = STRBUF_INIT;
+ size_t baselen;
+ int r = 0;
+ int i;
+
+ strbuf_addstr(&buf, path);
+ strbuf_addch(&buf, '/');
+ baselen = buf.len;
+
+ for (i = 0; i < 256; i++) {
+ strbuf_addf(&buf, "%02x", i);
+ r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb,
+ subdir_cb, data);
+ strbuf_setlen(&buf, baselen);
+ if (r)
+ break;
+ }
+
+ strbuf_release(&buf);
+ return r;
+}
--
2.1.2.596.g7379948
next prev parent reply other threads:[~2014-10-15 22:39 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-15 22:32 [PATCH v2 0/25] prune-safety Jeff King
2014-10-15 22:33 ` [PATCH v2 01/25] foreach_alt_odb: propagate return value from callback Jeff King
2014-10-15 22:34 ` [PATCH v2 02/25] isxdigit: cast input to unsigned char Jeff King
2014-10-16 17:16 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 03/25] object_array: factor out slopbuf-freeing logic Jeff King
2014-10-16 17:39 ` Junio C Hamano
2014-10-17 0:33 ` git-bundle rev handling and de-duping Jeff King
2014-10-17 21:03 ` Philip Oakley
2014-10-17 22:41 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 04/25] object_array: add a "clear" function Jeff King
2014-10-15 22:35 ` [PATCH v2 05/25] clean up name allocation in prepare_revision_walk Jeff King
2014-10-15 22:37 ` [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk Jeff King
2014-10-16 17:53 ` Junio C Hamano
2014-10-15 22:38 ` [PATCH v2 07/25] reachable: reuse revision.c "add all reflogs" code Jeff King
2014-10-15 22:38 ` Jeff King [this message]
2014-10-15 22:40 ` [PATCH v2 09/25] reachable: mark index blobs as SEEN Jeff King
2014-10-15 22:40 ` [PATCH v2 10/25] prune-packed: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:40 ` [PATCH v2 11/25] count-objects: do not use xsize_t when counting object size Jeff King
2014-10-15 22:41 ` [PATCH v2 12/25] count-objects: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:41 ` [PATCH v2 13/25] sha1_file: add for_each iterators for loose and packed objects Jeff King
2014-10-15 22:41 ` [PATCH v2 14/25] prune: keep objects reachable from recent objects Jeff King
2014-10-15 22:41 ` [PATCH v2 15/25] pack-objects: refactor unpack-unreachable expiration check Jeff King
2014-10-15 22:42 ` [PATCH v2 16/25] pack-objects: match prune logic for discarding objects Jeff King
2014-10-15 22:42 ` [PATCH v2 17/25] write_sha1_file: freshen existing objects Jeff King
2014-10-15 22:42 ` [PATCH v2 18/25] make add_object_array_with_context interface more sane Jeff King
2014-10-15 22:43 ` [PATCH v2 19/25] traverse_commit_list: support pending blobs/trees with paths Jeff King
2014-10-15 22:43 ` [PATCH v2 20/25] rev-list: document --reflog option Jeff King
2014-10-15 22:44 ` [PATCH v2 21/25] rev-list: add --index-objects option Jeff King
2014-10-16 18:41 ` Junio C Hamano
2014-10-17 0:12 ` Jeff King
2014-10-17 0:43 ` Jeff King
2014-10-17 0:44 ` [PATCH v3 22/26] rev-list: add --indexed-objects option Jeff King
2014-10-17 0:44 ` [PATCH v3 23/26] reachable: use revision machinery's --indexed-objects code Jeff King
2014-10-17 0:44 ` [PATCH v3 24/26] pack-objects: use argv_array Jeff King
2014-10-17 0:44 ` [PATCH v3 25/26] repack: pack objects mentioned by the index Jeff King
2014-10-17 0:44 ` [PATCH v3 26/26] pack-objects: double-check options before discarding objects Jeff King
2014-10-15 22:44 ` [PATCH v2 22/25] reachable: use revision machinery's --index-objects code Jeff King
2014-10-15 22:45 ` [PATCH v2 23/25] pack-objects: use argv_array Jeff King
2014-10-15 22:46 ` [PATCH v2 24/25] repack: pack objects mentioned by the index Jeff King
2014-10-15 22:48 ` [PATCH v2 25/25] pack-objects: double-check options before discarding objects Jeff King
2014-10-16 21:07 ` [PATCH v2 0/25] prune-safety Junio C Hamano
2014-10-16 21:10 ` Junio C Hamano
2014-10-16 21:21 ` Jeff King
2014-10-16 21:39 ` Jeff King
2014-10-16 22:18 ` Junio C Hamano
2014-10-17 0:03 ` Jeff King
[not found] ` <CAPc5daX0AFv9jDrFyd_OnupW5AfZW9Je_rgzaViX_xxs3SG5zg@mail.gmail.com>
2014-10-17 4:49 ` Jeff King
2014-10-18 12:31 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141015223855.GH25630@peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).