From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Michael Haggerty <mhagger@alum.mit.edu>,
Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 08/25] prune: factor out loose-object directory traversal
Date: Wed, 15 Oct 2014 18:38:55 -0400 [thread overview]
Message-ID: <20141015223855.GH25630@peff.net> (raw)
In-Reply-To: <20141015223244.GA25368@peff.net>
Prune has to walk $GIT_DIR/objects/?? in order to find the
set of loose objects to prune. Other parts of the code
(e.g., count-objects) want to do the same. Let's factor it
out into a reusable for_each-style function.
Note that this is not quite a straight code movement. The
original code had strange behavior when it found a file of
the form "[0-9a-f]{2}/.{38}" that did _not_ contain all hex
digits. It executed a "break" from the loop, meaning that we
stopped pruning in that directory (but still pruned other
directories!). This was probably a bug; we do not want to
process the file as an object, but we should keep going
otherwise (and that is how the new code handles it).
We are also a little more careful with loose object
directories which fail to open. The original code silently
ignored any failures, but the new code will complain about
any problems besides ENOENT.
Signed-off-by: Jeff King <peff@peff.net>
---
builtin/prune.c | 87 +++++++++++++++++----------------------------------------
cache.h | 33 ++++++++++++++++++++++
sha1_file.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 143 insertions(+), 61 deletions(-)
diff --git a/builtin/prune.c b/builtin/prune.c
index 144a3bd..763f53e 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath)
return 0;
}
-static int prune_object(const char *fullpath, const unsigned char *sha1)
+static int prune_object(const unsigned char *sha1, const char *fullpath,
+ void *data)
{
struct stat st;
- if (lstat(fullpath, &st))
- return error("Could not stat '%s'", fullpath);
+
+ /*
+ * Do we know about this object?
+ * It must have been reachable
+ */
+ if (lookup_object(sha1))
+ return 0;
+
+ if (lstat(fullpath, &st)) {
+ /* report errors, but do not stop pruning */
+ error("Could not stat '%s'", fullpath);
+ return 0;
+ }
if (st.st_mtime > expire)
return 0;
if (show_only || verbose) {
@@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1)
return 0;
}
-static int prune_dir(int i, struct strbuf *path)
+static int prune_cruft(const char *basename, const char *path, void *data)
{
- size_t baselen = path->len;
- DIR *dir = opendir(path->buf);
- struct dirent *de;
-
- if (!dir)
- return 0;
-
- while ((de = readdir(dir)) != NULL) {
- char name[100];
- unsigned char sha1[20];
-
- if (is_dot_or_dotdot(de->d_name))
- continue;
- if (strlen(de->d_name) == 38) {
- sprintf(name, "%02x", i);
- memcpy(name+2, de->d_name, 39);
- if (get_sha1_hex(name, sha1) < 0)
- break;
-
- /*
- * Do we know about this object?
- * It must have been reachable
- */
- if (lookup_object(sha1))
- continue;
-
- strbuf_addf(path, "/%s", de->d_name);
- prune_object(path->buf, sha1);
- strbuf_setlen(path, baselen);
- continue;
- }
- if (starts_with(de->d_name, "tmp_obj_")) {
- strbuf_addf(path, "/%s", de->d_name);
- prune_tmp_file(path->buf);
- strbuf_setlen(path, baselen);
- continue;
- }
- fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name);
- }
- closedir(dir);
- if (!show_only)
- rmdir(path->buf);
+ if (starts_with(basename, "tmp_obj_"))
+ prune_tmp_file(path);
+ else
+ fprintf(stderr, "bad sha1 file: %s\n", path);
return 0;
}
-static void prune_object_dir(const char *path)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct strbuf buf = STRBUF_INIT;
- size_t baselen;
- int i;
-
- strbuf_addstr(&buf, path);
- strbuf_addch(&buf, '/');
- baselen = buf.len;
-
- for (i = 0; i < 256; i++) {
- strbuf_addf(&buf, "%02x", i);
- prune_dir(i, &buf);
- strbuf_setlen(&buf, baselen);
- }
+ if (!show_only)
+ rmdir(path);
+ return 0;
}
/*
@@ -173,7 +137,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
mark_reachable_objects(&revs, 1, progress);
stop_progress(&progress);
- prune_object_dir(get_object_directory());
+ for_each_loose_file_in_objdir(get_object_directory(), prune_object,
+ prune_cruft, prune_subdir, NULL);
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
remove_temporary_files(get_object_directory());
diff --git a/cache.h b/cache.h
index 13fadb6..8ffefaa 100644
--- a/cache.h
+++ b/cache.h
@@ -1221,6 +1221,39 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+/*
+ * Iterate over the files in the loose-object parts of the object
+ * directory "path", triggering the following callbacks:
+ *
+ * - loose_object is called for each loose object we find.
+ *
+ * - loose_cruft is called for any files that do not appear to be
+ * loose objects. Note that we only look in the loose object
+ * directories "objects/[0-9a-f]{2}/", so we will not report
+ * "objects/foobar" as cruft.
+ *
+ * - loose_subdir is called for each top-level hashed subdirectory
+ * of the object directory (e.g., "$OBJDIR/f0"). It is called
+ * after the objects in the directory are processed.
+ *
+ * Any callback that is NULL will be ignored. Callbacks returning non-zero
+ * will end the iteration.
+ */
+typedef int each_loose_object_fn(const unsigned char *sha1,
+ const char *path,
+ void *data);
+typedef int each_loose_cruft_fn(const char *basename,
+ const char *path,
+ void *data);
+typedef int each_loose_subdir_fn(int nr,
+ const char *path,
+ void *data);
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+
struct object_info {
/* Request */
enum object_type *typep;
diff --git a/sha1_file.c b/sha1_file.c
index fa881bf..a20240b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3265,3 +3265,87 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
+
+static int for_each_file_in_obj_subdir(int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data)
+{
+ size_t baselen = path->len;
+ DIR *dir = opendir(path->buf);
+ struct dirent *de;
+ int r = 0;
+
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+ return error("unable to open %s: %s", path->buf, strerror(errno));
+ }
+
+ while ((de = readdir(dir))) {
+ if (is_dot_or_dotdot(de->d_name))
+ continue;
+
+ strbuf_setlen(path, baselen);
+ strbuf_addf(path, "/%s", de->d_name);
+
+ if (strlen(de->d_name) == 38) {
+ char hex[41];
+ unsigned char sha1[20];
+
+ snprintf(hex, sizeof(hex), "%02x%s",
+ subdir_nr, de->d_name);
+ if (!get_sha1_hex(hex, sha1)) {
+ if (obj_cb) {
+ r = obj_cb(sha1, path->buf, data);
+ if (r)
+ break;
+ }
+ continue;
+ }
+ }
+
+ if (cruft_cb) {
+ r = cruft_cb(de->d_name, path->buf, data);
+ if (r)
+ break;
+ }
+ }
+ strbuf_setlen(path, baselen);
+
+ if (!r && subdir_cb)
+ r = subdir_cb(subdir_nr, path->buf, data);
+
+ closedir(dir);
+ return r;
+}
+
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data)
+{
+ struct strbuf buf = STRBUF_INIT;
+ size_t baselen;
+ int r = 0;
+ int i;
+
+ strbuf_addstr(&buf, path);
+ strbuf_addch(&buf, '/');
+ baselen = buf.len;
+
+ for (i = 0; i < 256; i++) {
+ strbuf_addf(&buf, "%02x", i);
+ r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb,
+ subdir_cb, data);
+ strbuf_setlen(&buf, baselen);
+ if (r)
+ break;
+ }
+
+ strbuf_release(&buf);
+ return r;
+}
--
2.1.2.596.g7379948
next prev parent reply other threads:[~2014-10-15 22:39 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-15 22:32 [PATCH v2 0/25] prune-safety Jeff King
2014-10-15 22:33 ` [PATCH v2 01/25] foreach_alt_odb: propagate return value from callback Jeff King
2014-10-15 22:34 ` [PATCH v2 02/25] isxdigit: cast input to unsigned char Jeff King
2014-10-16 17:16 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 03/25] object_array: factor out slopbuf-freeing logic Jeff King
2014-10-16 17:39 ` Junio C Hamano
2014-10-17 0:33 ` git-bundle rev handling and de-duping Jeff King
2014-10-17 21:03 ` Philip Oakley
2014-10-17 22:41 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 04/25] object_array: add a "clear" function Jeff King
2014-10-15 22:35 ` [PATCH v2 05/25] clean up name allocation in prepare_revision_walk Jeff King
2014-10-15 22:37 ` [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk Jeff King
2014-10-16 17:53 ` Junio C Hamano
2014-10-15 22:38 ` [PATCH v2 07/25] reachable: reuse revision.c "add all reflogs" code Jeff King
2014-10-15 22:38 ` Jeff King [this message]
2014-10-15 22:40 ` [PATCH v2 09/25] reachable: mark index blobs as SEEN Jeff King
2014-10-15 22:40 ` [PATCH v2 10/25] prune-packed: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:40 ` [PATCH v2 11/25] count-objects: do not use xsize_t when counting object size Jeff King
2014-10-15 22:41 ` [PATCH v2 12/25] count-objects: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:41 ` [PATCH v2 13/25] sha1_file: add for_each iterators for loose and packed objects Jeff King
2014-10-15 22:41 ` [PATCH v2 14/25] prune: keep objects reachable from recent objects Jeff King
2014-10-15 22:41 ` [PATCH v2 15/25] pack-objects: refactor unpack-unreachable expiration check Jeff King
2014-10-15 22:42 ` [PATCH v2 16/25] pack-objects: match prune logic for discarding objects Jeff King
2014-10-15 22:42 ` [PATCH v2 17/25] write_sha1_file: freshen existing objects Jeff King
2014-10-15 22:42 ` [PATCH v2 18/25] make add_object_array_with_context interface more sane Jeff King
2014-10-15 22:43 ` [PATCH v2 19/25] traverse_commit_list: support pending blobs/trees with paths Jeff King
2014-10-15 22:43 ` [PATCH v2 20/25] rev-list: document --reflog option Jeff King
2014-10-15 22:44 ` [PATCH v2 21/25] rev-list: add --index-objects option Jeff King
2014-10-16 18:41 ` Junio C Hamano
2014-10-17 0:12 ` Jeff King
2014-10-17 0:43 ` Jeff King
2014-10-17 0:44 ` [PATCH v3 22/26] rev-list: add --indexed-objects option Jeff King
2014-10-17 0:44 ` [PATCH v3 23/26] reachable: use revision machinery's --indexed-objects code Jeff King
2014-10-17 0:44 ` [PATCH v3 24/26] pack-objects: use argv_array Jeff King
2014-10-17 0:44 ` [PATCH v3 25/26] repack: pack objects mentioned by the index Jeff King
2014-10-17 0:44 ` [PATCH v3 26/26] pack-objects: double-check options before discarding objects Jeff King
2014-10-15 22:44 ` [PATCH v2 22/25] reachable: use revision machinery's --index-objects code Jeff King
2014-10-15 22:45 ` [PATCH v2 23/25] pack-objects: use argv_array Jeff King
2014-10-15 22:46 ` [PATCH v2 24/25] repack: pack objects mentioned by the index Jeff King
2014-10-15 22:48 ` [PATCH v2 25/25] pack-objects: double-check options before discarding objects Jeff King
2014-10-16 21:07 ` [PATCH v2 0/25] prune-safety Junio C Hamano
2014-10-16 21:10 ` Junio C Hamano
2014-10-16 21:21 ` Jeff King
2014-10-16 21:39 ` Jeff King
2014-10-16 22:18 ` Junio C Hamano
2014-10-17 0:03 ` Jeff King
[not found] ` <CAPc5daX0AFv9jDrFyd_OnupW5AfZW9Je_rgzaViX_xxs3SG5zg@mail.gmail.com>
2014-10-17 4:49 ` Jeff King
2014-10-18 12:31 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141015223855.GH25630@peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.