All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too
Date: Mon,  4 Feb 2013 19:49:05 +0700	[thread overview]
Message-ID: <1359982145-10792-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1359982145-10792-1-git-send-email-pclouds@gmail.com>

While it's unusual to have strange files in the loose object database,
.git/objects/pack/tmp_* files are normal after a broken fetch, and they
can eat up a lot of disk space if the user does not pay attention.
Report them.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The hook in prepare_packed_git_one is ugly, but I don't want to
 duplicate the search file logic there in count-objects. Maybe I'm
 wrong.

 Interestingly, it reports the .commits file in my repo too. A nice
 reminder to myself to remind Jeff about count-objects improvements
 for his commit-cache work :)

 We may also need a more friendly format than this one, which I
 assume is plumbing. Something that the average git user can understand
 without looking up the documentation. If "git stats" is too much for
 this purpose, perhaps "git gc --stats"?

 Documentation/git-count-objects.txt |  4 ++--
 builtin/count-objects.c             | 27 ++++++++++++++++++++++++++-
 sha1_file.c                         | 23 ++++++++++++++++++++---
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in object database that are not valid
+loose objects nor valid packs
 
 GIT
 ---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..e8fabcf 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,8 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
 			  unsigned long *loose,
 			  off_t *loose_size,
@@ -65,6 +67,27 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
 	}
 }
 
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+	if (is_dot_or_dotdot(name))
+		return;
+	if (has_extension(name, ".pack")) {
+		struct strbuf idx_file = STRBUF_INIT;
+		struct stat st;
+
+		strbuf_addf(&idx_file, "%.*s/%.*s.idx", len, path,
+			    (int)strlen(name) - 5, name);
+		if (!stat(idx_file.buf, &st) && S_ISREG(st.st_mode)) {
+			strbuf_release(&idx_file);
+			return;
+		}
+		strbuf_release(&idx_file);
+	}
+	error("garbage found: %.*s/%s", len, path, name);
+	garbage++;
+}
+
 static char const * const count_objects_usage[] = {
 	N_("git count-objects [-v]"),
 	NULL
@@ -76,7 +99,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	const char *objdir = get_object_directory();
 	int len = strlen(objdir);
 	char *path = xmalloc(len + 50);
-	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+	unsigned long loose = 0, packed = 0, packed_loose = 0;
 	off_t loose_size = 0;
 	struct option opts[] = {
 		OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +110,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	/* we do not take arguments other than flags for now */
 	if (argc)
 		usage_with_options(count_objects_usage, opts);
+	if (verbose)
+		report_pack_garbage = real_report_pack_garbage;
 	memcpy(path, objdir, len);
 	if (len && objdir[len-1] != '/')
 		path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..6045946 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1000,6 +1000,17 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
+static void dummy_report_pack_garbage(const char *path,
+				      int len,
+				      const char *name)
+{
+}
+
+void (*report_pack_garbage)(const char *path,
+			    int len,
+			    const char *name) =
+	dummy_report_pack_garbage;
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
 	/* Ensure that this buffer is large enough so that we can
@@ -1024,11 +1035,15 @@ static void prepare_packed_git_one(char *objdir, int local)
 		int namelen = strlen(de->d_name);
 		struct packed_git *p;
 
-		if (!has_extension(de->d_name, ".idx"))
+		if (!has_extension(de->d_name, ".idx")) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
-		if (len + namelen + 1 > sizeof(path))
+		if (len + namelen + 1 > sizeof(path)) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
 		/* Don't reopen a pack we already have. */
 		strcpy(path + len, de->d_name);
@@ -1042,8 +1057,10 @@ static void prepare_packed_git_one(char *objdir, int local)
 		 * .pack file that we can map.
 		 */
 		p = add_packed_git(path, len + namelen, local);
-		if (!p)
+		if (!p) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 		install_packed_git(p);
 	}
 	closedir(dir);
-- 
1.8.1.2.536.gf441e6d

  reply	other threads:[~2013-02-04 12:49 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-04 12:49 [PATCH 1/2] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-04 12:49 ` Nguyễn Thái Ngọc Duy [this message]
2013-02-04 17:06   ` [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too Junio C Hamano
2013-02-04 18:16   ` Junio C Hamano
2013-02-07  7:37     ` Duy Nguyen
2013-02-07 18:12       ` Junio C Hamano
2013-02-07 23:58         ` Duy Nguyen
2013-02-08  3:48 ` [PATCH v2 0/3] count-objects improvements Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` [PATCH v2 1/3] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` [PATCH v2 2/3] count-objects: report garbage files in pack directory too Nguyễn Thái Ngọc Duy
2013-02-08 18:44     ` Junio C Hamano
2013-02-09  1:58       ` Duy Nguyen
2013-02-08  3:48   ` [PATCH v2 3/3] count-objects: report how much disk space taken by garbage files Nguyễn Thái Ngọc Duy
2013-02-08 18:47     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1359982145-10792-2-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.