git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too
Date: Mon,  4 Feb 2013 19:49:05 +0700	[thread overview]
Message-ID: <1359982145-10792-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1359982145-10792-1-git-send-email-pclouds@gmail.com>

While it's unusual to have strange files in the loose object database,
.git/objects/pack/tmp_* files are normal after a broken fetch, and they
can eat up a lot of disk space if the user does not pay attention.
Report them.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The hook in prepare_packed_git_one is ugly, but I don't want to
 duplicate the search file logic there in count-objects. Maybe I'm
 wrong.

 Interestingly, it reports the .commits file in my repo too. A nice
 reminder to myself to remind Jeff about count-objects improvements
 for his commit-cache work :)

 We may also need a more friendly format than this one, which I
 assume is plumbing. Something that the average git user can understand
 without looking up the documentation. If "git stats" is too much for
 this purpose, perhaps "git gc --stats"?

 Documentation/git-count-objects.txt |  4 ++--
 builtin/count-objects.c             | 27 ++++++++++++++++++++++++++-
 sha1_file.c                         | 23 ++++++++++++++++++++---
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in object database that are not valid
+loose objects nor valid packs
 
 GIT
 ---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..e8fabcf 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,8 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
 			  unsigned long *loose,
 			  off_t *loose_size,
@@ -65,6 +67,27 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
 	}
 }
 
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+	if (is_dot_or_dotdot(name))
+		return;
+	if (has_extension(name, ".pack")) {
+		struct strbuf idx_file = STRBUF_INIT;
+		struct stat st;
+
+		strbuf_addf(&idx_file, "%.*s/%.*s.idx", len, path,
+			    (int)strlen(name) - 5, name);
+		if (!stat(idx_file.buf, &st) && S_ISREG(st.st_mode)) {
+			strbuf_release(&idx_file);
+			return;
+		}
+		strbuf_release(&idx_file);
+	}
+	error("garbage found: %.*s/%s", len, path, name);
+	garbage++;
+}
+
 static char const * const count_objects_usage[] = {
 	N_("git count-objects [-v]"),
 	NULL
@@ -76,7 +99,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	const char *objdir = get_object_directory();
 	int len = strlen(objdir);
 	char *path = xmalloc(len + 50);
-	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+	unsigned long loose = 0, packed = 0, packed_loose = 0;
 	off_t loose_size = 0;
 	struct option opts[] = {
 		OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +110,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	/* we do not take arguments other than flags for now */
 	if (argc)
 		usage_with_options(count_objects_usage, opts);
+	if (verbose)
+		report_pack_garbage = real_report_pack_garbage;
 	memcpy(path, objdir, len);
 	if (len && objdir[len-1] != '/')
 		path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..6045946 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1000,6 +1000,17 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
+static void dummy_report_pack_garbage(const char *path,
+				      int len,
+				      const char *name)
+{
+}
+
+void (*report_pack_garbage)(const char *path,
+			    int len,
+			    const char *name) =
+	dummy_report_pack_garbage;
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
 	/* Ensure that this buffer is large enough so that we can
@@ -1024,11 +1035,15 @@ static void prepare_packed_git_one(char *objdir, int local)
 		int namelen = strlen(de->d_name);
 		struct packed_git *p;
 
-		if (!has_extension(de->d_name, ".idx"))
+		if (!has_extension(de->d_name, ".idx")) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
-		if (len + namelen + 1 > sizeof(path))
+		if (len + namelen + 1 > sizeof(path)) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
 		/* Don't reopen a pack we already have. */
 		strcpy(path + len, de->d_name);
@@ -1042,8 +1057,10 @@ static void prepare_packed_git_one(char *objdir, int local)
 		 * .pack file that we can map.
 		 */
 		p = add_packed_git(path, len + namelen, local);
-		if (!p)
+		if (!p) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 		install_packed_git(p);
 	}
 	closedir(dir);
-- 
1.8.1.2.536.gf441e6d

  reply	other threads:[~2013-02-04 12:49 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-04 12:49 [PATCH 1/2] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-04 12:49 ` Nguyễn Thái Ngọc Duy [this message]
2013-02-04 17:06   ` [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too Junio C Hamano
2013-02-04 18:16   ` Junio C Hamano
2013-02-07  7:37     ` Duy Nguyen
2013-02-07 18:12       ` Junio C Hamano
2013-02-07 23:58         ` Duy Nguyen
2013-02-08  3:48 ` [PATCH v2 0/3] count-objects improvements Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` [PATCH v2 1/3] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` [PATCH v2 2/3] count-objects: report garbage files in pack directory too Nguyễn Thái Ngọc Duy
2013-02-08 18:44     ` Junio C Hamano
2013-02-09  1:58       ` Duy Nguyen
2013-02-08  3:48   ` [PATCH v2 3/3] count-objects: report how much disk space taken by garbage files Nguyễn Thái Ngọc Duy
2013-02-08 18:47     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1359982145-10792-2-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).