All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 2/3] count-objects: report garbage files in pack directory too
Date: Fri,  8 Feb 2013 10:48:26 +0700	[thread overview]
Message-ID: <1360295307-5469-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1360295307-5469-1-git-send-email-pclouds@gmail.com>

prepare_packed_git_one() is modified so that count-objects can hook a
report function into it; this way we don't need to duplicate the pack
searching logic in count-objects.c. When report_pack_garbage is NULL,
the overhead is insignificant.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-count-objects.txt |  4 +-
 builtin/count-objects.c             | 18 ++++++++-
 sha1_file.c                         | 81 +++++++++++++++++++++++++++++++++++--
 3 files changed, 97 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in the object database that are neither
+valid loose objects nor valid packs
 
 GIT
 ---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..118b2ae 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,20 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+	if (len && name)
+		error("garbage found: %.*s/%s", len, path, name);
+	else if (!len && name)
+		error("garbage found: %s%s", path, name);
+	else
+		error("garbage found: %s", path);
+	garbage++;
+}
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
 			  unsigned long *loose,
 			  off_t *loose_size,
@@ -76,7 +90,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	const char *objdir = get_object_directory();
 	int len = strlen(objdir);
 	char *path = xmalloc(len + 50);
-	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+	unsigned long loose = 0, packed = 0, packed_loose = 0;
 	off_t loose_size = 0;
 	struct option opts[] = {
 		OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +101,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	/* we do not take arguments other than flags for now */
 	if (argc)
 		usage_with_options(count_objects_usage, opts);
+	if (verbose)
+		report_pack_garbage = real_report_pack_garbage;
 	memcpy(path, objdir, len);
 	if (len && objdir[len-1] != '/')
 		path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..cc6ef03 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -21,6 +21,7 @@
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
 #include "streaming.h"
+#include "dir.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1000,6 +1001,54 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
+/* A hook for count-objects to report invalid files in pack directory */
+void (*report_pack_garbage)(const char *path, int len, const char *name);
+
+static const char *known_pack_extensions[] = { ".pack", ".keep", NULL };
+
+static void report_garbage(struct string_list *list)
+{
+	struct strbuf sb = STRBUF_INIT;
+	struct packed_git *p;
+	int i;
+
+	if (!report_pack_garbage)
+		return;
+
+	sort_string_list(list);
+
+	for (p = packed_git; p; p = p->next) {
+		struct string_list_item *item;
+		if (!p->pack_local)
+			continue;
+		strbuf_reset(&sb);
+		strbuf_add(&sb, p->pack_name,
+			   strlen(p->pack_name) - strlen(".pack"));
+		item = string_list_lookup(list, sb.buf);
+		if (!item)
+			continue;
+		/*
+		 * string_list_lookup is not guaranteed to return the
+		 * first matching string when there are duplicates.
+		 */
+		while (item - list->items &&
+		       !strcmp(item[-1].string, item->string))
+			item--;
+		while (item - list->items < list->nr &&
+		       !strcmp(item->string, sb.buf)) {
+			item->util = NULL; /* non-garbage mark */
+			item++;
+		}
+	}
+	for (i = 0; i < list->nr; i++) {
+		struct string_list_item *item = list->items + i;
+		if (!item->util)
+			continue;
+		report_pack_garbage(item->string, 0, item->util);
+	}
+	strbuf_release(&sb);
+}
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
 	/* Ensure that this buffer is large enough so that we can
@@ -1009,6 +1058,7 @@ static void prepare_packed_git_one(char *objdir, int local)
 	int len;
 	DIR *dir;
 	struct dirent *de;
+	struct string_list garbage = STRING_LIST_INIT_DUP;
 
 	sprintf(path, "%s/pack", objdir);
 	len = strlen(path);
@@ -1024,14 +1074,37 @@ static void prepare_packed_git_one(char *objdir, int local)
 		int namelen = strlen(de->d_name);
 		struct packed_git *p;
 
-		if (!has_extension(de->d_name, ".idx"))
+		if (len + namelen + 1 > sizeof(path)) {
+			if (report_pack_garbage)
+				report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
+
+		strcpy(path + len, de->d_name);
 
-		if (len + namelen + 1 > sizeof(path))
+		if (!has_extension(de->d_name, ".idx")) {
+			struct string_list_item *item;
+			int i, n;
+			if (!report_pack_garbage)
+				continue;
+			if (is_dot_or_dotdot(de->d_name))
+				continue;
+			for (i = 0; known_pack_extensions[i]; i++)
+				if (has_extension(de->d_name,
+						  known_pack_extensions[i]))
+					break;
+			if (!known_pack_extensions[i]) {
+				report_pack_garbage(path, 0, NULL);
+				continue;
+			}
+			n = strlen(path) - strlen(known_pack_extensions[i]);
+			item = string_list_append_nodup(&garbage,
+							xstrndup(path, n));
+			item->util = (void*)known_pack_extensions[i];
 			continue;
+		}
 
 		/* Don't reopen a pack we already have. */
-		strcpy(path + len, de->d_name);
 		for (p = packed_git; p; p = p->next) {
 			if (!memcmp(path, p->pack_name, len + namelen - 4))
 				break;
@@ -1047,6 +1120,8 @@ static void prepare_packed_git_one(char *objdir, int local)
 		install_packed_git(p);
 	}
 	closedir(dir);
+	report_garbage(&garbage);
+	string_list_clear(&garbage, 0);
 }
 
 static int sort_pack(const void *a_, const void *b_)
-- 
1.8.1.2.495.g3fdf5d5.dirty

  parent reply	other threads:[~2013-02-08  3:51 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-04 12:49 [PATCH 1/2] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-04 12:49 ` [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too Nguyễn Thái Ngọc Duy
2013-02-04 17:06   ` Junio C Hamano
2013-02-04 18:16   ` Junio C Hamano
2013-02-07  7:37     ` Duy Nguyen
2013-02-07 18:12       ` Junio C Hamano
2013-02-07 23:58         ` Duy Nguyen
2013-02-08  3:48 ` [PATCH v2 0/3] count-objects improvements Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` [PATCH v2 1/3] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-08  3:48   ` Nguyễn Thái Ngọc Duy [this message]
2013-02-08 18:44     ` [PATCH v2 2/3] count-objects: report garbage files in pack directory too Junio C Hamano
2013-02-09  1:58       ` Duy Nguyen
2013-02-08  3:48   ` [PATCH v2 3/3] count-objects: report how much disk space taken by garbage files Nguyễn Thái Ngọc Duy
2013-02-08 18:47     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1360295307-5469-3-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.