From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 2/3] count-objects: report garbage files in pack directory too
Date: Fri, 8 Feb 2013 10:48:26 +0700 [thread overview]
Message-ID: <1360295307-5469-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1360295307-5469-1-git-send-email-pclouds@gmail.com>
prepare_packed_git_one() is modified to allow count-objects to hook a
report function into it, so we don't need to duplicate the pack searching
logic in count-objects.c. When report_pack_garbage is NULL, the
overhead is insignificant.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/git-count-objects.txt | 4 +-
builtin/count-objects.c | 18 ++++++++-
sha1_file.c | 81 +++++++++++++++++++++++++++++++++++--
3 files changed, 97 insertions(+), 6 deletions(-)
diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
prune-packable: the number of loose objects that are also present in
the packs. These objects could be pruned using `git prune-packed`.
+
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in the object database that are neither
+valid loose objects nor valid packs
GIT
---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..118b2ae 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,20 @@
#include "builtin.h"
#include "parse-options.h"
+static unsigned long garbage;
+
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+ if (len && name)
+ error("garbage found: %.*s/%s", len, path, name);
+ else if (!len && name)
+ error("garbage found: %s%s", path, name);
+ else
+ error("garbage found: %s", path);
+ garbage++;
+}
+
static void count_objects(DIR *d, char *path, int len, int verbose,
unsigned long *loose,
off_t *loose_size,
@@ -76,7 +90,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
const char *objdir = get_object_directory();
int len = strlen(objdir);
char *path = xmalloc(len + 50);
- unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+ unsigned long loose = 0, packed = 0, packed_loose = 0;
off_t loose_size = 0;
struct option opts[] = {
OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +101,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
/* we do not take arguments other than flags for now */
if (argc)
usage_with_options(count_objects_usage, opts);
+ if (verbose)
+ report_pack_garbage = real_report_pack_garbage;
memcpy(path, objdir, len);
if (len && objdir[len-1] != '/')
path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..cc6ef03 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -21,6 +21,7 @@
#include "sha1-lookup.h"
#include "bulk-checkin.h"
#include "streaming.h"
+#include "dir.h"
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1000,6 +1001,54 @@ void install_packed_git(struct packed_git *pack)
packed_git = pack;
}
+/* A hook for count-objects to report invalid files in pack directory */
+void (*report_pack_garbage)(const char *path, int len, const char *name);
+
+static const char *known_pack_extensions[] = { ".pack", ".keep", NULL };
+
+static void report_garbage(struct string_list *list)
+{
+ struct strbuf sb = STRBUF_INIT;
+ struct packed_git *p;
+ int i;
+
+ if (!report_pack_garbage)
+ return;
+
+ sort_string_list(list);
+
+ for (p = packed_git; p; p = p->next) {
+ struct string_list_item *item;
+ if (!p->pack_local)
+ continue;
+ strbuf_reset(&sb);
+ strbuf_add(&sb, p->pack_name,
+ strlen(p->pack_name) - strlen(".pack"));
+ item = string_list_lookup(list, sb.buf);
+ if (!item)
+ continue;
+ /*
+ * string_list_lookup does not guarantee to return the
+ * first matched string if it's duplicated.
+ */
+ while (item - list->items &&
+ !strcmp(item[-1].string, item->string))
+ item--;
+ while (item - list->items < list->nr &&
+ !strcmp(item->string, sb.buf)) {
+ item->util = NULL; /* non-garbage mark */
+ item++;
+ }
+ }
+ for (i = 0; i < list->nr; i++) {
+ struct string_list_item *item = list->items + i;
+ if (!item->util)
+ continue;
+ report_pack_garbage(item->string, 0, item->util);
+ }
+ strbuf_release(&sb);
+}
+
static void prepare_packed_git_one(char *objdir, int local)
{
/* Ensure that this buffer is large enough so that we can
@@ -1009,6 +1058,7 @@ static void prepare_packed_git_one(char *objdir, int local)
int len;
DIR *dir;
struct dirent *de;
+ struct string_list garbage = STRING_LIST_INIT_DUP;
sprintf(path, "%s/pack", objdir);
len = strlen(path);
@@ -1024,14 +1074,37 @@ static void prepare_packed_git_one(char *objdir, int local)
int namelen = strlen(de->d_name);
struct packed_git *p;
- if (!has_extension(de->d_name, ".idx"))
+ if (len + namelen + 1 > sizeof(path)) {
+ if (report_pack_garbage)
+ report_pack_garbage(path, len - 1, de->d_name);
continue;
+ }
+
+ strcpy(path + len, de->d_name);
- if (len + namelen + 1 > sizeof(path))
+ if (!has_extension(de->d_name, ".idx")) {
+ struct string_list_item *item;
+ int i, n;
+ if (!report_pack_garbage)
+ continue;
+ if (is_dot_or_dotdot(de->d_name))
+ continue;
+ for (i = 0; known_pack_extensions[i]; i++)
+ if (has_extension(de->d_name,
+ known_pack_extensions[i]))
+ break;
+ if (!known_pack_extensions[i]) {
+ report_pack_garbage(path, 0, NULL);
+ continue;
+ }
+ n = strlen(path) - strlen(known_pack_extensions[i]);
+ item = string_list_append_nodup(&garbage,
+ xstrndup(path, n));
+ item->util = (void*)known_pack_extensions[i];
continue;
+ }
/* Don't reopen a pack we already have. */
- strcpy(path + len, de->d_name);
for (p = packed_git; p; p = p->next) {
if (!memcmp(path, p->pack_name, len + namelen - 4))
break;
@@ -1047,6 +1120,8 @@ static void prepare_packed_git_one(char *objdir, int local)
install_packed_git(p);
}
closedir(dir);
+ report_garbage(&garbage);
+ string_list_clear(&garbage, 0);
}
static int sort_pack(const void *a_, const void *b_)
--
1.8.1.2.495.g3fdf5d5.dirty
next prev parent reply other threads:[~2013-02-08 3:51 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-04 12:49 [PATCH 1/2] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-04 12:49 ` [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too Nguyễn Thái Ngọc Duy
2013-02-04 17:06 ` Junio C Hamano
2013-02-04 18:16 ` Junio C Hamano
2013-02-07 7:37 ` Duy Nguyen
2013-02-07 18:12 ` Junio C Hamano
2013-02-07 23:58 ` Duy Nguyen
2013-02-08 3:48 ` [PATCH v2 0/3] count-objects improvements Nguyễn Thái Ngọc Duy
2013-02-08 3:48 ` [PATCH v2 1/3] git-count-objects.txt: describe each line in -v output Nguyễn Thái Ngọc Duy
2013-02-08 3:48 ` Nguyễn Thái Ngọc Duy [this message]
2013-02-08 18:44 ` [PATCH v2 2/3] count-objects: report garbage files in pack directory too Junio C Hamano
2013-02-09 1:58 ` Duy Nguyen
2013-02-08 3:48 ` [PATCH v2 3/3] count-objects: report how much disk space taken by garbage files Nguyễn Thái Ngọc Duy
2013-02-08 18:47 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1360295307-5469-3-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).