* [PATCH] prune-packed: new option --min-age=N
@ 2007-01-18 17:18 Matthias Lederhofer
2007-01-18 17:24 ` Shawn O. Pearce
0 siblings, 1 reply; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-18 17:18 UTC (permalink / raw)
To: git
This option makes prune-packed ignore all packs younger than N seconds
(using mtime).
Signed-off-by: Matthias Lederhofer <matled@gmx.net>
---
This option allows to remove the unpacked version of an object only if
it has been packed for a minimum time. It is intended to work around
the problem of freshly packed objects which should not be deleted
because someone might still try to open the unpacked version.
if [ $(git count-objects | cut -d ' ' -f 3) -gt 10240 ]; then
git repack
fi
git prune-packed --min-age=$((24*60*60))
---
builtin-prune-packed.c | 48 +++++++++++++++++++++++++++++++++++++++++++-----
builtin-prune.c | 2 +-
builtin.h | 2 +-
3 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/builtin-prune-packed.c b/builtin-prune-packed.c
index a57b76d..4359a41 100644
--- a/builtin-prune-packed.c
+++ b/builtin-prune-packed.c
@@ -7,7 +7,8 @@ static const char prune_packed_usage[] =
#define DRY_RUN 01
#define VERBOSE 02
-static void prune_dir(int i, DIR *dir, char *pathname, int len, int opts)
+static void prune_dir(int i, DIR *dir, char *pathname, int len, int opts,
+ char **ignore_packed)
{
struct dirent *de;
char hex[40];
@@ -20,7 +21,7 @@ static void prune_dir(int i, DIR *dir, char *pathname, int len, int opts)
memcpy(hex+2, de->d_name, 38);
if (get_sha1_hex(hex, sha1))
continue;
- if (!has_sha1_pack(sha1, NULL))
+ if (!has_sha1_pack(sha1, ignore_packed))
continue;
memcpy(pathname + len, de->d_name, 38);
if (opts & DRY_RUN)
@@ -32,12 +33,46 @@ static void prune_dir(int i, DIR *dir, char *pathname, int len, int opts)
rmdir(pathname);
}
-void prune_packed_objects(int opts)
+static char **prune_ignore(int min_age, int opts)
+{
+ int ignore_max = 10;
+ int ignore_count = 0;
+ char **ignore_packed = xcalloc(ignore_max, sizeof(char*));
+ int now = time(NULL);
+ struct packed_git *p;
+
+ prepare_packed_git();
+ for (p = packed_git; p; p = p->next) {
+ struct stat s;
+ if (stat(p->pack_name, &s)) {
+ warn("unable to stat %s", p->pack_name);
+ continue;
+ }
+ if (now-s.st_mtime >= min_age)
+ continue;
+ if (opts & VERBOSE)
+ fprintf(stderr, "ignoring pack %s\n", p->pack_name);
+ if (ignore_count >= ignore_max-1) {
+ ignore_max += 10;
+ ignore_packed = xrealloc(ignore_packed,
+ ignore_max*sizeof(char*));
+ }
+ ignore_packed[ignore_count++] = p->pack_name;
+ }
+ ignore_packed[ignore_count] = NULL;
+ return ignore_packed;
+}
+
+void prune_packed_objects(int opts, int min_age)
{
int i;
static char pathname[PATH_MAX];
const char *dir = get_object_directory();
int len = strlen(dir);
+ char **ignore_packed = NULL;
+
+ if (min_age > 0)
+ ignore_packed = prune_ignore(min_age, opts);
if (len > PATH_MAX - 42)
die("impossible object directory");
@@ -54,7 +89,7 @@ void prune_packed_objects(int opts)
((i+1) * 100) / 256);
if (!d)
continue;
- prune_dir(i, d, pathname, len + 3, opts);
+ prune_dir(i, d, pathname, len + 3, opts, ignore_packed);
closedir(d);
}
if (opts == VERBOSE)
@@ -65,6 +100,7 @@ int cmd_prune_packed(int argc, const char **argv, const char *prefix)
{
int i;
int opts = VERBOSE;
+ int min_age = 0;
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
@@ -74,6 +110,8 @@ int cmd_prune_packed(int argc, const char **argv, const char *prefix)
opts |= DRY_RUN;
else if (!strcmp(arg, "-q"))
opts &= ~VERBOSE;
+ else if (!strncmp(arg, "--min-age=", 10))
+ min_age = atoi(arg+10);
else
usage(prune_packed_usage);
continue;
@@ -82,6 +120,6 @@ int cmd_prune_packed(int argc, const char **argv, const char *prefix)
usage(prune_packed_usage);
}
sync();
- prune_packed_objects(opts);
+ prune_packed_objects(opts, min_age);
return 0;
}
diff --git a/builtin-prune.c b/builtin-prune.c
index 6f0ba0d..46a0fe5 100644
--- a/builtin-prune.c
+++ b/builtin-prune.c
@@ -100,6 +100,6 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
prune_object_dir(get_object_directory());
sync();
- prune_packed_objects(show_only);
+ prune_packed_objects(show_only, 0);
return 0;
}
diff --git a/builtin.h b/builtin.h
index 818c7bf..e9929da 100644
--- a/builtin.h
+++ b/builtin.h
@@ -11,7 +11,7 @@ extern int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, const cha
extern int split_mbox(const char **mbox, const char *dir, int allow_bare, int nr_prec, int skip);
extern void stripspace(FILE *in, FILE *out);
extern int write_tree(unsigned char *sha1, int missing_ok, const char *prefix);
-extern void prune_packed_objects(int);
+extern void prune_packed_objects(int, int);
extern int cmd_add(int argc, const char **argv, const char *prefix);
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
--
1.4.4.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune-packed: new option --min-age=N
2007-01-18 17:18 [PATCH] prune-packed: new option --min-age=N Matthias Lederhofer
@ 2007-01-18 17:24 ` Shawn O. Pearce
2007-01-18 17:42 ` Matthias Lederhofer
0 siblings, 1 reply; 30+ messages in thread
From: Shawn O. Pearce @ 2007-01-18 17:24 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: git
Matthias Lederhofer <matled@gmx.net> wrote:
> This option allows to remove the unpacked version of an object only if
> it has been packed for a minimum time. It is intended to work around
> the problem of freshly packed objects which should not be deleted
> because someone might still try to open the unpacked version.
Are we not rescanning for new packs if we fail to find an
object in the object directory? I thought we were doing this in
read_sha1_file. *looks at code* Indeed, we are...
Is this somehow not working?
--
Shawn.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune-packed: new option --min-age=N
2007-01-18 17:24 ` Shawn O. Pearce
@ 2007-01-18 17:42 ` Matthias Lederhofer
2007-01-18 17:51 ` Shawn O. Pearce
0 siblings, 1 reply; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-18 17:42 UTC (permalink / raw)
To: git
Shawn O. Pearce <spearce@spearce.org> wrote:
> Matthias Lederhofer <matled@gmx.net> wrote:
> > This option allows to remove the unpacked version of an object only if
> > it has been packed for a minimum time. It is intended to work around
> > the problem of freshly packed objects which should not be deleted
> > because someone might still try to open the unpacked version.
>
> Are we not rescanning for new packs if we fail to find an
> object in the object directory? I thought we were doing this in
> read_sha1_file. *looks at code* Indeed, we are...
>
> Is this somehow not working?
I just remembered people were warning about automatic prune-packed
(e.g. cron), so this patch does not make much sense anymore.
Any comments about adding such an option to git prune for unpacked
objects? This would allow to run git prune automatically on
repositories while new objects are created.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune-packed: new option --min-age=N
2007-01-18 17:42 ` Matthias Lederhofer
@ 2007-01-18 17:51 ` Shawn O. Pearce
2007-01-18 22:29 ` [RFC] prune: --expire=seconds Matthias Lederhofer
0 siblings, 1 reply; 30+ messages in thread
From: Shawn O. Pearce @ 2007-01-18 17:51 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: git
Matthias Lederhofer <matled@gmx.net> wrote:
> I just remembered people were warning about automatic prune-packed
> (e.g. cron), so this patch does not make much sense anymore.
I think we've resolved it. There may be some hole we aren't aware
of, but I doubt one exists. The last hole was packs could be
removed by repack while they were still being brought in during
push or 'fetch -k', but that was fixed with the .keep file.
> Any comments about adding such an option to git prune for unpacked
> objects? This would allow to run git prune automatically on
> repositories while new objects are created.
An age makes sense on just plain prune. If the loose object was
last modified several hours ago and its not being referenced by
anything currently, its probably safe to remove.
If you are going to implement this I would suggest making the default
age 2 hours and allow the user to configure it from a gc.pruneexpire
configuration option, much like gc.reflogexpire.
--
Shawn.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC] prune: --expire=seconds
2007-01-18 17:51 ` Shawn O. Pearce
@ 2007-01-18 22:29 ` Matthias Lederhofer
2007-01-18 22:32 ` Junio C Hamano
0 siblings, 1 reply; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-18 22:29 UTC (permalink / raw)
To: git; +Cc: Shawn O. Pearce
This option specifies the minimum age of an object before it
may be removed by prune. The default value is 2 hours and
may be changed using gc.pruneexpire.
Signed-off-by: Matthias Lederhofer <matled@gmx.net>
---
Shawn O. Pearce <spearce@spearce.org> wrote:
> If you are going to implement this I would suggest making the default
> age 2 hours and allow the user to configure it from a gc.pruneexpire
> configuration option, much like gc.reflogexpire.
Here it is, I've set the default value to 2 hours as you suggested.
Any other comments if the default should be a value >0 or 0 to keep
the old behaviour?
---
builtin-prune.c | 25 ++++++++++++++++++++++++-
1 files changed, 24 insertions(+), 1 deletions(-)
diff --git a/builtin-prune.c b/builtin-prune.c
index 6f0ba0d..f46892d 100644
--- a/builtin-prune.c
+++ b/builtin-prune.c
@@ -5,8 +5,10 @@
#include "builtin.h"
#include "reachable.h"
-static const char prune_usage[] = "git-prune [-n]";
+static const char prune_usage[] = "git-prune [-n] [--expire=seconds]";
static int show_only;
+static int prune_expire;
+static time_t now;
static int prune_object(char *path, const char *filename, const unsigned char *sha1)
{
@@ -38,6 +40,7 @@ static int prune_dir(int i, char *path)
char name[100];
unsigned char sha1[20];
int len = strlen(de->d_name);
+ struct stat st;
switch (len) {
case 2:
@@ -60,6 +63,11 @@ static int prune_dir(int i, char *path)
if (lookup_object(sha1))
continue;
+ if (prune_expire > 0 &&
+ !stat(mkpath("%s/%s", path, de->d_name), &st) &&
+ now-st.st_mtime < prune_expire)
+ continue;
+
prune_object(path, de->d_name, sha1);
continue;
}
@@ -79,10 +87,21 @@ static void prune_object_dir(const char *path)
}
}
+static void git_prune_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "gc.pruneexpire")) {
+ prune_expire = git_config_int(var, value);
+ return 0;
+ }
+ return git_default_config(var, value);
+}
+
int cmd_prune(int argc, const char **argv, const char *prefix)
{
int i;
struct rev_info revs;
+ prune_expire = 2*60*60;
+ now = time(NULL);
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
@@ -90,6 +109,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
show_only = 1;
continue;
}
+ if (!strncmp(arg, "--expire=", 9)) {
+ prune_expire = atoi(arg+9);
+ continue;
+ }
usage(prune_usage);
}
--
1.4.4.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [RFC] prune: --expire=seconds
2007-01-18 22:29 ` [RFC] prune: --expire=seconds Matthias Lederhofer
@ 2007-01-18 22:32 ` Junio C Hamano
2007-01-19 3:44 ` Shawn O. Pearce
0 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2007-01-18 22:32 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: Shawn O. Pearce, git
Matthias Lederhofer <matled@gmx.net> writes:
> This option specifies the minimum age of an object before it
> may be removed by prune. The default value is 2 hours and
> may be changed using gc.pruneexpire.
>
> Signed-off-by: Matthias Lederhofer <matled@gmx.net>
> ---
> Shawn O. Pearce <spearce@spearce.org> wrote:
>> If you are going to implement this I would suggest making the default
>> age 2 hours and allow the user to configure it from a gc.pruneexpire
>> configuration option, much like gc.reflogexpire.
>
> Here it is, I've set the default value to 2 hours as you suggested.
> Any other comments if the default should be a value >0 or 0 to keep
> the old behaviour?
I am not sure if this is needed, as Shawn explained earlier
rounds of loose-objects safety work.
If this is something we would want, it might make sense if we
allowed "prune --expire='1.day'" syntax ;-).
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [RFC] prune: --expire=seconds
2007-01-18 22:32 ` Junio C Hamano
@ 2007-01-19 3:44 ` Shawn O. Pearce
2007-01-19 10:49 ` [PATCH] prune: --expire=time Matthias Lederhofer
0 siblings, 1 reply; 30+ messages in thread
From: Shawn O. Pearce @ 2007-01-19 3:44 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Matthias Lederhofer, git
Junio C Hamano <junkio@cox.net> wrote:
> Matthias Lederhofer <matled@gmx.net> writes:
>
> > This option specifies the minimum age of an object before it
> > may be removed by prune. The default value is 2 hours and
> > may be changed using gc.pruneexpire.
>
> I am not sure if this is needed, as Shawn explained earlier
> rounds of loose-objects safety work.
I think we need this fix. We still have a race condition between
the loose object creation and the ref update.
We've closed this hole completely in the large push case (objects
>=receive.unpackLimit) and 'fetch -k' case by creating .keep files
before the .pack file, updating refs, then deleting the .keep file;
and by making sure git-repack leaves packs with .keeps alone. So
we cannot lose an object here.
But update-index/add/merge-recursive/write-tree/commit-tree, etc.
as well as small pushes (objects <receive.unpackLimit) and fetch
without -k option still have a race condition. The objects will
be created/unpacked into the loose objects directory with nothing
referencing them, and a prune which gets to run just before before
the ref update becomes visible would probably whack those objects.
Given that 'git gc' is the encouraged way to maintain a repository,
and that 'repack -a -d' is safe, and prune-packed is equally safe,
I think we should try to make prune safe too. Matthias' patch
does this by giving the ref update process a fairly large window
to perform its action within.
> If this is something we would want, it might make sense if we
> allowed "prune --expire='1.day'" syntax ;-).
Yes, I agree.
Matthias you can take a look at builtin-reflog.c's argument handling
for an example. I think you just need to use approxidate() in both
your config function and in your command line argument handling.
Then the default becomes '2.hours.ago' instead of just "2" (at
least from a documentation perspective).
Though the more I think about this perhaps the default should be
'1.day'. 24 hours is a hellva large window for any current ref
update to complete in, even if the ref update was some massive rsync
which is doing a such a large volume of data on a small bandwidth
link that it takes 20 hours to complete. Besides, users could
always force it to be much lower with the command line option if
they really need to prune _right_now_.
--
Shawn.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH] prune: --expire=time
2007-01-19 3:44 ` Shawn O. Pearce
@ 2007-01-19 10:49 ` Matthias Lederhofer
2007-01-19 15:41 ` Nicolas Pitre
2007-01-19 19:18 ` Junio C Hamano
0 siblings, 2 replies; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-19 10:49 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: git
This option specifies the minimum age of an object before it
may be removed by prune. The default value is 24 hours and
may be changed using gc.pruneexpire.
Signed-off-by: Matthias Lederhofer <matled@gmx.net>
---
Shawn O. Pearce <spearce@spearce.org> wrote:
> Given that 'git gc' is the encouraged way to maintain a repository,
> and that 'repack -a -d' is safe, and prune-packed is equally safe,
> I think we should try to make prune safe too. Matthias' patch
> does this by giving the ref update process a fairly large window
> to perform its action within.
Ah, git repack -a -d is safe now too?
> Junio C Hamano <junkio@cox.net> wrote:
> > If this is something we would want, it might make sense if we
> > allowed "prune --expire='1.day'" syntax ;-).
>
> Yes, I agree.
>
> Matthias you can take a look at builtin-reflog.c's argument handling
> for an example. I think you just need to use approxidate() in both
> your config function and in your command line argument handling.
> Then the default becomes '2.hours.ago' instead of just "2" (at
> least from a documentation perspective).
>
> Though the more I think about this perhaps the default should be
> '1.day'. 24 hours is a hellva large window for any current ref
> update to complete in, even if the ref update was some massive rsync
> which is doing a such a large volume of data on a small bandwidth
> link that it takes 20 hours to complete. Besides, users could
> always force it to be much lower with the command line option if
> they really need to prune _right_now_.
Thanks for the advice, this is much better than specifying seconds.
Here is the new version.
Things I'm not sure about, any further comments/discussion?
- default value for gc.pruneexpire
- special value(s) for gc.pruneexpire/--expire which mean 'do not
check for the age', currently it is 'off'
---
builtin-prune.c | 31 ++++++++++++++++++++++++++++++-
1 files changed, 30 insertions(+), 1 deletions(-)
diff --git a/builtin-prune.c b/builtin-prune.c
index 6f0ba0d..410b3b2 100644
--- a/builtin-prune.c
+++ b/builtin-prune.c
@@ -5,8 +5,9 @@
#include "builtin.h"
#include "reachable.h"
-static const char prune_usage[] = "git-prune [-n]";
+static const char prune_usage[] = "git-prune [-n] [--expire=time]";
static int show_only;
+static int prune_expire;
static int prune_object(char *path, const char *filename, const unsigned char *sha1)
{
@@ -38,6 +39,7 @@ static int prune_dir(int i, char *path)
char name[100];
unsigned char sha1[20];
int len = strlen(de->d_name);
+ struct stat st;
switch (len) {
case 2:
@@ -60,6 +62,11 @@ static int prune_dir(int i, char *path)
if (lookup_object(sha1))
continue;
+ if (prune_expire > 0 &&
+ !stat(mkpath("%s/%s", path, de->d_name), &st) &&
+ st.st_mtime > prune_expire)
+ continue;
+
prune_object(path, de->d_name, sha1);
continue;
}
@@ -79,10 +86,25 @@ static void prune_object_dir(const char *path)
}
}
+static int git_prune_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "gc.pruneexpire")) {
+ if (!strcmp(value, "off"))
+ prune_expire = 0;
+ else
+ prune_expire = approxidate(value);
+ return 0;
+ }
+ return git_default_config(var, value);
+}
+
int cmd_prune(int argc, const char **argv, const char *prefix)
{
int i;
struct rev_info revs;
+ prune_expire = time(NULL)-24*60*60;
+
+ git_config(git_prune_config);
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
@@ -90,6 +112,13 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
show_only = 1;
continue;
}
+ if (!strncmp(arg, "--expire=", 9)) {
+ if (!strcmp(arg+9, "off"))
+ prune_expire = 0;
+ else
+ prune_expire = approxidate(arg+9);
+ continue;
+ }
usage(prune_usage);
}
--
1.4.4.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-19 10:49 ` [PATCH] prune: --expire=time Matthias Lederhofer
@ 2007-01-19 15:41 ` Nicolas Pitre
2007-01-19 19:18 ` Junio C Hamano
1 sibling, 0 replies; 30+ messages in thread
From: Nicolas Pitre @ 2007-01-19 15:41 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: Shawn O. Pearce, git
On Fri, 19 Jan 2007, Matthias Lederhofer wrote:
> Ah, git repack -a -d is safe now too?
Yes.
Nicolas
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-19 10:49 ` [PATCH] prune: --expire=time Matthias Lederhofer
2007-01-19 15:41 ` Nicolas Pitre
@ 2007-01-19 19:18 ` Junio C Hamano
2007-01-20 11:18 ` Matthias Lederhofer
2007-01-20 12:06 ` Simon 'corecode' Schubert
1 sibling, 2 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-19 19:18 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: git, Shawn O. Pearce
Matthias Lederhofer <matled@gmx.net> writes:
> Shawn O. Pearce <spearce@spearce.org> wrote:
>
>> Junio C Hamano <junkio@cox.net> wrote:
>> > If this is something we would want, it might make sense if we
>> > allowed "prune --expire='1.day'" syntax ;-).
>>
>> Yes, I agree.
>
> Here is the new version.
Thanks.
> Things I'm not sure about, any further comments/discussion?
> - default value for gc.pruneexpire
> - special value(s) for gc.pruneexpire/--expire which mean 'do not
> check for the age', currently it is 'off'
No single timeout value can be the right timeout for everybody,
so a big debate is not useful here. I think 1 day as you and
Shawn did makes sense.
git prune --expire=off
felt a bit confusing to me at the first glance. Does it turn
off the expiration mechanism, retaining all cruft, or turns off
the mechanism to give grace period for recent objects? The
answer is obviosuly the latter as "retain all cruft" is
meaningless, but still it somehow feels funny. It might be
easier to explain if it was:
git prune --expire=now
Maybe an alternative:
git prune --retain=1.day
git prune --retain=off
perhaps? I dunno.
We seem to use "unsigned long" as a stand-in type for time_t in
the rest of the code. I'd feel better if prune_expire was also
"unsigned long". We probably should talk about making them all
time_t at some point but not right now.
I was tempted to say that we might want to teach approxidate
"now" (add one entry to struct special in date.c), but I do not
think it is useful for this application (you want prune_expire
set to zero not the time we run time(NULL)), and I do not think
of any other application that wants "now".
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-19 19:18 ` Junio C Hamano
@ 2007-01-20 11:18 ` Matthias Lederhofer
2007-01-21 6:55 ` Junio C Hamano
2007-01-20 12:06 ` Simon 'corecode' Schubert
1 sibling, 1 reply; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-20 11:18 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> git prune --expire=off
>
> felt a bit confusing to me at the first glance. Does it turn
> off the expiration mechanism, retaining all cruft, or turns off
> the mechanism to give grace period for recent objects? The
> answer is obviosuly the latter as "retain all cruft" is
> meaningless, but still it somehow feels funny. It might be
> easier to explain if it was:
>
> git prune --expire=now
>
> Maybe an alternative:
>
> git prune --retain=1.day
> git prune --retain=off
>
> perhaps? I dunno.
Perhaps we could also use 'none' or 'all, e.g. --retain=none or
--expire=all.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-19 19:18 ` Junio C Hamano
2007-01-20 11:18 ` Matthias Lederhofer
@ 2007-01-20 12:06 ` Simon 'corecode' Schubert
1 sibling, 0 replies; 30+ messages in thread
From: Simon 'corecode' Schubert @ 2007-01-20 12:06 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Matthias Lederhofer, git, Shawn O. Pearce
[-- Attachment #1: Type: text/plain, Size: 1439 bytes --]
Junio C Hamano wrote:
>> Things I'm not sure about, any further comments/discussion?
>> - default value for gc.pruneexpire
>> - special value(s) for gc.pruneexpire/--expire which mean 'do not
>> check for the age', currently it is 'off'
>
> No single timeout value can be the right timeout for everybody,
> so a big debate is not useful here. I think 1 day as you and
> Shawn did makes sense.
Not that I want to sabotage this discussion, but you have a very valid point. A timeout can always be crossed, and then bad things[tm] happen.
My idea is to create a marker file when creating (yet) unconnected loose objects, i.e. on commit/push/fetch. After the ref was updated or on abort, this marker would be removed. Prune then can simply search for the oldest marker and only remove objects older than this marker.
Of course this also can mean that a marker file somehow stays and prune fails to clean properly, but that's still better than accidentially cleaning too much. In the case of dangling marker files, the admin would simply remove them. rm .git/marker/* when the repo is quiet.
cheers
simon
--
Serve - BSD +++ RENT this banner advert +++ ASCII Ribbon /"\
Work - Mac +++ space for low €€€ NOW!1 +++ Campaign \ /
Party Enjoy Relax | http://dragonflybsd.org Against HTML \
Dude 2c 2 the max ! http://golden-apple.biz Mail + News / \
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 252 bytes --]
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-20 11:18 ` Matthias Lederhofer
@ 2007-01-21 6:55 ` Junio C Hamano
2007-01-21 7:53 ` Shawn O. Pearce
2007-01-21 10:37 ` Matthias Lederhofer
0 siblings, 2 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-21 6:55 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: git
Matthias Lederhofer <matled@gmx.net> writes:
> Junio C Hamano <junkio@cox.net> wrote:
>...
>> Maybe an alternative:
>>
>> git prune --retain=1.day
>> git prune --retain=off
>>
>> perhaps? I dunno.
>
> Perhaps we could also use 'none' or 'all, e.g. --retain=none or
> --expire=all.
I am considering to commit the attached instead.
The command "prune" is about expiring loose objects that are
unreachable, and the option introduces a grace period for the
expiration process. Other places that we do use the word
'expire' to specify the time do mean the expiration timeout.
As long as you do not rewind/rebase too much, there is not much
you can gain from 'git-prune' ('git-repack -a -d' would give you
much more disk savings and performance gain). I do not think it
makes sense to run uncontrolled 'git-prune' from automated cron
jobs. Even if you rewind/rebase often, 1.5.0 will protect the
objects reflog entries refer to, so there will be even less to
be gained from 'git-prune'. I am having a feeling that it might
even make sense not to run 'git-prune' from 'git-gc'.
While I sympathize with what Simon says to certain degree, I
tend to think the complication it needs to introduce is really
not worth it. Perfect is the enemy of the good.
By the way.
While updating the documentation, I noticed that we lost the
'extra heads' support when git-prune was rewritten as a built-in
in commit ba84a797 (July 6th 2006). The example (commented out
in the patch) is a valid way to safely prune a repository whose
objects are borrowed via the alternates mechanism by some other
repository, albeit not really scalable.
The way we might want to address this would be when 'clone -s'
makes a new repository that borrows from an existing repository,
we could make a symlink under .git/refs/borrowers/ in the
original repository that points at .git/refs directory of the
cloned repository -- you can do that by hand today and it would
be much nicer than having to specify the other repository when
running 'git prune' as the example suggests.
For this reason, I would say losing that 'extra heads' support
from git-prune, which happened half year ago, was Ok.
So far, we have been telling not to rewind refs in repositories
whose objects are borrowed by other repositories via the
alternates mechanism, and I think that advice is still a very
reasonable one to give, but we probably should make it more
prominent. As long as we do that, we would not even need the
'refs/borrowers/*' symlinks.
-- >8 --
From: Matthias Lederhofer <matled@gmx.net>
[PATCH] prune: --grace=time
This option gives grace period to objects that are unreachable
from the refs from getting pruned.
The default value is 24 hours and may be changed using
gc.prunegrace.
Signed-off-by: Matthias Lederhofer <matled@gmx.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Documentation/git-prune.txt | 9 ++++++++-
builtin-prune.c | 31 ++++++++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-prune.txt b/Documentation/git-prune.txt
index a11e303..fbd344d 100644
--- a/Documentation/git-prune.txt
+++ b/Documentation/git-prune.txt
@@ -8,7 +8,7 @@ git-prune - Prunes all unreachable objects from the object database
SYNOPSIS
--------
-'git-prune' [-n] [--] [<head>...]
+'git-prune' [-n] [--grace=<time>]
DESCRIPTION
-----------
@@ -28,6 +28,12 @@ OPTIONS
Do not remove anything; just report what it would
remove.
+--grace=<time>::
+ Do not prune loose objects that are younger than the
+ specified time. This gives a grace period to newly
+ created objects from getting pruned.
+
+////////////////////////////////////////////
\--::
Do not interpret any more arguments as options.
@@ -46,6 +52,7 @@ borrows from your repository via its
------------
$ git prune $(cd ../another && $(git-rev-parse --all))
------------
+////////////////////////////////////////////
Author
------
diff --git a/builtin-prune.c b/builtin-prune.c
index 6f0ba0d..7929af1 100644
--- a/builtin-prune.c
+++ b/builtin-prune.c
@@ -5,8 +5,9 @@
#include "builtin.h"
#include "reachable.h"
-static const char prune_usage[] = "git-prune [-n]";
+static const char prune_usage[] = "git-prune [-n] [--grace=time]";
static int show_only;
+static int prune_grace_period;
static int prune_object(char *path, const char *filename, const unsigned char *sha1)
{
@@ -38,6 +39,7 @@ static int prune_dir(int i, char *path)
char name[100];
unsigned char sha1[20];
int len = strlen(de->d_name);
+ struct stat st;
switch (len) {
case 2:
@@ -60,6 +62,11 @@ static int prune_dir(int i, char *path)
if (lookup_object(sha1))
continue;
+ if (prune_grace_period > 0 &&
+ !stat(mkpath("%s/%s", path, de->d_name), &st) &&
+ st.st_mtime > prune_grace_period)
+ continue;
+
prune_object(path, de->d_name, sha1);
continue;
}
@@ -79,10 +86,25 @@ static void prune_object_dir(const char *path)
}
}
+static int git_prune_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "gc.prunegrace")) {
+ if (!strcmp(value, "off"))
+ prune_grace_period = 0;
+ else
+ prune_grace_period = approxidate(value);
+ return 0;
+ }
+ return git_default_config(var, value);
+}
+
int cmd_prune(int argc, const char **argv, const char *prefix)
{
int i;
struct rev_info revs;
+ prune_grace_period = time(NULL)-24*60*60;
+
+ git_config(git_prune_config);
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
@@ -90,6 +112,13 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
show_only = 1;
continue;
}
+ if (!strncmp(arg, "--grace=", 8)) {
+ if (!strcmp(arg+8, "off"))
+ prune_grace_period = 0;
+ else
+ prune_grace_period = approxidate(arg+8);
+ continue;
+ }
usage(prune_usage);
}
--
1.5.0.rc1.g40ab
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-21 6:55 ` Junio C Hamano
@ 2007-01-21 7:53 ` Shawn O. Pearce
2007-01-21 10:37 ` Matthias Lederhofer
1 sibling, 0 replies; 30+ messages in thread
From: Shawn O. Pearce @ 2007-01-21 7:53 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Matthias Lederhofer, git
Junio C Hamano <junkio@cox.net> wrote:
> The way we might want to address this would be when 'clone -s'
> makes a new repository that borrows from an existing repository,
> we could make a symlink under .git/refs/borrowers/ in the
> original repository that points at .git/refs directory of the
> cloned repository -- you can do that by hand today and it would
> be much nicer than having to specify the other repository when
> running 'git prune' as the example suggests.
Unrelated topic: We may also want to consider doing the reverse,
that is symlink '.git/refs/borrows-from' of the cloned repository
to '.git/refs' in the original repository. This way pushing into
the cloned repository can avoid uploading objects which the cloned
repository already has access to via its objects/info/alternates
setup.
This would especially be nice with project forks, such as on
repo.or.cz. I don't need to upload all of 'master' if it has
already been uploaded by pasky's mirror job, for example.
--
Shawn.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-21 6:55 ` Junio C Hamano
2007-01-21 7:53 ` Shawn O. Pearce
@ 2007-01-21 10:37 ` Matthias Lederhofer
2007-01-21 11:17 ` Junio C Hamano
1 sibling, 1 reply; 30+ messages in thread
From: Matthias Lederhofer @ 2007-01-21 10:37 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> I am considering to commit the attached instead.
Looks fine. Just one question: You said normally unsigned long would
be used for time_t but time_t itself seems to be signed. Using
unsigned long instead of int for prune_grace_period (which is used as
time_t here) results in 'warning: comparison between signed and
unsigned'. Perhaps you want to change it here anyway to be consistent
with the rest of the code (approxidate returns unsigned long too).
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-21 10:37 ` Matthias Lederhofer
@ 2007-01-21 11:17 ` Junio C Hamano
2007-01-21 22:01 ` Jeff King
0 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2007-01-21 11:17 UTC (permalink / raw)
To: Matthias Lederhofer; +Cc: git
Matthias Lederhofer <matled@gmx.net> writes:
> Junio C Hamano <junkio@cox.net> wrote:
>> I am considering to commit the attached instead.
> Looks fine. Just one question: You said normally unsigned long would
> be used for time_t but time_t itself seems to be signed. Using
> unsigned long instead of int for prune_grace_period (which is used as
> time_t here) results in 'warning: comparison between signed and
> unsigned'. Perhaps you want to change it here anyway to be consistent
> with the rest of the code (approxidate returns unsigned long too).
Although I've merged this and pushed out v1.5.0-rc2, I am
starting to think this whole implementation of grace period is
unfortunately busted and does not buy us much. Running
git-prune in an uncontrolled way from a cron job is still not
safe.
Suppose there is a repository that has one old blob that is not
referenced from any existing ref (in other words, its been more
than the grace period since 'prune' was run in the repository,
and one of its heads were rewound which lost the last reference
to the blob). You are pushing a new commit into it, whose tree
has that blob as one of the files.
You construct a pack, and unpack-objects starts to run to
extract the objects you send in the said repository. The pack
you are sending does contain the blob (because no refs reached
it in the repository), but unpack-objects safety measure means
the blob is not re-extracted to overwrite the existing old blob.
Now, an automated prune runs and finishes reading the available
refs before your push concludes and updates the ref with your
new commit.
What happens?
If we wanted to apply this grace period conservatively,
protecting young objects is not enough. You need to protect
everything they refer to as well. In the above scenario, you
would protect the new commit object and probably the tree
objects contained within, but the code happily will lose the
blob that was already sitting there.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-21 11:17 ` Junio C Hamano
@ 2007-01-21 22:01 ` Jeff King
2007-01-22 1:38 ` Steven Grimm
0 siblings, 1 reply; 30+ messages in thread
From: Jeff King @ 2007-01-21 22:01 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Matthias Lederhofer, git
On Sun, Jan 21, 2007 at 03:17:07AM -0800, Junio C Hamano wrote:
> If we wanted to apply this grace period conservatively,
> protecting young objects is not enough. You need to protect
> everything they refer to as well. In the above scenario, you
That's not sufficient either. You might not _have_ the young objects
yet, think the blob is dangling, and delete it. Meanwhile, the tree that
references it arrives. IOW,
1. blob B arrives, but already exists
2. prune deletes unreference and old blob B
3. tree T arrives, referencing blob B
I think this might be safe if you add objects in a top-down way (i.e., T
before B). However, that doesn't make sense for the commit operation, in
which you add blobs (with git-add), and then eventually construct a
tree.
-Peff
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-21 22:01 ` Jeff King
@ 2007-01-22 1:38 ` Steven Grimm
2007-01-22 1:52 ` Jeff King
2007-01-22 2:03 ` [PATCH] prune: --expire=time Junio C Hamano
0 siblings, 2 replies; 30+ messages in thread
From: Steven Grimm @ 2007-01-22 1:38 UTC (permalink / raw)
To: Jeff King; +Cc: Junio C Hamano, Matthias Lederhofer, git
Jeff King wrote:
> That's not sufficient either. You might not _have_ the young objects
> yet, think the blob is dangling, and delete it. Meanwhile, the tree that
> references it arrives. IOW,
> 1. blob B arrives, but already exists
> 2. prune deletes unreference and old blob B
> 3. tree T arrives, referencing blob B
> I think this might be safe if you add objects in a top-down way (i.e., T
> before B). However, that doesn't make sense for the commit operation, in
> which you add blobs (with git-add), and then eventually construct a
> tree.
>
Shouldn't the repository be locked against operations like prune while a
commit is in progress anyway? That seems like it's pretty prudent and
reasonable to me -- doing otherwise is just asking for a zillion little
race conditions. Prune should be a rare enough operation that having it
abort (or better, block) while a commit is going on wouldn't be a big
problem, I'd think.
-Steve
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 1:38 ` Steven Grimm
@ 2007-01-22 1:52 ` Jeff King
2007-01-22 2:06 ` Junio C Hamano
2007-01-22 2:03 ` [PATCH] prune: --expire=time Junio C Hamano
1 sibling, 1 reply; 30+ messages in thread
From: Jeff King @ 2007-01-22 1:52 UTC (permalink / raw)
To: Steven Grimm; +Cc: Junio C Hamano, Matthias Lederhofer, git
On Sun, Jan 21, 2007 at 05:38:57PM -0800, Steven Grimm wrote:
> >before B). However, that doesn't make sense for the commit operation, in
> >which you add blobs (with git-add), and then eventually construct a
> >tree.
> >
>
> Shouldn't the repository be locked against operations like prune while a
> commit is in progress anyway? That seems like it's pretty prudent and
> reasonable to me -- doing otherwise is just asking for a zillion little
> race conditions. Prune should be a rare enough operation that having it
> abort (or better, block) while a commit is going on wouldn't be a big
> problem, I'd think.
I was a bit loose with my phrase 'commit operation'. What I really mean
is:
$ git add file ;# (1)
$ hack hack hack ;# (2)
$ git commit ;# (3)
After step (1), you have a blob in your db. If you already had that
blob, then you have the old blob. You don't get the updated tree and
commit until step (3). Step (2) can be hours or days. Do you really want
to lock the repository that long?
Potentially we could 'touch' the blob in step (1) to update its
timestamp. But if we update timestamps for things like commit, then that
might mean 'touch'ing tens of thousands of objects for a commit which
_should_ only require making a few objects.
-Peff
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 1:38 ` Steven Grimm
2007-01-22 1:52 ` Jeff King
@ 2007-01-22 2:03 ` Junio C Hamano
1 sibling, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-22 2:03 UTC (permalink / raw)
To: Steven Grimm; +Cc: Jeff King, Matthias Lederhofer, git
Steven Grimm <koreth@midwinter.com> writes:
> Shouldn't the repository be locked against operations like prune while
> a commit is in progress anyway? That seems like it's pretty prudent
> and reasonable to me -- doing otherwise is just asking for a zillion
> little race conditions.
The primary problem is not "while a commit is in progress"
anyway. I do not think of a single sane reason to run git-prune
from a cron job in a repository that is used for an active
development. It would make sense for management types to run
count-objects from a cron job and yell at offenders to repack,
but even then the primary disk saving and performance
enhancement would come from repacking and not from pruning.
Especially, 1.5.0 and onwards the objects reachable from reflog
are protected from pruning and reflogs are enabled by default
for developer repositories with working trees, even rewind and
rebae would not create crufts (the only two exceptions that
create cruft are running "git add" more than once on the same
file between commits to leak blobs and intermediate tree objects
recursive merge needs to make when there are more than one
common ancestors).
It is a possibility to have a single timestamp file that any
command that intends to eventually update refs should touch
before it starts creating any object. Then prune can stat the
file and remember its timestamp before it starts reading the
refs, and then before starting to actually prune the objects it
can stat the file again and if the timestamp is different it can
abort. Commit, receive-pack (invoked by git-push from the
remote side), fetch-pack (invoked by clone, fetch and pull), etc.
all needs to touch the file upfront before they create even a
single object. But that would create a very hot spot on the
filesystem, and I am not sure what its ramifications are.
My take on this issue is rather different. I do agree with you
that prune is a rare enough operation, and I think it should not
penalize the primary thing developers would want to do many
times an hour. I think its much simpler and saner to teach
people not to run prune in an uncontrolled way.
We may want to remove the call to git-prune from git-gc, and
tell people that if they want to run something from a cron job,
run git-gc and not git-prune.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 1:52 ` Jeff King
@ 2007-01-22 2:06 ` Junio C Hamano
2007-01-22 2:23 ` Linus Torvalds
0 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2007-01-22 2:06 UTC (permalink / raw)
To: Jeff King; +Cc: git
Jeff King <peff@peff.net> writes:
> I was a bit loose with my phrase 'commit operation'. What I really mean
> is:
>
> $ git add file ;# (1)
> $ hack hack hack ;# (2)
> $ git commit ;# (3)
>
> After step (1), you have a blob in your db. If you already had that
> blob, then you have the old blob. You don't get the updated tree and
> commit until step (3). Step (2) can be hours or days. Do you really want
> to lock the repository that long?
This case does not need any locking, as blobs reachable from
index are considered as "reachable" for the purpose of pruning.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 2:06 ` Junio C Hamano
@ 2007-01-22 2:23 ` Linus Torvalds
2007-01-22 2:40 ` Junio C Hamano
0 siblings, 1 reply; 30+ messages in thread
From: Linus Torvalds @ 2007-01-22 2:23 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Jeff King, git
On Sun, 21 Jan 2007, Junio C Hamano wrote:
>
> This case does not need any locking, as blobs reachable from
> index are considered as "reachable" for the purpose of pruning.
Well, strictly speaking, there's a race there. The new index gets written
out _after_ the blob has been created. Also, in many cases, we end up
using completely temporary indexes ("git commit filename") that "git
prune" doesn't know or understand.
All of which is really nothing new. "git prune" has always been dangerous.
You cannot, and must not, run it concurrently with other git operations.
Also, in the absense of undo operations, there is really nothing to ever
prune. Of course, the git.git archive itself has effectively taughth
people bad habits, since "pu" ends up continually rebasing itself.
However, now that rebasing ends up being visible in the branch reflog,
we're back to the "normally nothing to ever prune" situation, and as such,
the only object pruning that _should_ take place is basically as part of
"git repack -a -d" (which unlike a prune is actually safe, since it only
prunes objects that are reachable from a pack).
So to recap: "git prune" simply isn't a safe thing to do. Don't do it
without thinking. I'm not at all sure it's a good idea that "git gc" does
it for you, since it just encourages mindless pruning that probably
shouldn't happen in the first place.
Needing to prune is generally to be taken as a sign of something being
wrong.
(And yeah, the grace period makes it "safe". Assuming everybody involved
has even half-way reliable clocks. So IN PRACTICE it is all fine, and I
doubt you can lose anything except by really doing something insane. If
you want to kill your archive, it's easier to do "rm .git/objects/*/*a*"
than it is to try to do it with strange "git prune" setups, but still..)
Linus
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 2:23 ` Linus Torvalds
@ 2007-01-22 2:40 ` Junio C Hamano
2007-01-22 2:58 ` Linus Torvalds
2007-01-22 3:26 ` [PATCH] v1.5.0.txt: update description of git-gc Jeff King
0 siblings, 2 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-22 2:40 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@osdl.org> writes:
> However, now that rebasing ends up being visible in the branch reflog,
> we're back to the "normally nothing to ever prune" situation, and as such,
> the only object pruning that _should_ take place is basically as part of
> "git repack -a -d" (which unlike a prune is actually safe, since it only
> prunes objects that are reachable from a pack).
>
> So to recap: "git prune" simply isn't a safe thing to do. Don't do it
> without thinking. I'm not at all sure it's a good idea that "git gc" does
> it for you, since it just encourages mindless pruning that probably
> shouldn't happen in the first place.
I guess we are in agreement on this.
-- >8 --
[PATCH] git-gc: do not run prune mindlessly.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Documentation/git-gc.txt | 1 -
git-gc.sh | 1 -
2 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt
index 2bcc949..f53ca97 100644
--- a/Documentation/git-gc.txt
+++ b/Documentation/git-gc.txt
@@ -50,7 +50,6 @@ kept. This defaults to 15 days.
See Also
--------
-gitlink:git-prune[1]
gitlink:git-reflog[1]
gitlink:git-repack[1]
gitlink:git-rerere[1]
diff --git a/git-gc.sh b/git-gc.sh
index 6de55f7..7716f62 100755
--- a/git-gc.sh
+++ b/git-gc.sh
@@ -11,5 +11,4 @@ SUBDIRECTORY_OK=Yes
git-pack-refs --prune &&
git-reflog expire --all &&
git-repack -a -d -l &&
-git-prune &&
git-rerere gc || exit
--
1.5.0.rc2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 2:40 ` Junio C Hamano
@ 2007-01-22 2:58 ` Linus Torvalds
2007-01-22 5:17 ` Junio C Hamano
2007-01-22 3:26 ` [PATCH] v1.5.0.txt: update description of git-gc Jeff King
1 sibling, 1 reply; 30+ messages in thread
From: Linus Torvalds @ 2007-01-22 2:58 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, 21 Jan 2007, Junio C Hamano wrote:
> >
> > So to recap: "git prune" simply isn't a safe thing to do. Don't do it
> > without thinking. I'm not at all sure it's a good idea that "git gc" does
> > it for you, since it just encourages mindless pruning that probably
> > shouldn't happen in the first place.
>
> I guess we are in agreement on this.
Well, having complained about "git prune", I at the same time have to
admit that I worry about loose objects (and scary messages from
git-fsck-objects) potentially confusing new people.
So "git prune" _does_ remove stuff that happens normally. It removes stuff
that accumulates (even with reflog) thanks to commands that were
interrupted with ^C, and it also removes the auto-merge turds that the
recursive merge can create when it does its internal pseudo-commit for
more complex merges.
So I don't think running "prune" from within "git gc" is necessarily
wrong per se - I just don't think it's a good idea to do so by _default_,
exactly because of the issues it can have.
So hiding "git prune" behind "git gc" is probably a good thing (make
people learn just one thing they need to interface to), but maybe we need
a "--prune" flag to the gc command, and then perhaps just document that
you should be careful.
Linus
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH] v1.5.0.txt: update description of git-gc
2007-01-22 2:40 ` Junio C Hamano
2007-01-22 2:58 ` Linus Torvalds
@ 2007-01-22 3:26 ` Jeff King
1 sibling, 0 replies; 30+ messages in thread
From: Jeff King @ 2007-01-22 3:26 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
It doesn't call git-prune, and it does call a lot of other things.
Signed-off-by: Jeff King <peff@peff.net>
---
On Sun, Jan 21, 2007 at 06:40:40PM -0800, Junio C Hamano wrote:
> [PATCH] git-gc: do not run prune mindlessly.
This updates the release notes to reflect this change (and fixes a few
other inaccuracies and omissions).
v1.5.0.txt | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/v1.5.0.txt b/v1.5.0.txt
index 1b8ecf0..8a2c9cc 100644
--- a/v1.5.0.txt
+++ b/v1.5.0.txt
@@ -191,8 +191,8 @@ Updates in v1.5.0 since v1.4.4 series
unreachable, as there is a one-day grace period built-in.
- There is a toplevel garbage collector script, 'git-gc', that
- is an easy way to run 'git-repack -a -d', 'git-reflog gc',
- and 'git-prune'.
+ runs periodic cleanup functions, including 'git-repack -a -d',
+ 'git-reflog expire', 'git-pack-refs --prune', and 'git-rerere gc'.
* Detached HEAD
--
1.5.0.rc1.gda86
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 2:58 ` Linus Torvalds
@ 2007-01-22 5:17 ` Junio C Hamano
2007-01-22 6:26 ` Linus Torvalds
2007-01-22 9:32 ` Jakub Narebski
0 siblings, 2 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-22 5:17 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@osdl.org> writes:
> ... I at the same time have to
> admit that I worry about loose objects (and scary messages from
> git-fsck-objects) potentially confusing new people.
I've been annoyed by those scary messages fsck-objects enough
and was wondering if we could make it less scary. Especially
annoying is that the message about missing blobs and trees that
are only referred to by dangling commits.
> So hiding "git prune" behind "git gc" is probably a good thing (make
> people learn just one thing they need to interface to), but maybe we need
> a "--prune" flag to the gc command, and then perhaps just document that
> you should be careful.
I am still undecided which one should be the default.
For interactive use by developers who work in their own
repositories, git-prune is safe because nothing else would be
working on their repositories at the time. While I do not think
we should recommend using git-gc from a cron job, if they want
to do so, giving an extra --no-prune option in their cron script
would be much less annoying.
-- >8 --
[PATCH] git-gc: do not run prune mindlessly.
You should pass --no-prune if you ever want to run git-gc from
a cron job.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Documentation/git-gc.txt | 15 +++++++++++++--
git-gc.sh | 18 ++++++++++++++++--
2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt
index 2bcc949..7b650a7 100644
--- a/Documentation/git-gc.txt
+++ b/Documentation/git-gc.txt
@@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository
SYNOPSIS
--------
-'git-gc'
+'git-gc' [--no-prune]
DESCRIPTION
-----------
@@ -21,6 +21,18 @@ Users are encouraged to run this task on a regular basis within
each repository to maintain good disk space utilization and good
operating performance.
+OPTIONS
+-------
+
+--no-prune::
+ Usually `git-gc` packs refs, expires old reflog entries,
+ packs loose objects, removes unreferenced loose objects,
+ and removes old 'rerere' records. Among these, removal
+ of unreferenced loose objects is an unsafe operation
+ while other git operations are in progress. This option
+ disables this unsafe step.
+
+
Configuration
-------------
@@ -50,7 +62,6 @@ kept. This defaults to 15 days.
See Also
--------
-gitlink:git-prune[1]
gitlink:git-reflog[1]
gitlink:git-repack[1]
gitlink:git-rerere[1]
diff --git a/git-gc.sh b/git-gc.sh
index 6de55f7..ecd4b0e 100755
--- a/git-gc.sh
+++ b/git-gc.sh
@@ -4,12 +4,26 @@
#
# Cleanup unreachable files and optimize the repository.
-USAGE=''
+USAGE='git-gc [--no-prune]'
SUBDIRECTORY_OK=Yes
. git-sh-setup
+no_prune=
+while case $# in 0) break ;; esac
+do
+ case "$1" in
+ --no-prune)
+ no_prune=:
+ ;;
+ --)
+ usage
+ ;;
+ esac
+ shift
+done
+
git-pack-refs --prune &&
git-reflog expire --all &&
git-repack -a -d -l &&
-git-prune &&
+$no_prune git-prune &&
git-rerere gc || exit
--
1.5.0.rc2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 5:17 ` Junio C Hamano
@ 2007-01-22 6:26 ` Linus Torvalds
2007-01-22 6:57 ` Shawn O. Pearce
2007-01-22 7:12 ` Junio C Hamano
2007-01-22 9:32 ` Jakub Narebski
1 sibling, 2 replies; 30+ messages in thread
From: Linus Torvalds @ 2007-01-22 6:26 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, 21 Jan 2007, Junio C Hamano wrote:
>
> I've been annoyed by those scary messages fsck-objects enough
> and was wondering if we could make it less scary. Especially
> annoying is that the message about missing blobs and trees that
> are only referred to by dangling commits.
Yeah. Something like this, which really just changes the order in which we
print out the warnings, might be a good idea.
This patch does that, and also adds a lot of commentary on what it is
doing. It also splits out the "object unreachable" case from the reachable
case, since they end up being really really different. An unreachable
object we don't even care about broken links for etc.
The diffstat says that it adds a lot more lines than it removed, but all
the really new lines are either just the added comments, or a result of
just splitting the object checking up into a few separate functions. The
actual code is largely the same (but with the improved semantics).
I actually tested this a bit by trying to force a few corruption cases.
And the changes _look_ obvious, and yes, it improved reporting (I think).
At the same time, git-fsck-objects is too important to have just one pair
of eyes looking at it, so I would suggest others really double-check my
changes and the logic.
(Btw, the "parsed but unparsed because it was in a pack-file" test should
probably be tightened up a bit. That part is not new code, it's the old
code in a new place with a newly added comment about what it is all about.
So it's not a regression, it's just something I thought I'd point out when
I looked at the code).
Add my sign-off on the patch as appropriate. I do think it's mergeable,
but I'd _really_ like somebody else to double-check me here.
NOTE! One of the new things is that it doesn't complain about missing
unreachable objects at all any more. It used to complain about missing
objects even if they were only missing because _another_ unreachable
object wanted to access them, which was obviously just useless noise (it
could happen if you ^C in the middle of a fetch, for example.
Anyway, somebody should double- and triple-check me.
Linus
---
fsck-objects.c | 133 ++++++++++++++++++++++++++++++++++++++++----------------
1 files changed, 95 insertions(+), 38 deletions(-)
diff --git a/fsck-objects.c b/fsck-objects.c
index 81f00db..ecfb014 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -54,6 +54,99 @@ static int objwarning(struct object *obj, const char *err, ...)
return -1;
}
+/*
+ * Check a single reachable object
+ */
+static void check_reachable_object(struct object *obj)
+{
+ const struct object_refs *refs;
+
+ /*
+ * We obviously want the object to be parsed,
+ * except if it was in a pack-file and we didn't
+ * do a full fsck
+ */
+ if (!obj->parsed) {
+ if (has_sha1_file(obj->sha1))
+ return; /* it is in pack - forget about it */
+ printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
+ return;
+ }
+
+ /*
+ * Check that everything that we try to reference is also good.
+ */
+ refs = lookup_object_refs(obj);
+ if (refs) {
+ unsigned j;
+ for (j = 0; j < refs->count; j++) {
+ struct object *ref = refs->ref[j];
+ if (ref->parsed ||
+ (has_sha1_file(ref->sha1)))
+ continue;
+ printf("broken link from %7s %s\n",
+ typename(obj->type), sha1_to_hex(obj->sha1));
+ printf(" to %7s %s\n",
+ typename(ref->type), sha1_to_hex(ref->sha1));
+ }
+ }
+}
+
+/*
+ * Check a single unreachable object
+ */
+static void check_unreachable_object(struct object *obj)
+{
+ /*
+ * Missing unreachable object? Ignore it. It's not like
+ * we miss it (since it can't be reached), nor do we want
+ * to complain about it being unreachable (since it does
+ * not exist).
+ */
+ if (!obj->parsed)
+ return;
+
+ /*
+ * Unreachable object that exists? Show it if asked to,
+ * since this is something that is prunable.
+ */
+ if (show_unreachable) {
+ printf("unreachable %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
+ return;
+ }
+
+ /*
+ * "!used" means that nothing at all points to it, including
+ * other unreacahble objects. In other words, it's the "tip"
+ * of some set of unreachable objects, usually a commit that
+ * got dropped.
+ *
+ * Such starting points are more interesting than some random
+ * set of unreachable objects, so we show them even if the user
+ * hasn't asked for _all_ unreachable objects. If you have
+ * deleted a branch by mistake, this is a prime candidate to
+ * start looking at, for example.
+ */
+ if (!obj->used) {
+ printf("dangling %s %s\n", typename(obj->type),
+ sha1_to_hex(obj->sha1));
+ return;
+ }
+
+ /*
+ * Otherwise? It's there, it's unreachable, and some other unreachable
+ * object points to it. Ignore it - it's not interesting, and we showed
+ * all the interesting cases above.
+ */
+}
+
+static void check_object(struct object *obj)
+{
+ if (obj->flags & REACHABLE)
+ check_reachable_object(obj);
+ else
+ check_unreachable_object(obj);
+}
static void check_connectivity(void)
{
@@ -62,46 +155,10 @@ static void check_connectivity(void)
/* Look up all the requirements, warn about missing objects.. */
max = get_max_object_index();
for (i = 0; i < max; i++) {
- const struct object_refs *refs;
struct object *obj = get_indexed_object(i);
- if (!obj)
- continue;
-
- if (!obj->parsed) {
- if (has_sha1_file(obj->sha1))
- ; /* it is in pack */
- else
- printf("missing %s %s\n",
- typename(obj->type), sha1_to_hex(obj->sha1));
- continue;
- }
-
- refs = lookup_object_refs(obj);
- if (refs) {
- unsigned j;
- for (j = 0; j < refs->count; j++) {
- struct object *ref = refs->ref[j];
- if (ref->parsed ||
- (has_sha1_file(ref->sha1)))
- continue;
- printf("broken link from %7s %s\n",
- typename(obj->type), sha1_to_hex(obj->sha1));
- printf(" to %7s %s\n",
- typename(ref->type), sha1_to_hex(ref->sha1));
- }
- }
-
- if (show_unreachable && !(obj->flags & REACHABLE)) {
- printf("unreachable %s %s\n",
- typename(obj->type), sha1_to_hex(obj->sha1));
- continue;
- }
-
- if (!obj->used) {
- printf("dangling %s %s\n", typename(obj->type),
- sha1_to_hex(obj->sha1));
- }
+ if (obj)
+ check_object(obj);
}
}
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 6:26 ` Linus Torvalds
@ 2007-01-22 6:57 ` Shawn O. Pearce
2007-01-22 7:12 ` Junio C Hamano
1 sibling, 0 replies; 30+ messages in thread
From: Shawn O. Pearce @ 2007-01-22 6:57 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Junio C Hamano, git
Linus Torvalds <torvalds@osdl.org> wrote:
> Add my sign-off on the patch as appropriate. I do think it's mergeable,
> but I'd _really_ like somebody else to double-check me here.
Your patch looks right to me. And I like the idea of reducing the
output to only the really important stuff. Nice work.
--
Shawn.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 6:26 ` Linus Torvalds
2007-01-22 6:57 ` Shawn O. Pearce
@ 2007-01-22 7:12 ` Junio C Hamano
1 sibling, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2007-01-22 7:12 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@osdl.org> writes:
> On Sun, 21 Jan 2007, Junio C Hamano wrote:
>>
>> I've been annoyed by those scary messages fsck-objects enough
>> and was wondering if we could make it less scary. Especially
>> annoying is that the message about missing blobs and trees that
>> are only referred to by dangling commits.
> ...
> Add my sign-off on the patch as appropriate. I do think it's mergeable,
> but I'd _really_ like somebody else to double-check me here.
I think this is very sensible. Even without all the added
comments, the refactoring makes the code much more readable.
-- >8 --
From: Linus Torvalds <torvalds@osdl.org>
Date: Sun, 21 Jan 2007 22:26:41 -0800 (PST)
Subject: [PATCH] fsck-objects: refactor checking for connectivity
This separates the connectivity check into separate codepaths,
one for reachable objects and the other for unreachable ones,
while adding a lot of comments to explain what is going on.
When checking an unreachable object, unlike a reachable one, we
do not have to complain if it does not exist (we used to
complain about a missing blob even when the only thing that
references it is a tree that is dangling). Also we do not have
to check and complain about objects that are referenced by an
unreachable object.
This makes the messages from fsck-objects a lot less noisy and
more useful.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
<<your patch here>>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH] prune: --expire=time
2007-01-22 5:17 ` Junio C Hamano
2007-01-22 6:26 ` Linus Torvalds
@ 2007-01-22 9:32 ` Jakub Narebski
1 sibling, 0 replies; 30+ messages in thread
From: Jakub Narebski @ 2007-01-22 9:32 UTC (permalink / raw)
To: git
Junio C Hamano wrote:
> @@ -50,7 +62,6 @@ kept. This defaults to 15 days.
>
> See Also
> --------
> -gitlink:git-prune[1]
> gitlink:git-reflog[1]
> gitlink:git-repack[1]
> gitlink:git-rerere[1]
Huh?
--
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git
^ permalink raw reply [flat|nested] 30+ messages in thread
end of thread, other threads:[~2007-01-22 9:32 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-18 17:18 [PATCH] prune-packed: new option --min-age=N Matthias Lederhofer
2007-01-18 17:24 ` Shawn O. Pearce
2007-01-18 17:42 ` Matthias Lederhofer
2007-01-18 17:51 ` Shawn O. Pearce
2007-01-18 22:29 ` [RFC] prune: --expire=seconds Matthias Lederhofer
2007-01-18 22:32 ` Junio C Hamano
2007-01-19 3:44 ` Shawn O. Pearce
2007-01-19 10:49 ` [PATCH] prune: --expire=time Matthias Lederhofer
2007-01-19 15:41 ` Nicolas Pitre
2007-01-19 19:18 ` Junio C Hamano
2007-01-20 11:18 ` Matthias Lederhofer
2007-01-21 6:55 ` Junio C Hamano
2007-01-21 7:53 ` Shawn O. Pearce
2007-01-21 10:37 ` Matthias Lederhofer
2007-01-21 11:17 ` Junio C Hamano
2007-01-21 22:01 ` Jeff King
2007-01-22 1:38 ` Steven Grimm
2007-01-22 1:52 ` Jeff King
2007-01-22 2:06 ` Junio C Hamano
2007-01-22 2:23 ` Linus Torvalds
2007-01-22 2:40 ` Junio C Hamano
2007-01-22 2:58 ` Linus Torvalds
2007-01-22 5:17 ` Junio C Hamano
2007-01-22 6:26 ` Linus Torvalds
2007-01-22 6:57 ` Shawn O. Pearce
2007-01-22 7:12 ` Junio C Hamano
2007-01-22 9:32 ` Jakub Narebski
2007-01-22 3:26 ` [PATCH] v1.5.0.txt: update description of git-gc Jeff King
2007-01-22 2:03 ` [PATCH] prune: --expire=time Junio C Hamano
2007-01-20 12:06 ` Simon 'corecode' Schubert
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).