git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature
@ 2007-04-08 23:27 Dana How
  0 siblings, 0 replies; 5+ messages in thread
From: Dana How @ 2007-04-08 23:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, danahow


Add --max-pack-size parsing and usage messages.
Upgrade git-repack.sh to handle multiple packfile names.

Signed-off-by: Dana How <how@deathvalley.cswitch.com>
---
 builtin-pack-objects.c |   10 +++++++++-
 git-repack.sh          |   12 +++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d750c4b..392a589 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -14,7 +14,7 @@
 #include "list-objects.h"
 
 static const char pack_usage[] = "\
-git-pack-objects [{ -q | --progress | --all-progress }] \n\
+git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\
 	[--local] [--incremental] [--window=N] [--depth=N] \n\
 	[--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\
 	[--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\
@@ -1729,6 +1729,14 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			incremental = 1;
 			continue;
 		}
+		if (!prefixcmp(arg, "--max-pack-size=")) {
+			char *end;
+			offset_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
+			if (!arg[16] || *end)
+				usage(pack_usage);
+			no_reuse_delta = 1;
+			continue;
+		}
 		if (!prefixcmp(arg, "--window=")) {
 			char *end;
 			window = strtoul(arg+9, &end, 0);
diff --git a/git-repack.sh b/git-repack.sh
index ddfa8b4..ebce1b3 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,7 +3,7 @@
 # Copyright (c) 2005 Linus Torvalds
 #
 
-USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]'
+USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--depth=N]'
 SUBDIRECTORY_OK='Yes'
 . git-sh-setup
 
@@ -18,6 +18,7 @@ do
 	-q)	quiet=-q ;;
 	-f)	no_reuse_delta=--no-reuse-delta ;;
 	-l)	local=--local ;;
+	--max-pack-size=*) extra="$extra $1" ;;
 	--window=*) extra="$extra $1" ;;
 	--depth=*) extra="$extra $1" ;;
 	*)	usage ;;
@@ -62,11 +63,12 @@ case ",$all_into_one," in
 esac
 
 args="$args $local $quiet $no_reuse_delta$extra"
-name=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
+names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
 	exit 1
-if [ -z "$name" ]; then
+if [ -z "$names" ]; then
 	echo Nothing new to pack.
-else
+fi
+for name in $names ; do
 	chmod a-w "$PACKTMP-$name.pack"
 	chmod a-w "$PACKTMP-$name.idx"
 	if test "$quiet" != '-q'; then
@@ -92,7 +94,7 @@ else
 		exit 1
 	}
 	rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx"
-fi
+done
 
 if test "$remove_redundant" = t
 then
-- 
1.5.1.89.g8abf0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature
@ 2007-04-30 23:25 Dana How
  2007-05-01  5:45 ` Shawn O. Pearce
  0 siblings, 1 reply; 5+ messages in thread
From: Dana How @ 2007-04-30 23:25 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, danahow


Add --max-pack-size parsing and usage messages.
Upgrade git-repack.sh to handle multiple packfile names.

Signed-off-by: Dana L. How <danahow@gmail.com>
---
 builtin-pack-objects.c |   10 +++++++++-
 git-repack.sh          |   12 +++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 328b3cb..ed958d6 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -15,7 +15,7 @@
 #include "progress.h"
 
 static const char pack_usage[] = "\
-git-pack-objects [{ -q | --progress | --all-progress }] \n\
+git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\
 	[--local] [--incremental] [--window=N] [--depth=N] \n\
 	[--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\
 	[--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\
@@ -1766,6 +1766,14 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			incremental = 1;
 			continue;
 		}
+		if (!prefixcmp(arg, "--max-pack-size=")) {
+			char *end;
+			pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
+			if (!arg[16] || *end)
+				usage(pack_usage);
+			no_reuse_delta = 1;
+			continue;
+		}
 		if (!prefixcmp(arg, "--window=")) {
 			char *end;
 			window = strtoul(arg+9, &end, 0);
diff --git a/git-repack.sh b/git-repack.sh
index ddfa8b4..ebce1b3 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,7 +3,7 @@
 # Copyright (c) 2005 Linus Torvalds
 #
 
-USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]'
+USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--depth=N]'
 SUBDIRECTORY_OK='Yes'
 . git-sh-setup
 
@@ -18,6 +18,7 @@ do
 	-q)	quiet=-q ;;
 	-f)	no_reuse_delta=--no-reuse-delta ;;
 	-l)	local=--local ;;
+	--max-pack-size=*) extra="$extra $1" ;;
 	--window=*) extra="$extra $1" ;;
 	--depth=*) extra="$extra $1" ;;
 	*)	usage ;;
@@ -62,11 +63,12 @@ case ",$all_into_one," in
 esac
 
 args="$args $local $quiet $no_reuse_delta$extra"
-name=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
+names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
 	exit 1
-if [ -z "$name" ]; then
+if [ -z "$names" ]; then
 	echo Nothing new to pack.
-else
+fi
+for name in $names ; do
 	chmod a-w "$PACKTMP-$name.pack"
 	chmod a-w "$PACKTMP-$name.idx"
 	if test "$quiet" != '-q'; then
@@ -92,7 +94,7 @@ else
 		exit 1
 	}
 	rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx"
-fi
+done
 
 if test "$remove_redundant" = t
 then
-- 
1.5.2.rc0.766.gba60-dirty

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature
  2007-04-30 23:25 [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature Dana How
@ 2007-05-01  5:45 ` Shawn O. Pearce
  2007-05-01  6:15   ` Dana How
  0 siblings, 1 reply; 5+ messages in thread
From: Shawn O. Pearce @ 2007-05-01  5:45 UTC (permalink / raw)
  To: Dana How; +Cc: Junio C Hamano, Git Mailing List

Dana How <danahow@gmail.com> wrote:
> Add --max-pack-size parsing and usage messages.
> Upgrade git-repack.sh to handle multiple packfile names.
...
> diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
...
> +		if (!prefixcmp(arg, "--max-pack-size=")) {
> +			char *end;
> +			pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
> +			if (!arg[16] || *end)
> +				usage(pack_usage);
> +			no_reuse_delta = 1;

Wow, are you serious?  This entire change is about making repack
automatically split large projects into multiple packfiles.  If
that happens are you intending that the caller will mark all of
those packfiles with .keep files immediately after repacking them?
If you want users to create .keep files, can git-repack.sh do that
for them when more than one packfile was output?

Because otherwise a "quick" git-gc will not be quick because the
reuse delta feature (which is a massive performance improvement for
repack/gc) will always be disabled.  But odds are a future repack
of the same project will generally keep things that are in the
same packfile already in the same packfile again, so delta reuse is
actually possible for most objects.  I think you should find a way
to make this change work without needing to force no_reuse_delta
just because this limit was added.

> diff --git a/git-repack.sh b/git-repack.sh
...
> +names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
>  	exit 1
> -if [ -z "$name" ]; then
> +if [ -z "$names" ]; then
>  	echo Nothing new to pack.
> -else
> +fi
> +for name in $names ; do

I think this particular change needs to either preceed the prior
commit, or be part of it.  If someone tries to bisect this patch
series with a large enough project that multiple packfiles are being
produced then you run into some ugly problems in this repack script.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature
  2007-05-01  5:45 ` Shawn O. Pearce
@ 2007-05-01  6:15   ` Dana How
  2007-05-01  6:27     ` Shawn O. Pearce
  0 siblings, 1 reply; 5+ messages in thread
From: Dana How @ 2007-05-01  6:15 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Junio C Hamano, Git Mailing List, danahow

On 4/30/07, Shawn O. Pearce <spearce@spearce.org> wrote:
> Dana How <danahow@gmail.com> wrote:
> > diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
> ...
> > +             if (!prefixcmp(arg, "--max-pack-size=")) {
> > +                     char *end;
> > +                     pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
> > +                     if (!arg[16] || *end)
> > +                             usage(pack_usage);
> > +                     no_reuse_delta = 1;
>
> Wow, are you serious?  This entire change is about making repack
> automatically split large projects into multiple packfiles.  If
> that happens are you intending that the caller will mark all of
> those packfiles with .keep files immediately after repacking them?
> If you want users to create .keep files, can git-repack.sh do that
> for them when more than one packfile was output?
>
> Because otherwise a "quick" git-gc will not be quick because the
> reuse delta feature (which is a massive performance improvement for
> repack/gc) will always be disabled.  But odds are a future repack
> of the same project will generally keep things that are in the
> same packfile already in the same packfile again, so delta reuse is
> actually possible for most objects.  I think you should find a way
> to make this change work without needing to force no_reuse_delta
> just because this limit was added.
In a previous version of this patchset I explained that I had turned
on no_reuse_delta because I hadn't yet verified all the combinations
in the reuse codepath(s).  I guess I'll be reinspecting them sooner
rather than later.

> > diff --git a/git-repack.sh b/git-repack.sh
> ...
> > +names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
> >       exit 1
> > -if [ -z "$name" ]; then
> > +if [ -z "$names" ]; then
> >       echo Nothing new to pack.
> > -else
> > +fi
> > +for name in $names ; do
>
> I think this particular change needs to either preceed the prior
> commit, or be part of it.  If someone tries to bisect this patch
> series with a large enough project that multiple packfiles are being
> produced then you run into some ugly problems in this repack script.
Sorry, you lost me.  git is being bisected, or a project managed by git?
My intent was that the pack splitting wouldn't be available until
_all_ patches were applied (active).  Bisecting git _within_ this patchset
would still be useful -- it could be used to isolate where some of
this new code broke some pre-existing feature.

Thanks,
-- 
Dana L. How  danahow@gmail.com  +1 650 804 5991 cell

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature
  2007-05-01  6:15   ` Dana How
@ 2007-05-01  6:27     ` Shawn O. Pearce
  0 siblings, 0 replies; 5+ messages in thread
From: Shawn O. Pearce @ 2007-05-01  6:27 UTC (permalink / raw)
  To: Dana How; +Cc: Junio C Hamano, Git Mailing List

Dana How <danahow@gmail.com> wrote:
> >I think this particular change needs to either preceed the prior
> >commit, or be part of it.  If someone tries to bisect this patch
> >series with a large enough project that multiple packfiles are being
> >produced then you run into some ugly problems in this repack script.
...
> Sorry, you lost me.  git is being bisected, or a project managed by git?
> My intent was that the pack splitting wouldn't be available until
> _all_ patches were applied (active).  Bisecting git _within_ this patchset
> would still be useful -- it could be used to isolate where some of
> this new code broke some pre-existing feature.

git bisecting itself, to search for bugs in itself...  In that
case a bisect could stop at any random point in the middle of this
series, even if this series isn't the one that is at fault for the
given breakage.  In such a case we try to hope that git.git is
always in a working state at any given commit.

On second thought looking at your series I see how you were able to
assure that here.  You didn't activate the option until the shell
script could also handle more than one name, so since the option
is not available in any prior commit there's no way to turn on the
multiple-name-output that git-repack.sh would have broken on.

But lets just say the 7/8 patch actually did break git, and we are
bisecting to look for it.  We wouldn't actually see the breakage
activate until 8/8, which is misleading, as the broken code is
actually in 7/8.  But if you are looking at 8/8 as broken and see
the lines broken, you can easily run git-blame to really figure
out what changed, and why.  So its not really a big deal that the
series is organized like this.  I blew it out of proportion.  :-)

-- 
Shawn.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-05-01  6:27 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-30 23:25 [PATCH 8/8] git-repack --max-pack-size: add option parsing to enable feature Dana How
2007-05-01  5:45 ` Shawn O. Pearce
2007-05-01  6:15   ` Dana How
2007-05-01  6:27     ` Shawn O. Pearce
  -- strict thread matches above, loose matches on Subject: below --
2007-04-08 23:27 Dana How

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).