git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/3] Teach git-index-pack how to keep a pack file.
@ 2006-10-29  9:41 Shawn Pearce
  2006-10-29 10:49 ` Jakub Narebski
  2006-10-30  3:47 ` Nicolas Pitre
  0 siblings, 2 replies; 5+ messages in thread
From: Shawn Pearce @ 2006-10-29  9:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

To prevent a race condition between `index-pack --stdin` and
`repack -a -d` where the repack deletes the newly created pack
file before any refs are updated to reference objects contained
within it we mark the pack file as one that should be kept.  This
removes it from the list of packs that `repack -a -d` will consider
for removal.

Callers such as `receive-pack` which want to invoke `index-pack`
should use this new --keep option to prevent the newly created pack
and index file pair from being deleted before they have finished any
related ref updates.  Only after all ref updates have been finished
should the associated .keep file be removed.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 I'm still working on the change to receive-pack.  Junio's latest
 patch (send-pack --keep) is a different way of doing at least some
 of what I'm still working on there.

 I'm throwing a pair of pipes around index-pack so I can capture
 the pack name thus allowing receive-pack to delete the correct
 .keep file after its completed the ref updates.  At that point
 receive-pack can easily examine the object count in the pack header
 and compare that against a config entry to decide if it should be
 keeping the pack or exploding it.


 Documentation/git-index-pack.txt |   24 ++++++++++++++++++--
 index-pack.c                     |   43 +++++++++++++++++++++++++++++++++++--
 2 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt
index 9fa4847..1235416 100644
--- a/Documentation/git-index-pack.txt
+++ b/Documentation/git-index-pack.txt
@@ -9,7 +9,7 @@ git-index-pack - Build pack index file f
 SYNOPSIS
 --------
 'git-index-pack' [-v] [-o <index-file>] <pack-file>
-'git-index-pack' --stdin [--fix-thin] [-v] [-o <index-file>] [<pack-file>]
+'git-index-pack' --stdin [--fix-thin] [--keep] [-v] [-o <index-file>] [<pack-file>]
 
 
 DESCRIPTION
@@ -38,7 +38,10 @@ OPTIONS
 	instead and a copy is then written to <pack-file>. If
 	<pack-file> is not specified, the pack is written to
 	objects/pack/ directory of the current git repository with
-	a default name determined from the pack content.
+	a default name determined from the pack content.  If
+	<pack-file> is not specified consider using --keep to
+	prevent a race condition between this process and
+	gitlink::git-repack[1] .
 
 --fix-thin::
 	It is possible for gitlink:git-pack-objects[1] to build
@@ -48,7 +51,22 @@ OPTIONS
 	and they must be included in the pack for that pack to be self
 	contained and indexable. Without this option any attempt to
 	index a thin pack will fail. This option only makes sense in
-	conjonction with --stdin.
+	conjunction with --stdin.
+
+--keep::
+	Before moving the index into its final destination
+	create an empty .keep file for the associated pack file.
+	This option is usually necessary with --stdin to prevent a
+	simultaneous gitlink:git-repack[1] process from deleting
+	the newly constructed pack and index before refs can be
+	updated to use objects contained in the pack.
+
+--keep='why'::
+	Like --keep create a .keep file before moving the index into
+	its final destination, but rather than creating an empty file
+	place 'why' followed by an LF into the .keep file.  The 'why'
+	message can later be searched for within all .keep files to
+	locate any which have outlived their usefulness.
 
 
 Author
diff --git a/index-pack.c b/index-pack.c
index 866a054..b37dd78 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -10,7 +10,7 @@
 #include <signal.h>
 
 static const char index_pack_usage[] =
-"git-index-pack [-v] [-o <index-file>] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
+"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
 
 struct object_entry
 {
@@ -754,6 +754,7 @@ static const char *write_index_file(cons
 
 static void final(const char *final_pack_name, const char *curr_pack_name,
 		  const char *final_index_name, const char *curr_index_name,
+		  const char *keep_name, const char *keep_msg,
 		  unsigned char *sha1)
 {
 	char name[PATH_MAX];
@@ -780,6 +781,23 @@ static void final(const char *final_pack
 		}
 	}
 
+	if (keep_msg) {
+		int keep_fd, keep_msg_len = strlen(keep_msg);
+		if (!keep_name) {
+			snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
+				 get_object_directory(), sha1_to_hex(sha1));
+			keep_name = name;
+		}
+		keep_fd = open(keep_name, O_RDWR | O_CREAT, 0600);
+		if (keep_fd < 0)
+			die("cannot write keep file");
+		if (keep_msg_len > 0) {
+			write_or_die(keep_fd, keep_msg, keep_msg_len);
+			write_or_die(keep_fd, "\n", 1);
+		}
+		close(keep_fd);
+	}
+
 	if (final_pack_name != curr_pack_name) {
 		if (!final_pack_name) {
 			snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
@@ -807,7 +825,8 @@ int main(int argc, char **argv)
 	int i, fix_thin_pack = 0;
 	const char *curr_pack, *pack_name = NULL;
 	const char *curr_index, *index_name = NULL;
-	char *index_name_buf = NULL;
+	const char *keep_name = NULL, *keep_msg = NULL;
+	char *index_name_buf = NULL, *keep_name_buf = NULL;
 	unsigned char sha1[20];
 
 	for (i = 1; i < argc; i++) {
@@ -818,6 +837,10 @@ int main(int argc, char **argv)
 				from_stdin = 1;
 			} else if (!strcmp(arg, "--fix-thin")) {
 				fix_thin_pack = 1;
+			} else if (!strcmp(arg, "--keep")) {
+				keep_msg = "";
+			} else if (!strncmp(arg, "--keep=", 7)) {
+				keep_msg = arg + 7;
 			} else if (!strcmp(arg, "-v")) {
 				verbose = 1;
 			} else if (!strcmp(arg, "-o")) {
@@ -848,6 +871,16 @@ int main(int argc, char **argv)
 		strcpy(index_name_buf + len - 5, ".idx");
 		index_name = index_name_buf;
 	}
+	if (keep_msg && !keep_name && pack_name) {
+		int len = strlen(pack_name);
+		if (!has_extension(pack_name, ".pack"))
+			die("packfile name '%s' does not end with '.pack'",
+			    pack_name);
+		keep_name_buf = xmalloc(len);
+		memcpy(keep_name_buf, pack_name, len - 5);
+		strcpy(keep_name_buf + len - 5, ".keep");
+		keep_name = keep_name_buf;
+	}
 
 	curr_pack = open_pack_file(pack_name);
 	parse_pack_header();
@@ -880,9 +913,13 @@ int main(int argc, char **argv)
 	}
 	free(deltas);
 	curr_index = write_index_file(index_name, sha1);
-	final(pack_name, curr_pack, index_name, curr_index, sha1);
+	final(pack_name, curr_pack,
+		index_name, curr_index,
+		keep_name, keep_msg,
+		sha1);
 	free(objects);
 	free(index_name_buf);
+	free(keep_name_buf);
 
 	if (!from_stdin)
 		printf("%s\n", sha1_to_hex(sha1));
-- 
1.4.3.3.g7d63

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] Teach git-index-pack how to keep a pack file.
  2006-10-29  9:41 [PATCH 3/3] Teach git-index-pack how to keep a pack file Shawn Pearce
@ 2006-10-29 10:49 ` Jakub Narebski
  2006-10-29 19:14   ` Junio C Hamano
  2006-10-30  3:47 ` Nicolas Pitre
  1 sibling, 1 reply; 5+ messages in thread
From: Jakub Narebski @ 2006-10-29 10:49 UTC (permalink / raw)
  To: git

Shawn Pearce wrote:

> +--keep::
> +       Before moving the index into its final destination
> +       create an empty .keep file for the associated pack file.
> +       This option is usually necessary with --stdin to prevent a
> +       simultaneous gitlink:git-repack[1] process from deleting
> +       the newly constructed pack and index before refs can be
> +       updated to use objects contained in the pack.
> +
> +--keep='why'::
> +       Like --keep create a .keep file before moving the index into
> +       its final destination, but rather than creating an empty file
> +       place 'why' followed by an LF into the .keep file.  The 'why'
> +       message can later be searched for within all .keep files to
> +       locate any which have outlived their usefulness.

Wouldn't it be better to use 'git-index-pack', perhaps with source URL if
possible, as the default 'why'?
-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] Teach git-index-pack how to keep a pack file.
  2006-10-29 10:49 ` Jakub Narebski
@ 2006-10-29 19:14   ` Junio C Hamano
  2006-10-29 19:44     ` Michael S. Tsirkin
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2006-10-29 19:14 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git, Shawn Pearce

Jakub Narebski <jnareb@gmail.com> writes:

> Wouldn't it be better to use 'git-index-pack', perhaps with source URL if
> possible, as the default 'why'?

I think it is sensible that Shawn did keep="reason" because
git-index-pack usually cannot decide.  .keep would be around for
at least three reasons:

 - you as the repository owner want to keep the pack from
   repacked because that is the big "initial clone" pack;

 - somebody fetched with --keep and index-pack created .keep
   flag, but somehow the fetch-pack after updating the refs
   failed to remove the flag (or has not finished);

 - somebody pushed into the repository with --keep but somehow
   the receive-pack after updating the refs failed to remove the
   flag (or has not finished);

For a shared repository settings, the person who cares about .keep
is the person who logged onto the machine with the repository and
ran "clone" (or "init-db && fetch-pack") to prepare the initial
state, and fetching would be done by smaller group of people
than pushing into it.  So these three kinds need to be easily
distinguishable by the repository owner.  The first kind would
not be usually removed; the second one the owner is likely to
know if that fetch is still running, and if it is known that
fetch is not running then .keep should probably be removed.  The
third kind is harder to tell if it is safe to remove .keep
because usually more people can push into it than fetch into it.

It might be a good idea to leave process ID of the caller of the
index-pack as part of --keep="reason" string, along with the
timestamp to help the user decide if the .keep is leftover or
still in progress (the timestamp can be seen from ls -l *.keep
so it is not strictly needed).





^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] Teach git-index-pack how to keep a pack file.
  2006-10-29 19:14   ` Junio C Hamano
@ 2006-10-29 19:44     ` Michael S. Tsirkin
  0 siblings, 0 replies; 5+ messages in thread
From: Michael S. Tsirkin @ 2006-10-29 19:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jakub Narebski, git, Shawn Pearce

Quoting r. Junio C Hamano <junkio@cox.net>:
> It might be a good idea to leave process ID of the caller of the
> index-pack as part of --keep="reason" string, along with the
> timestamp to help the user decide if the .keep is leftover or
> still in progress (the timestamp can be seen from ls -l *.keep
> so it is not strictly needed).

Add hostname to that, for repositories that reside on networked file systems.

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] Teach git-index-pack how to keep a pack file.
  2006-10-29  9:41 [PATCH 3/3] Teach git-index-pack how to keep a pack file Shawn Pearce
  2006-10-29 10:49 ` Jakub Narebski
@ 2006-10-30  3:47 ` Nicolas Pitre
  1 sibling, 0 replies; 5+ messages in thread
From: Nicolas Pitre @ 2006-10-30  3:47 UTC (permalink / raw)
  To: Shawn Pearce; +Cc: Junio C Hamano, git

On Sun, 29 Oct 2006, Shawn Pearce wrote:

>  I'm throwing a pair of pipes around index-pack so I can capture
>  the pack name thus allowing receive-pack to delete the correct
>  .keep file after its completed the ref updates.  At that point
>  receive-pack can easily examine the object count in the pack header
>  and compare that against a config entry to decide if it should be
>  keeping the pack or exploding it.

Is it really worth the trouble?  Especially if you've already written 
the pack out, why just not keep it instead of burning more CPU cycles 
exploding it?  The next repack will delete it anyway.

At least for the fetch/pull case here's what I've done so far.  It is 
not complete as the .keep file is not deleted as I'm not clear exactly 
where in git-fetch.sh it can be safely deleted.  But at least it works 
to the point of displaying the name of the file it should consider for 
deletion.

diff --git a/fetch-clone.c b/fetch-clone.c
index 96cdab4..c5747f4 100644
--- a/fetch-clone.c
+++ b/fetch-clone.c
@@ -81,7 +81,7 @@ int receive_unpack_pack(int xd[2], const
 
 int receive_keep_pack(int xd[2], const char *me, int quiet, int sideband)
 {
-	const char *argv[5] = { "index-pack", "--stdin", "--fix-thin",
+	const char *argv[6] = { "index-pack", "--stdin", "--fix-thin", "--keep",
 				quiet ? NULL : "-v", NULL };
 	return get_pack(xd, me, sideband, argv);
 }
diff --git a/git-fetch.sh b/git-fetch.sh
index 539dff6..dd0c00b 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -378,6 +378,10 @@ fetch_main () {
 	  failed)
 		  echo >&2 "Fetch failure: $remote"
 		  exit 1 ;;
+	  pack)
+		  pack_lockfile="$GIT_OBJECT_DIRECTORY/pack/pack-$remote_name.keep"
+		  [ -e "$pack_lockfile" ] && echo >&2 "$pack_lockfile exists"
+		  continue ;;
 	  esac
 	  found=
 	  single_force=
diff --git a/index-pack.c b/index-pack.c
index b37dd78..8db3c93 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -767,18 +767,6 @@ static void final(const char *final_pack
 		if (err)
 			die("error while closing pack file: %s", strerror(errno));
 		chmod(curr_pack_name, 0444);
-
-		/*
-		 * Let's just mimic git-unpack-objects here and write
-		 * the last part of the buffer to stdout.
-		 */
-		while (input_len) {
-			err = xwrite(1, input_buffer + input_offset, input_len);
-			if (err <= 0)
-				break;
-			input_len -= err;
-			input_offset += err;
-		}
 	}
 
 	if (keep_msg) {
@@ -818,6 +806,27 @@ static void final(const char *final_pack
 		if (move_temp_to_file(curr_index_name, final_index_name))
 			die("cannot store index file");
 	}
+
+	if (!from_stdin) {
+		printf("%s\n", sha1_to_hex(sha1));
+	} else {
+		char buf[48];
+		int len = snprintf(buf, sizeof(buf), "pack\t%s\n",
+				   sha1_to_hex(sha1));
+		xwrite(1, buf, len);
+
+		/*
+		 * Let's just mimic git-unpack-objects here and write
+		 * the last part of the input buffer to stdout.
+		 */
+		while (input_len) {
+			err = xwrite(1, input_buffer + input_offset, input_len);
+			if (err <= 0)
+				break;
+			input_len -= err;
+			input_offset += err;
+		}
+	}
 }
 
 int main(int argc, char **argv)
@@ -921,8 +930,5 @@ int main(int argc, char **argv)
 	free(index_name_buf);
 	free(keep_name_buf);
 
-	if (!from_stdin)
-		printf("%s\n", sha1_to_hex(sha1));
-
 	return 0;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-10-30  3:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-29  9:41 [PATCH 3/3] Teach git-index-pack how to keep a pack file Shawn Pearce
2006-10-29 10:49 ` Jakub Narebski
2006-10-29 19:14   ` Junio C Hamano
2006-10-29 19:44     ` Michael S. Tsirkin
2006-10-30  3:47 ` Nicolas Pitre

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).