All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Enhance unpack-objects for live repo and large objects
@ 2007-05-25 21:40 Dana How
  0 siblings, 0 replies; only message in thread
From: Dana How @ 2007-05-25 21:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, danahow


Nicolas Pitre wrote:
> I wouldn't mind a _separate_ tool that would load a pack index,
> determine object sizes from it, and then extract big objects to write
> them as loose objects ...

Add two new options to git-unpack-objects:

-f:: Loose objects will be created even if they
already exist in the repository in packed form.

--min-blob-size=<n>::  Unpacking is only done for objects
larger than or equal to n kB (uncompressed size).

Passes the tests in "t" and tested on big objects.
Based on "next" but should apply to "master" as well.

Signed-off-by: Dana L. How <danahow@gmail.com>
---
 Documentation/git-unpack-objects.txt |   23 +++++++++++++++++++----
 builtin-unpack-objects.c             |   29 +++++++++++++++++++++++++++--
 cache.h                              |    2 ++
 sha1_file.c                          |   16 ++++++++++++----
 4 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt
index ff6184b..3df2641 100644
--- a/Documentation/git-unpack-objects.txt
+++ b/Documentation/git-unpack-objects.txt
@@ -8,7 +8,7 @@ git-unpack-objects - Unpack objects from a packed archive
 
 SYNOPSIS
 --------
-'git-unpack-objects' [-n] [-q] [-r] <pack-file
+'git-unpack-objects' [-n] [-q] [-r] [-f] [--min-blob-size=N] <pack-file
 
 
 DESCRIPTION
@@ -17,9 +17,12 @@ Read a packed archive (.pack) from the standard input, expanding
 the objects contained within and writing them into the repository in
 "loose" (one object per file) format.
 
-Objects that already exist in the repository will *not* be unpacked
-from the pack-file.  Therefore, nothing will be unpacked if you use
-this command on a pack-file that exists within the target repository.
+By default, objects that already exist in the repository will *not*
+be unpacked from the pack-file.  Therefore, nothing will be unpacked
+if you use this command on a pack-file that exists within the target
+repository, unless you specify -f.  If an object already exists
+unpacked in the repository, it will not be replaced with the copy
+from the pack, with or without -f.
 
 Please see the `git-repack` documentation for options to generate
 new packs and replace existing ones.
@@ -40,6 +43,18 @@ OPTIONS
 	and make the best effort to recover as many objects as
 	possible.
 
+-f::
+	Allow loose objects to be created in the same repository that
+	contains the packfile.
+
+--min-blob-size=<n>::
+	Smallest loose object to create, expressed in kB.
+	Objects smaller than this will not be unpacked.  Default is 0.
+	If you specify this option with a deltified source packfile,
+	the source packfile should reside in the current repository
+	so delta bases too small to unpack are still accessible, and
+	therefore -f will be needed for anything to be written.
+
 
 Author
 ------
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index a6ff62f..b8ee7b5 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -10,13 +10,16 @@
 #include "progress.h"
 
 static int dry_run, quiet, recover, has_errors;
-static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
+static const char unpack_usage[] =
+"git-unpack-objects [-n] [-q] [-r] [-f] [--min-blob-size=N] < pack-file";
 
 /* We always read in 4kB chunks. */
 static unsigned char buffer[4096];
 static unsigned int offset, len;
 static off_t consumed_bytes;
 static SHA_CTX ctx;
+static int force = 0;
+uint32_t min_blob_size;
 
 /*
  * Make sure at least "min" bytes are available in the buffer, and
@@ -131,7 +134,18 @@ static void added_object(unsigned nr, enum object_type type,
 static void write_object(unsigned nr, enum object_type type,
 			 void *buf, unsigned long size)
 {
-	if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
+	/*
+	 * We never need to write it when it's too small.
+	 * Otherwise, without -f, we write it only when
+	 * it does not exist in the repository in any form.
+	 * Finally, with -f, we write it only when it does
+	 * not exist in the local repository as a loose object.
+	 * In all cases we fill in obj_list[nr].sha1.
+	 */
+	if (size < min_blob_size)
+		hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
+	else if (write_sha1_file_maybe(buf, size, typename(type),
+				       force, obj_list[nr].sha1) < 0)
 		die("failed to write object");
 	added_object(nr, type, buf, size);
 }
@@ -361,6 +375,17 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 				recover = 1;
 				continue;
 			}
+			if (!strcmp(arg, "-f")) {
+				force = 1;
+				continue;
+			}
+			if (!prefixcmp(arg, "--min-blob-size=")) {
+				char *end;
+				min_blob_size = strtoul(arg+16, &end, 0) * 1024;
+				if (!arg[16] || *end)
+					usage(unpack_usage);
+				continue;
+			}
 			if (!prefixcmp(arg, "--pack_header=")) {
 				struct pack_header *hdr;
 				char *c;
diff --git a/cache.h b/cache.h
index ec85d93..4994d03 100644
--- a/cache.h
+++ b/cache.h
@@ -343,6 +343,8 @@ extern int sha1_object_info(const unsigned char *, unsigned long *);
 extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size);
 extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1);
 extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
+extern int write_sha1_file_maybe(void *buf, unsigned long len, const char *type,
+				 int dup_ok, unsigned char *return_sha1);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 
 extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
diff --git a/sha1_file.c b/sha1_file.c
index 12d2ef2..e4c3288 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1979,7 +1979,8 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type,
 	return 0;
 }
 
-int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
+int write_sha1_file_maybe(void *buf, unsigned long len, const char *type,
+			  int dup_ok, unsigned char *returnsha1)
 {
 	int size, ret;
 	unsigned char *compressed;
@@ -1990,14 +1991,15 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha
 	char hdr[32];
 	int fd, hdrlen;
 
-	/* Normally if we have it in the pack then we do not bother writing
-	 * it out into .git/objects/??/?{38} file.
+	/* Normally if in a pack (or anywhere else) then we do not write
+	 * it out into .git/objects/??/?{38} file, but with dup_ok != 0
+	 * we only avoid over-writing a loose blob in the local repo.
 	 */
 	write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
 	filename = sha1_file_name(sha1);
 	if (returnsha1)
 		hashcpy(returnsha1, sha1);
-	if (has_sha1_file(sha1))
+	if (!dup_ok && has_sha1_file(sha1))
 		return 0;
 	fd = open(filename, O_RDONLY);
 	if (fd >= 0) {
@@ -2062,6 +2064,12 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha
 	return move_temp_to_file(tmpfile, filename);
 }
 
+int write_sha1_file(void *buf, unsigned long len, const char *type,
+		    unsigned char *returnsha1)
+{
+	return write_sha1_file_maybe(buf, len, type, 0, returnsha1);
+}
+
 /*
  * We need to unpack and recompress the object for writing
  * it out to a different file.
-- 
1.5.2.762.gd8c6-dirty

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2007-05-25 21:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-05-25 21:40 [PATCH v2] Enhance unpack-objects for live repo and large objects Dana How

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.