From: Dana How <danahow@gmail.com>
To: Junio C Hamano <junkio@cox.net>
Cc: Git Mailing List <git@vger.kernel.org>, danahow@gmail.com
Subject: [PATCH v2] Enhance unpack-objects for live repo and large objects
Date: Fri, 25 May 2007 14:40:24 -0700 [thread overview]
Message-ID: <465757C8.4000700@gmail.com> (raw)
Nicolas Pitre wrote:
> I wouldn't mind a _separate_ tool that would load a pack index,
> determine object sizes from it, and then extract big objects to write
> them as loose objects ...
Add two new options to git-unpack-objects:
-f:: Loose objects will be created even if they
already exist in the repository in packed form.
--min-blob-size=<n>:: Unpacking is only done for objects
larger than or equal to n kB (uncompressed size).
Passes the tests in "t" and tested on big objects.
Based on "next" but should apply to "master" as well.
Signed-off-by: Dana L. How <danahow@gmail.com>
---
Documentation/git-unpack-objects.txt | 23 +++++++++++++++++++----
builtin-unpack-objects.c | 29 +++++++++++++++++++++++++++--
cache.h | 2 ++
sha1_file.c | 16 ++++++++++++----
4 files changed, 60 insertions(+), 10 deletions(-)
diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt
index ff6184b..3df2641 100644
--- a/Documentation/git-unpack-objects.txt
+++ b/Documentation/git-unpack-objects.txt
@@ -8,7 +8,7 @@ git-unpack-objects - Unpack objects from a packed archive
SYNOPSIS
--------
-'git-unpack-objects' [-n] [-q] [-r] <pack-file
+'git-unpack-objects' [-n] [-q] [-r] [-f] [--min-blob-size=N] <pack-file
DESCRIPTION
@@ -17,9 +17,12 @@ Read a packed archive (.pack) from the standard input, expanding
the objects contained within and writing them into the repository in
"loose" (one object per file) format.
-Objects that already exist in the repository will *not* be unpacked
-from the pack-file. Therefore, nothing will be unpacked if you use
-this command on a pack-file that exists within the target repository.
+By default, objects that already exist in the repository will *not*
+be unpacked from the pack-file. Therefore, nothing will be unpacked
+if you use this command on a pack-file that exists within the target
+repository, unless you specify -f. If an object already exists
+unpacked in the repository, it will not be replaced with the copy
+from the pack, with or without -f.
Please see the `git-repack` documentation for options to generate
new packs and replace existing ones.
@@ -40,6 +43,18 @@ OPTIONS
and make the best effort to recover as many objects as
possible.
+-f::
+ Allow loose objects to be created in the same repository that
+ contains the packfile.
+
+--min-blob-size=<n>::
+ Smallest loose object to create, expressed in kB.
+ Blobs smaller than this will not be unpacked. Default is 0.
+ If you specify this option with a deltified source packfile,
+ the source packfile should reside in the current repository
+ so delta bases too small to unpack are still accessible, and
+ therefore -f will be needed for anything to be written.
+
Author
------
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index a6ff62f..b8ee7b5 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -10,13 +10,16 @@
#include "progress.h"
static int dry_run, quiet, recover, has_errors;
-static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
+static const char unpack_usage[] =
+"git-unpack-objects [-n] [-q] [-r] [-f] [--min-blob-size=N] < pack-file";
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
static unsigned int offset, len;
static off_t consumed_bytes;
static SHA_CTX ctx;
+static int force = 0;
+uint32_t min_blob_size;
/*
* Make sure at least "min" bytes are available in the buffer, and
@@ -131,7 +134,18 @@ static void added_object(unsigned nr, enum object_type type,
static void write_object(unsigned nr, enum object_type type,
void *buf, unsigned long size)
{
- if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
+ /*
+ * We never need to write it when it's too small.
+ * Otherwise, without -f, we write it only when
+ * it does not exist in the repository in any form.
+ * Finally, with -f, we write it only when it does
+ * not exist in the local repository as a loose object.
+ * In all cases we fill in obj_list[nr].sha1 .
+ */
+ if (size < min_blob_size)
+ hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
+ else if (write_sha1_file_maybe(buf, size, typename(type),
+ force, obj_list[nr].sha1) < 0)
die("failed to write object");
added_object(nr, type, buf, size);
}
@@ -361,6 +375,17 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
recover = 1;
continue;
}
+ if (!strcmp(arg, "-f")) {
+ force = 1;
+ continue;
+ }
+ if (!prefixcmp(arg, "--min-blob-size=")) {
+ char *end;
+ min_blob_size = strtoul(arg+16, &end, 0) * 1024;
+ if (!arg[16] || *end)
+ usage(unpack_usage);
+ continue;
+ }
if (!prefixcmp(arg, "--pack_header=")) {
struct pack_header *hdr;
char *c;
diff --git a/cache.h b/cache.h
index ec85d93..4994d03 100644
--- a/cache.h
+++ b/cache.h
@@ -343,6 +343,8 @@ extern int sha1_object_info(const unsigned char *, unsigned long *);
extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size);
extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1);
extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
+extern int write_sha1_file_maybe(void *buf, unsigned long len, const char *type,
+ int dup_ok, unsigned char *return_sha1);
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
diff --git a/sha1_file.c b/sha1_file.c
index 12d2ef2..e4c3288 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1979,7 +1979,8 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type,
return 0;
}
-int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
+int write_sha1_file_maybe(void *buf, unsigned long len, const char *type,
+ int dup_ok, unsigned char *returnsha1)
{
int size, ret;
unsigned char *compressed;
@@ -1990,14 +1991,15 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha
char hdr[32];
int fd, hdrlen;
- /* Normally if we have it in the pack then we do not bother writing
- * it out into .git/objects/??/?{38} file.
+ /* Normally if in a pack (or anywhere else) then we do not write
+ * it out into .git/objects/??/?{38} file, but with dup_ok != 0
+ * we only avoid over-writing a loose blob in the local repo.
*/
write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
filename = sha1_file_name(sha1);
if (returnsha1)
hashcpy(returnsha1, sha1);
- if (has_sha1_file(sha1))
+ if (!dup_ok && has_sha1_file(sha1))
return 0;
fd = open(filename, O_RDONLY);
if (fd >= 0) {
@@ -2062,6 +2064,12 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha
return move_temp_to_file(tmpfile, filename);
}
+int write_sha1_file(void *buf, unsigned long len, const char *type,
+ unsigned char *returnsha1)
+{
+ return write_sha1_file_maybe(buf, len, type, 0, returnsha1);
+}
+
/*
* We need to unpack and recompress the object for writing
* it out to a different file.
--
1.5.2.762.gd8c6-dirty
reply other threads:[~2007-05-25 21:40 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=465757C8.4000700@gmail.com \
--to=danahow@gmail.com \
--cc=git@vger.kernel.org \
--cc=junkio@cox.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).