Git development

Git development
 help / color / mirror / Atom feed

* [PATCH] diff-delta: bound hash list length to avoid O(m*n) behavior
From: Nicolas Pitre @ 2006-03-08 19:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git


The diff-delta code can exhibit O(m*n) behavior with some pathological
data sets where most hash entries end up in the same hash bucket.

To prevent this, a limit is imposed to the number of entries that can 
exist in the same hash bucket.

Because of the above the code is a tiny bit more expensive on average, 
even if some small optimizations were added as well to attenuate the 
overhead. But the problematic samples used to diagnose the issue are now 
orders of magnitude less expensive to process with only a slight loss in 
compression.

Signed-off-by: Nicolas Pitre <nico@cam.org>

---

For example, Carl Baldwin provided me with a couple 20MB files, and 
deltifying one against another one with test-delta takes around 
TEN MINUTES for only one delta on my P4 @ 3GHz.

Now imagine using git-repack -a with a default window
of 10 ... 

With this patch the test-delta time dropped to only 9 seconds.  And the 
resulting delta, once compressed, is about 2% larger.

diff --git a/diff-delta.c b/diff-delta.c
index 2ed5984..aaee7be 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -40,17 +40,18 @@ struct index {
 
 static struct index ** delta_index(const unsigned char *buf,
 				   unsigned long bufsize,
+				   unsigned long trg_bufsize,
 				   unsigned int *hash_shift)
 {
-	unsigned int hsize, hshift, entries, blksize, i;
+	unsigned int i, hsize, hshift, hlimit, entries, *hash_count;
 	const unsigned char *data;
 	struct index *entry, **hash;
 	void *mem;
 
 	/* determine index hash size */
-	entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE;
+	entries = bufsize  / BLK_SIZE;
 	hsize = entries / 4;
-	for (i = 4; (1 << i) < hsize && i < 16; i++);
+	for (i = 4; (1 << i) < hsize && i < 31; i++);
 	hsize = 1 << i;
 	hshift = 32 - i;
 	*hash_shift = hshift;
@@ -63,20 +64,62 @@ static struct index ** delta_index(const
 	entry = mem + hsize * sizeof(*hash);
 	memset(hash, 0, hsize * sizeof(*hash));
 
-	/* then populate it */
+	/* allocate an array to count hash entries */
+	hash_count = calloc(hsize, sizeof(*hash_count));
+	if (!hash_count) {
+		free(hash);
+		return NULL;
+	}
+
+	/* then populate the index */
 	data = buf + entries * BLK_SIZE - BLK_SIZE;
-	blksize = bufsize - (data - buf);
 	while (data >= buf) {
-		unsigned int val = adler32(0, data, blksize);
+		unsigned int val = adler32(0, data, BLK_SIZE);
 		i = HASH(val, hshift);
 		entry->ptr = data;
 		entry->val = val;
 		entry->next = hash[i];
 		hash[i] = entry++;
-		blksize = BLK_SIZE;
+		hash_count[i]++;
 		data -= BLK_SIZE;
  	}
 
+	/*
+	 * Determine a limit on the number of entries in the same hash
+	 * bucket.  This guard us against patological data sets causing
+	 * really bad hash distribution with most entries in the same hash
+	 * bucket that would bring us to O(m*n) computing costs (m and n
+	 * corresponding to reference and target buffer sizes).
+	 *
+	 * The more the target buffer is large, the more it is important to
+	 * have small entry lists for each hash buckets.  With such a limit
+	 * the cost is bounded to something more like O(m+n).
+	 */
+	hlimit = (1 << 26) / trg_bufsize;
+	if (hlimit < 4*BLK_SIZE)
+		hlimit = 4*BLK_SIZE;
+
+	/*
+	 * Now make sure none of the hash buckets has more entries than
+	 * we're willing to test.  Otherwise we cull the entry list
+	 * uniformly to still preserve a good repartition across
+	 * the reference buffer.
+	 */
+	for (i = 0; i < hsize; i++) {
+		if (hash_count[i] < hlimit)
+			continue;
+		entry = hash[i];
+		do {
+			struct index *keep = entry;
+			int skip = hash_count[i] / hlimit / 2;
+			do {
+				entry = entry->next;
+			} while(--skip && entry);
+			keep->next = entry;
+		} while(entry);
+	}
+	free(hash_count);
+
 	return hash;
 }
 
@@ -100,7 +143,7 @@ void *diff_delta(void *from_buf, unsigne
 
 	if (!from_size || !to_size)
 		return NULL;
-	hash = delta_index(from_buf, from_size, &hash_shift);
+	hash = delta_index(from_buf, from_size, to_size, &hash_shift);
 	if (!hash)
 		return NULL;
 
@@ -141,29 +184,27 @@ void *diff_delta(void *from_buf, unsigne
 
 	while (data < top) {
 		unsigned int moff = 0, msize = 0;
-		unsigned int blksize = MIN(top - data, BLK_SIZE);
-		unsigned int val = adler32(0, data, blksize);
-		i = HASH(val, hash_shift);
-		for (entry = hash[i]; entry; entry = entry->next) {
-			const unsigned char *ref = entry->ptr;
-			const unsigned char *src = data;
-			unsigned int ref_size = ref_top - ref;
-			if (entry->val != val)
-				continue;
-			if (ref_size > top - src)
-				ref_size = top - src;
-			while (ref_size && *src++ == *ref) {
-				ref++;
-				ref_size--;
-			}
-			ref_size = ref - entry->ptr;
-			if (ref_size > msize) {
-				/* this is our best match so far */
-				moff = entry->ptr - ref_data;
-				msize = ref_size;
-				if (msize >= 0x10000) {
-					msize = 0x10000;
+		if (data + BLK_SIZE <= top) {
+			unsigned int val = adler32(0, data, BLK_SIZE);
+			i = HASH(val, hash_shift);
+			for (entry = hash[i]; entry; entry = entry->next) {
+				const unsigned char *ref = entry->ptr;
+				const unsigned char *src = data;
+				unsigned int ref_size = ref_top - ref;
+				if (entry->val != val)
+					continue;
+				if (ref_size > top - src)
+					ref_size = top - src;
+				if (ref_size > 0x10000)
+					ref_size = 0x10000;
+				if (ref_size <= msize)
 					break;
+				while (ref_size-- && *src++ == *ref)
+					ref++;
+				if (msize < ref - entry->ptr) {
+					/* this is our best match so far */
+					msize = ref - entry->ptr;
+					moff = entry->ptr - ref_data;
 				}
 			}
 		}

^ permalink raw reply related

* Re: [PATCH] git-blame: Make the output human readable
From: linux @ 2006-03-08 19:06 UTC (permalink / raw)
  To: linux, vsu; +Cc: git, junkio
In-Reply-To: <20060308183059.GD9555@procyon.home>

> So that mk_wcwidth() must be used unconditionally, and not as a
> fallback for systems which do not provide wcwidth() in libc.

Ah, the light dawns!  I now understand your confusion.
That was exactly the idea; apologies for being unclear.

Kind of like the existing isspace(), isdigit(), isalpha(), etc.
implementations in git-compat-util.h to avoid the mysteries and vagaries
of locales.  A UTF-8-only solution is desired.

^ permalink raw reply

* gitk: bug report: invalid command name "contmergediff"
From: Rutger Nijlunsing @ 2006-03-08 18:37 UTC (permalink / raw)
  To: git

Bug report on gitk, maybe already reported.

do:
   gitk HEAD~1000..

...select first commit (why is this not done automatically?), and keep
pressing down-arrow.

This gives me:
   Error: invalid command name "contmergediff"
with detail window:
   invalid command name "contmergediff"
   invalid command name "contmergediff"
       while executing
   "contmergediff $ids"
       (procedure "gettreediffline" line 14)
       invoked from within
   "gettreediffline file7 de84f99c12d1819479116685393afb1ebe99810b"


-- 
Rutger Nijlunsing ---------------------------------- eludias ed dse.nl
never attribute to a conspiracy which can be explained by incompetence
----------------------------------------------------------------------

^ permalink raw reply

* Re: [PATCH] git-blame: Make the output human readable
From: Sergey Vlasov @ 2006-03-08 18:30 UTC (permalink / raw)
  To: linux; +Cc: git, junkio
In-Reply-To: <20060308180422.27978.qmail@science.horizon.com>

[-- Attachment #1: Type: text/plain, Size: 1004 bytes --]

On Wed, Mar 08, 2006 at 01:04:22PM -0500, linux@horizon.com wrote:
> > And this won't work, unless you also add that wcwidth() implementation
> > to git.
> 
> That was the general idea.  It is freely usable.
> 
> > The problem is that the wchar_t encoding is not specified anywhere -
> > glibc uses Unicode for it, but other systems can use whatever they want
> > (even locale-dependent).
> 
> Why is that a problem?  None of the code mentioned even uses wchar_t.
> The code I wrote converts from UTF-8 to straight Unicode, and that's
> what Markus Kuhn's wcwidth() expects as an argument.

wcwidth() is a standard library function which takes a wchar_t:

http://www.opengroup.org/onlinepubs/009695399/functions/wcwidth.html

It is easy to write a program which assumes that wchar_t is Unicode
without noticing it, because it will work fine with glibc...

So that mk_wcwidth() must be used unconditionally, and not as a
fallback for systems which do not provide wcwidth() in libc.

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply

* [PATCH] test-delta needs zlib to compile
From: Nicolas Pitre @ 2006-03-08 18:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git


Signed-off-by: Nicolas Pitre <nico@cam.org>

---

diff --git a/Makefile b/Makefile
index a5eb0c4..89d67d6 100644
--- a/Makefile
+++ b/Makefile
@@ -565,7 +565,7 @@ test-date$X: test-date.c date.o ctype.o
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) test-date.c date.o ctype.o
 
 test-delta$X: test-delta.c diff-delta.o patch-delta.o
-	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz
 
 check:
 	for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done

^ permalink raw reply related

* Re: [PATCH] git-blame: Make the output human readable
From: linux @ 2006-03-08 18:04 UTC (permalink / raw)
  To: linux, vsu; +Cc: git, junkio
In-Reply-To: <20060308173249.1faed1d7.vsu@altlinux.ru>

> And this won't work, unless you also add that wcwidth() implementation
> to git.

That was the general idea.  It is freely usable.

> The problem is that the wchar_t encoding is not specified anywhere -
> glibc uses Unicode for it, but other systems can use whatever they want
> (even locale-dependent).

Why is that a problem?  None of the code mentioned even uses wchar_t.
The code I wrote converts from UTF-8 to straight Unicode, and that's
what Markus Kuhn's wcwidth() expects as an argument.

At no time do we ask the compiler for its opinion on the subject.

^ permalink raw reply

* Re: git-svn, tree moves, and --no-stop-on-copy
From: Matthias Urlichs @ 2006-03-08 17:02 UTC (permalink / raw)
  To: git
In-Reply-To: <20060307220837.GB27397@nowhere.earth>

Hi, Yann Dirson wrote:

> "svn switch --relocate" does not seem to be of any help.  Switching
> manually .git/git-svn/tree/ to the new repository location does not
> help either, since I must obviously update to r166 in that case, and
> then a further "git-svn fetch" fails because it does not find
> .git/git-svn/revs/166 already imported.
> 
> Any idea as to how to get the work done ?

You can manually edit the .git/corr file. Simply add an entry for #166
that has your reorganized (if necessary) head's SHA1.
-- 
Matthias Urlichs

^ permalink raw reply

* git-rev-list bug?
From: Catalin Marinas @ 2006-03-08 16:19 UTC (permalink / raw)
  To: git

Sorry if this was previously discussed. I ran git-rev-list on a linear
graph and tried to filter the results by a file name:

  git rev-list since.. path/to/file

but it always shows the child commit of "since" even if it didn't
touch the file. The same behaviour is for git-log (since it uses
git-rev-list) but git-whatchanged seems to be fine.

Is this the intended behaviour? The "stg patches" command based on
git-rev-list used to work fine a few weeks ago but now it is always
reporting the bottom patch in the stack as modifying a given file.

Thanks.

--
Catalin

^ permalink raw reply

* Re: Update hook in Cygwin
From: Christopher Faylor @ 2006-03-08 15:54 UTC (permalink / raw)
  To: Andreas Ericsson, Niklas H?glund, Shawn Pearce, git
In-Reply-To: <20060308144413.GA516@spearce.org>

On Wed, Mar 08, 2006 at 09:44:13AM -0500, Shawn Pearce wrote:
>BTW: chmod a-x .git/hooks/* also works as the Cygwin unixy permission
>layer will remember the change.

Using "chmod a-x" should work like linux as long as you're using an NTFS
filesystem and have not specifically turned off Cygwin's handling of this
kind of thing with the CYGWIN=nontsec environment variable.

i.e., it should work in most cases on W2K+

cgf

^ permalink raw reply

* Re: Update hook in Cygwin
From: Shawn Pearce @ 2006-03-08 14:44 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Niklas Höglund, git
In-Reply-To: <440EDDE4.9070405@op5.se>

Andreas Ericsson <ae@op5.se> wrote:
> Niklas Höglund wrote:
> >Hi.
> >
> >After creating a couple of repositories and pushing and cloning them,
> >I get the following:
> >
> >$ git push --all origin
> >...
> >hooks/update: line 88: mail: command not found
> >
> >This is in cygwin. I'm rather glad I don't have the mail command
> >installed, as I don't want mails going anywhere.
> >
> >The update hook contains the following comment:
> >
> ># To enable this hook:
> ># (1) change the recipient e-mail address
> ># (2) make this file executable by "chmod +x update".
> >
> >But my impression after a cursory look at it is that it would always
> >call "mail" whenever it is run, and since all files are executable in
> >Windows (AFAIK), it would always be run.
> 
> 
> I was under the impression that the cygwin abstraction layer had some 
> unixy permission thing on top of NTFS. Perhaps that's wrong. If you 
> remove the hook it won't be called.

I've seen the same thing with the hooks on Cygwin.  I would consider
it to be a bug in either GIT or Cygwin but I haven't decided
which yet.

If you look at the share/git-core/templates directory on a real
UNIX system you would see that the hooks are not marked executable by
default when installed.  They are copied non-executable into each
new repository by git-init-db. Since they aren't executable they
don't get run.

But on Cygwin the hooks appear to be getting installed and are marked
executable in share/git-core/templates.  So when git-init-db copies
them over to the new repository they are by default enabled.

Removing the execute bit from the files in share/git-core/templates
doesn't help; for some reason git-init-db is still copying them with
the execute bit enabled.  I haven't spent the time to figure out why
yet; so I just run rm .git/hooks/* on every repository I come across.

BTW: chmod a-x .git/hooks/* also works as the Cygwin unixy permission
layer will remember the change.

-- 
Shawn.

^ permalink raw reply

* Re: [PATCH] git-blame: Make the output human readable
From: Sergey Vlasov @ 2006-03-08 14:32 UTC (permalink / raw)
  To: linux; +Cc: junkio, git
In-Reply-To: <20060306193326.19262.qmail@science.horizon.com>

[-- Attachment #1: Type: text/plain, Size: 973 bytes --]

On 6 Mar 2006 14:33:26 -0500 linux@horizon.com wrote:

> Well, getting 15 characters in UTF-8 is easy (just stop before the 16th
> byte for which ((b & 0xc0) != 0x80)), but what about combining characters?
> 
> You've got accents and stuff to worry about.  And the annoying fact that
> Unicode defined accents as suffixes, so you have to go past the 15th
> column to include all of the 
> 
> And then there's that fact that many characters are traditionally
> represented as double-wide forms, even on character terminals.
> 
> See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for details
> and an example implementation of wcwidth().
> 
[skip]
> 			/* Now find the width of it */
> 			w = wcwidth(c);

And this won't work, unless you also add that wcwidth() implementation
to git.

The problem is that the wchar_t encoding is not specified anywhere -
glibc uses Unicode for it, but other systems can use whatever they want
(even locale-dependent).

[-- Attachment #2: Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply

* Re: [PATCH] write_sha1_file(): Perform Z_FULL_FLUSH between header and data
From: Sergey Vlasov @ 2006-03-08 14:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vhd69i4ep.fsf@assigned-by-dhcp.cox.net>

[-- Attachment #1: Type: text/plain, Size: 1732 bytes --]

On Wed, Mar 08, 2006 at 03:04:14AM -0800, Junio C Hamano wrote:
> Sergey Vlasov <vsu@altlinux.ru> writes:
> > However, a straight reuse still will not be possible, because
> > sha1write_compressed() uses deflateInit(&stream, Z_DEFAULT_COMPRESSION),
> > which writes zlib headers around the deflate stream, and the zlib footer
> > contains adler32 checksum.  So, as a minimum, you will need to
> > decompress the object data, calculate its adler32 checksum and write the
> > zlib header yourself.
> 
> Hmph.  Thanks for helping, but it sounds like my original plan
> was not useful at all.  Probably inflating would be still
> cheaper than inflating and then deflating, but it would not be
> as cool as a straight copy.  Sigh...

Actually you can calculate adler32 checksum of object data from
adler32(header+data) (available at the end of the loose object file),
adler32(header) (which you will need to calculate) and len(data)
(which is available in the header):

#define ADLER32_BASE	65521UL

/*
 * Recover the Adler-32 checksum of the trailing part of a buffer.
 *
 * adler_full: Adler-32 of the whole buffer (leading part followed by
 *             trailing part).
 * adler_1:    Adler-32 of the leading part alone.
 * len_2:      length in bytes of the trailing part.
 *
 * Returns the Adler-32 of the trailing part alone, packed the usual way
 * (high 16 bits = second sum, low 16 bits = first sum).
 *
 * Each 16-bit half is handled modulo ADLER32_BASE.  The leading part
 * contributes (low half - 1) to the full low sum, and its high half plus
 * len_2 times that amount to the full high sum; both contributions are
 * subtracted back out here.
 */
unsigned int adler32_split(unsigned int adler_full, unsigned int adler_1,
			   unsigned long len_2)
{
	/* Unpack the two 16-bit running sums of each checksum. */
	const unsigned long head_lo = adler_1 & 0xffff;
	const unsigned long head_hi = (adler_1 >> 16) & 0xffff;
	const unsigned long full_lo = adler_full & 0xffff;
	const unsigned long full_hi = (adler_full >> 16) & 0xffff;
	unsigned long lo_bias, hi_bias, tail_lo, tail_hi;

	/* What the leading part added to the low sum (its sum minus the seed 1). */
	lo_bias = (head_lo + ADLER32_BASE - 1) % ADLER32_BASE;

	/*
	 * What the leading part added to the high sum: its own high sum, plus
	 * lo_bias carried once per trailing byte.  len_2 is reduced first so
	 * the product stays below 2^32.
	 */
	hi_bias = (head_hi + lo_bias * (len_2 % ADLER32_BASE)) % ADLER32_BASE;

	/* Subtract the contributions; adding the base keeps values non-negative. */
	tail_lo = (full_lo + ADLER32_BASE - lo_bias) % ADLER32_BASE;
	tail_hi = (full_hi + ADLER32_BASE - hi_bias) % ADLER32_BASE;

	return (tail_hi << 16) | tail_lo;
}

However, the resulting code probably won't be pretty...

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply

* Re: Update hook in Cygwin
From: Andreas Ericsson @ 2006-03-08 13:36 UTC (permalink / raw)
  To: Niklas Höglund; +Cc: git
In-Reply-To: <ad8ce5c20603080416g5ed6d77el@mail.gmail.com>

Niklas Höglund wrote:
> Hi.
> 
> After creating a couple of repositories and pushing and cloning them,
> I get the following:
> 
> $ git push --all origin
> ...
> hooks/update: line 88: mail: command not found
> 
> This is in cygwin. I'm rather glad I don't have the mail command
> installed, as I don't want mails going anywhere.
> 
> The update hook contains the following comment:
> 
> # To enable this hook:
> # (1) change the recipient e-mail address
> # (2) make this file executable by "chmod +x update".
> 
> But my impression after a cursory look at it is that it would always
> call "mail" whenever it is run, and since all files are executable in
> Windows (AFAIK), it would always be run.


I was under the impression that the cygwin abstraction layer had some 
unixy permission thing on top of NTFS. Perhaps that's wrong. If you 
remove the hook it won't be called.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply

* Update hook in Cygwin
From: Niklas Höglund @ 2006-03-08 12:16 UTC (permalink / raw)
  To: git

Hi.

After creating a couple of repositories and pushing and cloning them,
I get the following:

$ git push --all origin
...
hooks/update: line 88: mail: command not found

This is in cygwin. I'm rather glad I don't have the mail command
installed, as I don't want mails going anywhere.

The update hook contains the following comment:

# To enable this hook:
# (1) change the recipient e-mail address
# (2) make this file executable by "chmod +x update".

But my impression after a cursory look at it is that it would always
call "mail" whenever it is run, and since all files are executable in
Windows (AFAIK), it would always be run.

^ permalink raw reply

* Re: [PATCH] write_sha1_file(): Perform Z_FULL_FLUSH between header and data
From: Junio C Hamano @ 2006-03-08 11:04 UTC (permalink / raw)
  To: Sergey Vlasov; +Cc: git
In-Reply-To: <20060308134519.78ea313d.vsu@altlinux.ru>

Sergey Vlasov <vsu@altlinux.ru> writes:

> However, a straight reuse still will not be possible, because
> sha1write_compressed() uses deflateInit(&stream, Z_DEFAULT_COMPRESSION),
> which writes zlib headers around the deflate stream, and the zlib footer
> contains adler32 checksum.  So, as a minimum, you will need to
> decompress the object data, calculate its adler32 checksum and write the
> zlib header yourself.

Hmph.  Thanks for helping, but it sounds like my original plan
was not useful at all.  Probably inflating would be still
cheaper than inflating and then deflating, but it would not be
as cool as a straight copy.  Sigh...

^ permalink raw reply

* [PATCH] write_sha1_file(): Perform Z_FULL_FLUSH between header and data
From: Sergey Vlasov @ 2006-03-08 10:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Nicolas Pitre, Linus Torvalds
In-Reply-To: <7vzmk1izpa.fsf_-_@assigned-by-dhcp.cox.net>

Data after Z_FULL_FLUSH will be compressed independently of the
header, and could therefore be reused without recompressing when
creating a pack.

---

This passes "make test" and unpacking of the whole git repo with
git-fsck-objects afterwards.

However, a straight reuse still will not be possible, because
sha1write_compressed() uses deflateInit(&stream, Z_DEFAULT_COMPRESSION),
which writes zlib headers around the deflate stream, and the zlib footer
contains adler32 checksum.  So, as a minimum, you will need to
decompress the object data, calculate its adler32 checksum and write the
zlib header yourself.

 sha1_file.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

8b12d9a58e87a4c5b5a2a7b20d06fe29a5afb903
diff --git a/sha1_file.c b/sha1_file.c
index a80d849..34d4da4 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1399,7 +1399,8 @@ int write_sha1_file(void *buf, unsigned 
 	/* Set it up */
 	memset(&stream, 0, sizeof(stream));
 	deflateInit(&stream, Z_BEST_COMPRESSION);
-	size = deflateBound(&stream, len+hdrlen);
+	/* Additional 6 bytes for the Z_FULL_FLUSH marker */
+	size = deflateBound(&stream, hdrlen) + 6 + deflateBound(&stream, len);
 	compressed = xmalloc(size);

 	/* Compress it */
@@ -1412,6 +1413,10 @@ int write_sha1_file(void *buf, unsigned 
 	while (deflate(&stream, 0) == Z_OK)
 		/* nothing */;

+	/* Flush before data */
+	while (deflate(&stream, Z_FULL_FLUSH) == Z_OK)
+		/* nothing */;
+
 	/* Then the data itself.. */
 	stream.next_in = buf;
 	stream.avail_in = len;
-- 
1.2.GIT

^ permalink raw reply related

* Re: Pulling tags from git.git
From: Andreas Ericsson @ 2006-03-08 10:13 UTC (permalink / raw)
  To: gitzilla; +Cc: Junio C Hamano, git
In-Reply-To: <440E5E40.7090700@gmail.com>

A Large Angry SCM wrote:
> 
> Why is a "pull" bothering with tags? A "fetch" yes, but not a pull.
> 

A pull is a fetch + merge. I said pull because what little I know of 
Linus' workflow is that the emails he gets from subsystem maintainers 
are called "pull requests".

>>
>> Tags not meant to be distributed are unannotated, and unannotated tags 
>> are kept out of published repos which are always stored at a central 
>> server. Everybody synchronize to those central repos, so nobody pulls 
>> from each other. Perhaps this is how the kernel devs work too, but if 
>> it ever changes the update hook will no longer be able to safeguard 
>> from it and the, in my eyes, temporary tags will be distributed in a 
>> criss-crossing mesh so no-one will ever know where it came from or who 
>> created it or why. I.e. a Bad Thing.
> 
> 
> The distinction here is not annotated tags or temporary tags but _local_ 
> tags. _Your_ workflow conventions treat unannotated tags as local tags 
> but declaring that unannotated tags can not be pushed is imposing _your_ 
> conventions on other groups. Just as branch names, themselves, can be 
> meaningful, so can tag names.
> 

Yes, that's why I said it's better to discourage than to disallow. The 
default update-hook is disabled by default and there are comments 
aplenty to make it possible even for the most die-hard point-and-click 
monkey to be able to comment out the disallowing of unannotated tags.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply

* Re: [RFH] zlib gurus out there?
From: Johannes Schindelin @ 2006-03-08  9:46 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Junio C Hamano, git
In-Reply-To: <Pine.LNX.4.64.0603071753370.32577@g5.osdl.org>

Hi,

On Tue, 7 Mar 2006, Linus Torvalds wrote:

> On Tue, 7 Mar 2006, Junio C Hamano wrote:
> > >
> > > No, I don't think that's good. You're only doing a partial deflate, you 
> > > can't ask for a Z_FULL_FLUSH. That only works if you give it the whole 
> > > buffer, and you don't.
> 
> Actually, I misread what you were trying to do, and thought this was the 
> inflate phase, not the deflate.

I don't think it matters if it is inflate or deflate. ZLib keeps an 
internal state depending on the data. That is the whole reason why the 
packing is so good: it uses the redundancy in the data already seen to 
construct a codebook. (And that's also the reason why you can't start to 
deflate in the middle.)

Ciao,
Dscho

^ permalink raw reply

* Some more cvsimport
From: Rajkumar S @ 2006-03-08  8:49 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 677 bytes --]

Hi all,

Thanks every one for replying to my previous mails and to Smurf for his helpful replies 
and patch.

The cvs project I am trying to track has 2 branchs. One head and another for releng_1. The 
upstream cvs tree gets updated on both branchs. I also make modifications to both. When I 
normally do cvsimport, the releng_1 tree gets updated automatically, but if I have checked 
out the releng_1 branch in my local git repository when cvsimport happens the files do not 
get updated.  I can fix this by checking out the master branch just before cvsimport and 
then switching back.

I am attaching a test script to simulate the problem and the fix is commented out.

raj

[-- Attachment #2: git_branch_init.sh --]
[-- Type: application/x-shellscript, Size: 2297 bytes --]

^ permalink raw reply

* Re: Pulling tags from git.git
From: A Large Angry SCM @ 2006-03-08  4:32 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Junio C Hamano, git
In-Reply-To: <440DA82D.3060909@op5.se>

Andreas Ericsson wrote:
> A Large Angry SCM wrote:
>> Andreas Ericsson wrote:
>>
>>> Junio C Hamano wrote:
>>>
>>>> Andreas Ericsson <ae@op5.se> writes:
>>>>
>>>>
>>>>> With the git or git+ssh protocol, tags will be autofollowed
>>>>> when you do a pull (only signed tags, I think).  The
>>>>> auto-following is done by detecting tags that are fetched,
>>>>
>>>>
>>>>
>>>> Ah, you are correct.  We do not follow lightweight tags; I am
>>>> not sure if we should.
>>>>
>>>
>>> I'm fairly sure we shouldn't. The default update-hook prevents them 
>>> (if enabled), and I can't for the life of me think of why anyone 
>>> would want to distribute such tags.
>>>
>>> OTOH, preventing unannotated tags from being pushed seems like a 
>>> better way than to not have the ability to auto-follow those same 
>>> tags. After all, it's better to discourage than to disallow.
>>>
>>
>> Before you do this, please explain why unannotated tags are not 
>> useful, and so should not be allowed to be pushed.
> 
> 
> Imagine Linus, getting his "please pull" emails and doing so only to 
> find dozens of temporary tags fetched by the pull. Junio's patch (if I 
> read it correctly) unconditionally fetches *ALL* tags reachable from the 
> top of the commit-chain, which means there is no longer any way to keep 
> temporary tags in a repo from which someone else will pull.

Why is a "pull" bothering with tags? A "fetch" yes, but not a pull.

> I for one riddle my repos with temporary tags whenever I'm trying 
> something I'm not so sure of, or find an interesting bug or a design 
> decision I'm not 100% sure of. Perhaps I should rather do this with 
> branches, but imo branches are for doing work, whereas tags just mark a 
> spot in the development so I easily can find them with gitk or some such.
> 
> I may be biased by the way we do things at work. In our workflow, all 
> tags meant to be distributed have a short note in them which explains 
> the rationale of the tag. For example, new versions have a very brief 
> changelog that sales-people get on email (a blessing, that, since we 
> devs no longer have to update feature-lists and such).
> 
> Tags not meant to be distributed are unannotated, and unannotated tags 
> are kept out of published repos which are always stored at a central 
> server. Everybody synchronize to those central repos, so nobody pulls 
> from each other. Perhaps this is how the kernel devs work too, but if it 
> ever changes the update hook will no longer be able to safeguard from it 
> and the, in my eyes, temporary tags will be distributed in a 
> criss-crossing mesh so no-one will ever know where it came from or who 
> created it or why. I.e. a Bad Thing.

The distinction here is not annotated tags or temporary tags but _local_ 
tags. _Your_ workflow conventions treat unannotated tags as local tags 
but declaring that unannotated tags can not be pushed is imposing _your_ 
conventions on other groups. Just as branch names, themselves, can be 
meaningful, so can tag names.

^ permalink raw reply

* Re: Problems with using git
From: Greg KH @ 2006-03-08  2:16 UTC (permalink / raw)
  To: Mark Wooding; +Cc: git
In-Reply-To: <20060304222557.GA24628@kroah.com>

On Sat, Mar 04, 2006 at 02:25:57PM -0800, Greg KH wrote:
> On Sat, Mar 04, 2006 at 10:56:09AM +0000, Mark Wooding wrote:
> > Greg KH <greg@kroah.com> wrote:
> > 
> > > The latest development tree, and the latest public betas contain
> > > 1.1.3.  If you think this should be newer, I can easily go poke the
> > > proper people...
> > 
> > Given that there's a security issue which got fixed in 1.1.5, I think
> > this is really a bit poor.
> > 
> > I notice, by contrast, that Debian had managed to repackage and release
> > a new GIT the day after Junio fixed the bug in the first place.  That
> > was more than a month ago.
> 
> Fair enough, I'll go poke the proper people now...

They jumped and the cogito and git should be updated to the latest
version in the next update.

thanks,

greg k-h

^ permalink raw reply

* Re: [RFH] zlib gurus out there?
From: Linus Torvalds @ 2006-03-08  2:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vslptivbg.fsf@assigned-by-dhcp.cox.net>

On Tue, 7 Mar 2006, Junio C Hamano wrote:
> >
> > No, I don't think that's good. You're only doing a partial deflate, you 
> > can't ask for a Z_FULL_FLUSH. That only works if you give it the whole 
> > buffer, and you don't.

Actually, I misread what you were trying to do, and thought this was the 
inflate phase, not the deflate. Now that I understand what you want, 

> So, in short there is no way to create:
> 
>     hdr part deflated.
>     flush.
>     data part deflated independently.
> 
> and have the current sha1_read_file() not to notice that flush,

Actually, try the patch you already tried, except you'll need to add a 

	deflateEnd(&stream);
	deflateInit(&stream, Z_BEST_COMPRESSION);
	.. set up output parameters again ..

and you need to change the initial 

	size = deflateBound(&stream, len+hdrlen);

to

	size = deflateBound(&stream, len) + deflateBound(&stream, hdrlen);

and then you might be ok.

That said, I'm not sure I agree with what you're trying to do. 

		Linus

^ permalink raw reply

* [PATCH] contrib/git-svn: fix UUID reading w/pre-1.2 svn; fetch args
From: Eric Wong @ 2006-03-08  1:57 UTC (permalink / raw)
  To: Yann Dirson; +Cc: GIT list
In-Reply-To: <20060308014207.GA31137@localdomain>

Junio: please don't apply this patch to git.git just yet.  It seems fine
to me, but I haven't tested it heavily yet (Yann can help me, I hope :)
I hardly slept the past few days and I may have broken something badly
(it passes all the tests, though).

---

As a side effect, this should also work better for 'init' off
directories that are no longer in the latest revision of the
repository.

Fix 'fetch' args (<rev>=<commit> options) on brand-new heads

Signed-off-by: Eric Wong <normalperson@yhbt.net>

---

 contrib/git-svn/git-svn.perl |   26 ++++++++++++++++++--------
 1 files changed, 18 insertions(+), 8 deletions(-)

9f59596bde5bdd68d1a0a116f7383df74966de44
diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index c575883..b8d2b3e 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -162,7 +162,8 @@ sub rebuild {
 				croak "SVN repository location required: $url\n";
 			}
 			$SVN_URL ||= $url;
-			$SVN_UUID ||= setup_git_svn();
+			$SVN_UUID ||= $uuid;
+			setup_git_svn();
 			$latest = $rev;
 		}
 		assert_revision_eq_or_unknown($rev, $c);
@@ -226,10 +227,12 @@ sub fetch {
 		push @svn_co,'--ignore-externals' unless $_no_ignore_ext;
 		sys(@svn_co, $SVN_URL, $SVN_WC);
 		chdir $SVN_WC or croak $!;
+		read_uuid();
 		$last_commit = git_commit($base, @parents);
 		assert_svn_wc_clean($base->{revision}, $last_commit);
 	} else {
 		chdir $SVN_WC or croak $!;
+		read_uuid();
 		$last_commit = file_to_s("$REV_DIR/$base->{revision}");
 	}
 	my @svn_up = qw(svn up);
@@ -275,7 +278,9 @@ sub commit {
 
 	fetch();
 	chdir $SVN_WC or croak $!;
-	my $svn_current_rev =  svn_info('.')->{'Last Changed Rev'};
+	my $info = svn_info('.');
+	read_uuid($info);
+	my $svn_current_rev =  $info->{'Last Changed Rev'};
 	foreach my $c (@revs) {
 		my $mods = svn_checkout_tree($svn_current_rev, $c);
 		if (scalar @$mods == 0) {
@@ -314,6 +319,14 @@ sub show_ignore {
 
 ########################### utility functions #########################
 
+sub read_uuid {
+	return if $SVN_UUID;
+	my $info = shift || svn_info('.');
+	$SVN_UUID = $info->{'Repository UUID'} or
+					croak "Repository UUID unreadable\n";
+	s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
+}
+
 sub setup_git_svn {
 	defined $SVN_URL or croak "SVN repository location required\n";
 	unless (-d $GIT_DIR) {
@@ -323,14 +336,10 @@ sub setup_git_svn {
 	mkpath(["$GIT_DIR/$GIT_SVN/info"]);
 	mkpath([$REV_DIR]);
 	s_to_file($SVN_URL,"$GIT_DIR/$GIT_SVN/info/url");
-	$SVN_UUID = svn_info($SVN_URL)->{'Repository UUID'} or
-					croak "Repository UUID unreadable\n";
-	s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
 
 	open my $fd, '>>', "$GIT_DIR/$GIT_SVN/info/exclude" or croak $!;
 	print $fd '.svn',"\n";
 	close $fd or croak $!;
-	return $SVN_UUID;
 }
 
 sub assert_svn_wc_clean {
@@ -860,7 +869,6 @@ sub git_commit {
 	my ($log_msg, @parents) = @_;
 	assert_revision_unknown($log_msg->{revision});
 	my $out_fh = IO::File->new_tmpfile or croak $!;
-	$SVN_UUID ||= svn_info('.')->{'Repository UUID'};
 
 	map_tree_joins() if (@_branch_from && !%tree_map);
 
@@ -922,7 +930,9 @@ sub git_commit {
 	}
 	my @update_ref = ('git-update-ref',"refs/remotes/$GIT_SVN",$commit);
 	if (my $primary_parent = shift @exec_parents) {
-		push @update_ref, $primary_parent;
+		if (!system('git-rev-parse',"refs/remotes/$GIT_SVN")){
+			push @update_ref, $primary_parent;
+		}
 	}
 	sys(@update_ref);
 	sys('git-update-ref',"$GIT_SVN/revs/$log_msg->{revision}",$commit);
-- 
1.2.4.g198d

^ permalink raw reply related

* Re: git-svn, tree moves, and --no-stop-on-copy
From: Eric Wong @ 2006-03-08  1:42 UTC (permalink / raw)
  To: Yann Dirson; +Cc: GIT list
In-Reply-To: <20060307220837.GB27397@nowhere.earth>

Yann Dirson <ydirson@altern.org> wrote:
> It looks that the --no-stop-on-copy flag has been dropped by error
> during the "options cleanup" commit a couple of days ago.  This
> trivial patch appears at first sight to address the problem:

Thanks for the patch, but on second thought...

I'm tempted to drop it as an option...  IIRC, the only reason
--no-stop-on-copy exists in git-svn is in case ancient versions of svn
did not support --stop-on-copy.  I haven't bothered looking deeply into
SVN history to see if it was always supported or not.
 
> Before I found out in the doc about --no-stop-on-copy, I did a couple
> of experiments.  Among them was using a peg-revision in the URL
> passed to "git-svn init":
> 
> $ GIT_SVN_ID=git-oldsvn git-svn init https://svn.sourceforge.net/svnroot/ufoai/trunk/src@165
> 
> That succeeds, but then "git-svn fetch" will fail with:
> 
> svn: REPORT request failed on '/svnroot/ufoai/!svn/bc/190/trunk/src@165'
> svn: '/svnroot/ufoai/!svn/bc/190/trunk/src@165' path not found
> 256 at /export/work/yann/git/git/contrib/git-svn/git-svn.perl line 783
>         main::svn_log_raw('https://svn.sourceforge.net/svnroot/ufoai/trunk/src@165', '-r0:HEAD', '--stop-on-copy') called at /export/work/yann/git/git/contrib/git-svn/git-svn.perl line 219
>         main::fetch() called at /export/work/yann/git/git/contrib/git-svn/git-svn.perl line 81

If you want full repository history for reorganized repositories,
easiest way is to pay the price for full repository and all of its
history.

	git-svn init https://svn.sourceforge.net/svnroot/ufoai
	git-svn fetch
	# this puts all your branches and tags into one single big git tree.

However, the following should always work (after the following patch):

	GIT_SVN_ID=git-oldsvn git-svn init \
		https://svn.sourceforge.net/svnroot/ufoai/trunk
	GIT_SVN_ID=git-oldsvn git-svn fetch -r1:165

	GIT_SVN_ID=git-newsvn git-svn init \
		https://svn.sourceforge.net/svnroot/ufoai/ufoai/trunk
	GIT_SVN_ID=git-newsvn git-svn fetch \
		166=`git-rev-parse refs/remotes/git-oldsvn`

Unfortunately, it does not, at least with svn 1.2.3...  I have a patch
coming that should fix things for 1.1.1 (and give better 1.1.x support
in general).  I'm not sure, but it feels like something is screwed up
with svn 1.2.3dfsg1-3:

This works:	svn log -r1 https://svn.sourceforge.net/svnroot/ufoai/trunk

This doesn't:	svn  co -r1 https://svn.sourceforge.net/svnroot/ufoai/trunk

But this:	svn  co -r1 https://svn.sourceforge.net/svnroot/ufoai
will create the following structure:
	ufoai/{trunk,branches,tags}

I'm quite puzzled about it, as I swear I've seen it work on a different
project recently (of course I cannot remember which :<)

> Maybe git-svn could also be guarded against peg-revisions on init
> command-line, since that appears to confuse it quite a bit :)

Possibly, but having '@' in URLs is valid in some cases outside of
peg-revisions.

> Additionally, it may be worth pointing out in the doc at least one
> valid use of the --no-stop-on-copy flag that is friendly to the user's
> sanity: when the svn repository has undergone a reorg, such that the
> URL passed to "init" indeed moved - at least, when/if it is made to
> work :)

In the face of repository reorgs, git-svn is happiest tracking partial
history.  Or tracking the entire repository from the root.

Hopefully I've been reasonably coherent, having insomnia lately.

-- 
Eric Wong

^ permalink raw reply

* Re: [RFH] zlib gurus out there?
From: Junio C Hamano @ 2006-03-08  1:22 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.64.0603071658300.32577@g5.osdl.org>

Linus Torvalds <torvalds@osdl.org> writes:

> On Tue, 7 Mar 2006, Junio C Hamano wrote:
>> 
>> However, I am stuck with the first step, which is to do a full
>> flush after the header.  An obvious change to the code quoted
>> above writes out a corrupt object:
>> 
>> 	/* First header.. */
>> 	stream.next_in = hdr;
>> 	stream.avail_in = hdrlen;
>> -	while (deflate(&stream, 0) == Z_OK)
>> +	while (deflate(&stream, Z_FULL_FLUSH) == Z_OK)
>> 		/* nothing */;
>
> No, I don't think that's good. You're only doing a partial deflate, you 
> can't ask for a Z_FULL_FLUSH. That only works if you give it the whole 
> buffer, and you don't.

So, in short there is no way to create:

    hdr part deflated.
    flush.
    data part deflated independently.

and have the current sha1_read_file() not notice that flush,
while I can inspect the deflated stream to find the "flush", and
copy only the deflated data part into a pack?  Bummer...  I was
really shooting for full backward compatibility.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox