Git development

Git development
 help / color / mirror / Atom feed

* [PATCH] Calculate $commitsha1 in update() only when needed
From: Pavel Roskin @ 2007-12-08  5:07 UTC (permalink / raw)
  To: git

This suppresses unhelpful error messages from git rev-parse during
checkout if the module doesn't exist.

Signed-off-by: Pavel Roskin <proski@gnu.org>
---

 git-cvsserver.perl |   12 +++++++-----
 1 files changed, 7 insertions(+), 5 deletions(-)


diff --git a/git-cvsserver.perl b/git-cvsserver.perl
index ecded3b..409b301 100755
--- a/git-cvsserver.perl
+++ b/git-cvsserver.perl
@@ -2427,9 +2427,6 @@ sub update
     # first lets get the commit list
     $ENV{GIT_DIR} = $self->{git_path};
 
-    my $commitsha1 = `git rev-parse $self->{module}`;
-    chomp $commitsha1;
-
     my $commitinfo = `git cat-file commit $self->{module} 2>&1`;
     unless ( $commitinfo =~ /tree\s+[a-zA-Z0-9]{40}/ )
     {
@@ -2440,8 +2437,13 @@ sub update
     my $git_log;
     my $lastcommit = $self->_get_prop("last_commit");
 
-    if (defined $lastcommit && $lastcommit eq $commitsha1) { # up-to-date
-         return 1;
+    if (defined $lastcommit) {
+        my $commitsha1 = `git rev-parse $self->{module}`;
+        chomp $commitsha1;
+
+        if ($lastcommit eq $commitsha1) { # up-to-date
+            return 1;
+        }
     }
 
     # Start exclusive lock here...

^ permalink raw reply related

* [PATCH 2/2] pack-objects: fix threaded load balancing
From: Nicolas Pitre @ 2007-12-08  5:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jon Smirl


The current method consists of a master thread serving chunks of objects
to work threads when they're done with their previous chunk.  The issue
is to determine the best chunk size: making it too large creates poor
load balancing, while making it too small has a negative effect on pack
size because of the increased number of chunk boundaries and poor delta
window utilization.

This patch implements a completely different approach by initially
splitting the work in large chunks uniformly amongst all threads, and
whenever a thread is done then it steals half of the remaining work from
another thread with the largest amount of unprocessed objects.

This has the advantage of greatly reducing the number of chunk boundaries
with an almost perfect load balancing.

Signed-off-by: Nicolas Pitre <nico@cam.org>
---
 builtin-pack-objects.c |  117 +++++++++++++++++++++++++++++++++++-------------
 1 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 5002cc6..fcc1901 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1479,10 +1479,10 @@ static unsigned long free_unpacked(struct unpacked *n)
 	return freed_mem;
 }
 
-static void find_deltas(struct object_entry **list, unsigned list_size,
+static void find_deltas(struct object_entry **list, unsigned *list_size,
 			int window, int depth, unsigned *processed)
 {
-	uint32_t i = 0, idx = 0, count = 0;
+	uint32_t i, idx = 0, count = 0;
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array;
 	unsigned long mem_usage = 0;
@@ -1490,11 +1490,23 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
 	array = xmalloc(array_size);
 	memset(array, 0, array_size);
 
-	do {
-		struct object_entry *entry = list[i++];
+	for (;;) {
+		struct object_entry *entry = *list++;
 		struct unpacked *n = array + idx;
 		int j, max_depth, best_base = -1;
 
+		progress_lock();
+		if (!*list_size) {
+			progress_unlock();
+			break;
+		}
+		(*list_size)--;
+		if (!entry->preferred_base) {
+			(*processed)++;
+			display_progress(progress_state, *processed);
+		}
+		progress_unlock();
+
 		mem_usage -= free_unpacked(n);
 		n->entry = entry;
 
@@ -1512,11 +1524,6 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
 		if (entry->preferred_base)
 			goto next;
 
-		progress_lock();
-		(*processed)++;
-		display_progress(progress_state, *processed);
-		progress_unlock();
-
 		/*
 		 * If the current object is at pack edge, take the depth the
 		 * objects that depend on the current object into account
@@ -1576,7 +1583,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
 			count++;
 		if (idx >= window)
 			idx = 0;
-	} while (i < list_size);
+	}
 
 	for (i = 0; i < window; ++i) {
 		free_delta_index(array[i].index);
@@ -1591,6 +1598,7 @@ struct thread_params {
 	pthread_t thread;
 	struct object_entry **list;
 	unsigned list_size;
+	unsigned remaining;
 	int window;
 	int depth;
 	unsigned *processed;
@@ -1612,10 +1620,10 @@ static void *threaded_find_deltas(void *arg)
 		pthread_mutex_lock(&data_ready);
 		pthread_mutex_unlock(&data_request);
 
-		if (!me->list_size)
+		if (!me->remaining)
 			return NULL;
 
-		find_deltas(me->list, me->list_size,
+		find_deltas(me->list, &me->remaining,
 			    me->window, me->depth, me->processed);
 	}
 }
@@ -1624,57 +1632,102 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
 			   int window, int depth, unsigned *processed)
 {
 	struct thread_params *target, p[delta_search_threads];
-	int i, ret;
-	unsigned chunk_size;
+	int i, ret, active_threads = 0;
 
 	if (delta_search_threads <= 1) {
-		find_deltas(list, list_size, window, depth, processed);
+		find_deltas(list, &list_size, window, depth, processed);
 		return;
 	}
 
 	pthread_mutex_lock(&data_provider);
 	pthread_mutex_lock(&data_ready);
 
+	/* Start work threads. */
 	for (i = 0; i < delta_search_threads; i++) {
 		p[i].window = window;
 		p[i].depth = depth;
 		p[i].processed = processed;
+		p[i].remaining = 0;
 		ret = pthread_create(&p[i].thread, NULL,
 				     threaded_find_deltas, &p[i]);
 		if (ret)
 			die("unable to create thread: %s", strerror(ret));
+		active_threads++;
 	}
 
-	/* this should be auto-tuned somehow */
-	chunk_size = window * 1000;
+	/* Then partition the work amongst them. */
+	for (i = 0; i < delta_search_threads; i++) {
+		unsigned sub_size = list_size / (delta_search_threads - i);
 
-	do {
-		unsigned sublist_size = chunk_size;
-		if (sublist_size > list_size)
-			sublist_size = list_size;
+		pthread_mutex_lock(&data_provider);
+		target = data_requester;
+		if (!sub_size) {
+			pthread_mutex_unlock(&data_ready);
+			pthread_join(target->thread, NULL);
+			active_threads--;
+			continue;
+		}
 
 		/* try to split chunks on "path" boundaries */
-		while (sublist_size < list_size && list[sublist_size]->hash &&
-		       list[sublist_size]->hash == list[sublist_size-1]->hash)
-			sublist_size++;
+		while (sub_size < list_size && list[sub_size]->hash &&
+		       list[sub_size]->hash == list[sub_size-1]->hash)
+			sub_size++;
+
+		target->list = list;
+		target->list_size = sub_size;
+		target->remaining = sub_size;
+		pthread_mutex_unlock(&data_ready);
 
+		list += sub_size;
+		list_size -= sub_size;
+	}
+
+	/*
+	 * Now let's wait for work completion.  Each time a thread is done
+	 * with its work, we steal half of the remaining work from the
+	 * thread with the largest number of unprocessed objects and give
+	 * it to that newly idle thread.  This ensure good load balancing
+	 * until the remaining object list segments are simply too short
+	 * to be worth splitting anymore.
+	 */
+	do {
+		struct thread_params *victim = NULL;
+		unsigned sub_size = 0;
 		pthread_mutex_lock(&data_provider);
 		target = data_requester;
-		target->list = list;
-		target->list_size = sublist_size;
+
+		progress_lock();
+		for (i = 0; i < delta_search_threads; i++)
+			if (p[i].remaining > 2*window &&
+			    (!victim || victim->remaining < p[i].remaining))
+				victim = &p[i];
+		if (victim) {
+			sub_size = victim->remaining / 2;
+			list = victim->list + victim->list_size - sub_size;
+			while (sub_size && list[0]->hash &&
+			       list[0]->hash == list[-1]->hash) {
+				list++;
+				sub_size--;
+			}
+			target->list = list;
+			victim->list_size -= sub_size;
+			victim->remaining -= sub_size;
+		}
+		progress_unlock();
+
+		target->list_size = sub_size;
+		target->remaining = sub_size;
 		pthread_mutex_unlock(&data_ready);
 
-		list += sublist_size;
-		list_size -= sublist_size;
-		if (!sublist_size) {
+		if (!sub_size) {
 			pthread_join(target->thread, NULL);
-			i--;
+			active_threads--;
 		}
-	} while (i);
+	} while (active_threads);
 }
 
 #else
-#define ll_find_deltas find_deltas
+#define ll_find_deltas(l, s, w, d, p)	find_deltas(l, &s, w, d, p)
 #endif
 
 static void prepare_pack(int window, int depth)
-- 
1.5.3.7.2184.ge321d-dirty

^ permalink raw reply related

* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08  5:01 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: David Brown, Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072328420.555@xanadu.home>

On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
>
> > Does the gcc repo contain some giant objects? Why wasn't the memory
> > freed after their chain was processed?
>
> It should be.
>
> > Most of the last 10% is being done on a single CPU. There must be a
> > chain of giant objects that is unbalancing everything.
>
> I'm about to send a patch to fix the thread balancing for real this
> time.

Something is really broken in the last 5% of that repo. I have been
processing at 97% for 30 minutes without moving to 98%.

-- 
Jon Smirl
jonsmirl@gmail.com

^ permalink raw reply

* [PATCH 1/2] pack-objects: reverse the delta search sort list
From: Nicolas Pitre @ 2007-12-08  5:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jon Smirl


It is currently sorted and then walked backward.  Not only this doesn't
feel natural for my poor brain, but it would make the next patch less
obvious as well.

So reverse the sort order, and reverse the list walking direction,
which effectively produce the exact same end result as before.

Also bring the relevant comment nearer the actual code and adjust it
accordingly, with minor additional clarifications.

Signed-off-by: Nicolas Pitre <nico@cam.org>
---
 builtin-pack-objects.c |   41 +++++++++++++++++++++--------------------
 1 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 4f44658..5002cc6 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1245,28 +1245,37 @@ static void get_object_details(void)
 	free(sorted_by_offset);
 }
 
+/*
+ * We search for deltas in a list sorted by type, by filename hash, and then
+ * by size, so that we see progressively smaller and smaller files.
+ * That's because we prefer deltas to be from the bigger file
+ * to the smaller -- deletes are potentially cheaper, but perhaps
+ * more importantly, the bigger file is likely the more recent
+ * one.  The deepest deltas are therefore the oldest objects which are
+ * less susceptible to be accessed often.
+ */
 static int type_size_sort(const void *_a, const void *_b)
 {
 	const struct object_entry *a = *(struct object_entry **)_a;
 	const struct object_entry *b = *(struct object_entry **)_b;
 
-	if (a->type < b->type)
-		return -1;
 	if (a->type > b->type)
-		return 1;
-	if (a->hash < b->hash)
 		return -1;
-	if (a->hash > b->hash)
+	if (a->type < b->type)
 		return 1;
-	if (a->preferred_base < b->preferred_base)
+	if (a->hash > b->hash)
 		return -1;
-	if (a->preferred_base > b->preferred_base)
+	if (a->hash < b->hash)
 		return 1;
-	if (a->size < b->size)
+	if (a->preferred_base > b->preferred_base)
 		return -1;
+	if (a->preferred_base < b->preferred_base)
+		return 1;
 	if (a->size > b->size)
+		return -1;
+	if (a->size < b->size)
 		return 1;
-	return a > b ? -1 : (a < b);  /* newest last */
+	return a < b ? -1 : (a > b);  /* newest first */
 }
 
 struct unpacked {
@@ -1317,14 +1326,6 @@ static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 #endif
 
-/*
- * We search for deltas _backwards_ in a list sorted by type and
- * by size, so that we see progressively smaller and smaller files.
- * That's because we prefer deltas to be from the bigger file
- * to the smaller - deletes are potentially cheaper, but perhaps
- * more importantly, the bigger file is likely the more recent
- * one.
- */
 static int try_delta(struct unpacked *trg, struct unpacked *src,
 		     unsigned max_depth, unsigned long *mem_usage)
 {
@@ -1481,7 +1482,7 @@ static unsigned long free_unpacked(struct unpacked *n)
 static void find_deltas(struct object_entry **list, unsigned list_size,
 			int window, int depth, unsigned *processed)
 {
-	uint32_t i = list_size, idx = 0, count = 0;
+	uint32_t i = 0, idx = 0, count = 0;
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array;
 	unsigned long mem_usage = 0;
@@ -1490,7 +1491,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
 	memset(array, 0, array_size);
 
 	do {
-		struct object_entry *entry = list[--i];
+		struct object_entry *entry = list[i++];
 		struct unpacked *n = array + idx;
 		int j, max_depth, best_base = -1;
 
@@ -1575,7 +1576,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
 			count++;
 		if (idx >= window)
 			idx = 0;
-	} while (i > 0);
+	} while (i < list_size);
 
 	for (i = 0; i < window; ++i) {
 		free_delta_index(array[i].index);
-- 
1.5.3.7.2184.ge321d-dirty

^ permalink raw reply related

* [PATCH] git-help: simplify and fix option parsing.
From: Christian Couder @ 2007-12-08  5:06 UTC (permalink / raw)
  To: Junio Hamano; +Cc: git

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 help.c |   31 +++++++++++--------------------
 1 files changed, 11 insertions(+), 20 deletions(-)

	Junio wrote about "help.c" in my 
	"git-help: add -w|--web option to display html man page in a browser."
	patch:
	> Isn't this "check-help-cmd" duplication ugly, by the way?

	You are right, this patch should fix it.
	Thanks.

diff --git a/help.c b/help.c
index ecc8c66..78686db 100644
--- a/help.c
+++ b/help.c
@@ -241,7 +241,9 @@ void list_common_cmds_help(void)
 
 static const char *cmd_to_page(const char *git_cmd)
 {
-	if (!prefixcmp(git_cmd, "git"))
+	if (!git_cmd)
+		return "git";
+	else if (!prefixcmp(git_cmd, "git"))
 		return git_cmd;
 	else {
 		int page_len = strlen(git_cmd) + 4;
@@ -283,38 +285,27 @@ int cmd_version(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
-static void check_help_cmd(const char *help_cmd)
+int cmd_help(int argc, const char **argv, const char *prefix)
 {
-	if (!help_cmd) {
+	if (argc < 2) {
 		printf("usage: %s\n\n", git_usage_string);
 		list_common_cmds_help();
 		exit(0);
 	}
 
-	else if (!strcmp(help_cmd, "--all") || !strcmp(help_cmd, "-a")) {
+	const char *help_cmd = argv[1];
+
+	if (!strcmp(help_cmd, "--all") || !strcmp(help_cmd, "-a")) {
 		printf("usage: %s\n\n", git_usage_string);
 		list_commands();
-		exit(0);
 	}
-}
 
-int cmd_help(int argc, const char **argv, const char *prefix)
-{
-	const char *help_cmd = argc > 1 ? argv[1] : NULL;
-	check_help_cmd(help_cmd);
-
-	if (!strcmp(help_cmd, "--web") || !strcmp(help_cmd, "-w")) {
-		help_cmd = argc > 2 ? argv[2] : NULL;
-		check_help_cmd(help_cmd);
-
-		show_html_page(help_cmd);
+	else if (!strcmp(help_cmd, "--web") || !strcmp(help_cmd, "-w")) {
+		show_html_page(argc > 2 ? argv[2] : NULL);
 	}
 
 	else if (!strcmp(help_cmd, "--info") || !strcmp(help_cmd, "-i")) {
-		help_cmd = argc > 2 ? argv[2] : NULL;
-		check_help_cmd(help_cmd);
-
-		show_info_page(help_cmd);
+		show_info_page(argc > 2 ? argv[2] : NULL);
 	}
 
 	else
-- 
1.5.3.7.2199.ge1512-dirty

^ permalink raw reply related

* Re: git guidance
From: Al Boldi @ 2007-12-08  4:56 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: Jakub Narebski, Andreas Ericsson, Johannes Schindelin,
	Phillip Susi, Linus Torvalds, Jing Xue, linux-kernel, git
In-Reply-To: <11272.1197056185@turing-police.cc.vt.edu>

Valdis.Kletnieks@vt.edu wrote:
> On Fri, 07 Dec 2007 22:04:48 +0300, Al Boldi said:
> > Because WORKFLOW C is transparent, it won't affect other workflows.  So
> > you could still use your normal WORKFLOW B in addition to WORKFLOW C,
> > gaining an additional level of version control detail at no extra cost
> > other than the git-engine scratch repository overhead.
> >
> > BTW, is git efficient enough to handle WORKFLOW C?
>
> Imagine the number of commits a 'make clean; make' will do in a kernel
> tree, as it commits all those .o files... :)

.o files???

It probably goes without saying, that gitfs should have some basic 
configuration file to setup its transparent behaviour, and which would most 
probably contain an include / exclude file-filter mask, and probably other 
basic configuration options.  But this is really secondary to the 
implementation, and the question remains whether git is efficient enough.

IOW, how big is the git commit overhead as compared to a normal copy?

Thanks!

--
Al

^ permalink raw reply

* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08  4:30 UTC (permalink / raw)
  To: Jon Smirl; +Cc: David Brown, Git Mailing List
In-Reply-To: <9e4733910712072022na3369caob48d4b26a56224ea@mail.gmail.com>

On Fri, 7 Dec 2007, Jon Smirl wrote:

> Does the gcc repo contain some giant objects? Why wasn't the memory
> freed after their chain was processed?

It should be.

> Most of the last 10% is being done on a single CPU. There must be a
> chain of giant objects that is unbalancing everything.

I'm about to send a patch to fix the thread balancing for real this 
time.


Nicolas

^ permalink raw reply

* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08  4:22 UTC (permalink / raw)
  To: David Brown, Nicolas Pitre, Git Mailing List
In-Reply-To: <20071208033722.GA27776@old.davidb.org>

On 12/7/07, David Brown <git@davidb.org> wrote:
> On Fri, Dec 07, 2007 at 10:29:31PM -0500, Jon Smirl wrote:
> >The kernel repo has the same problem but not nearly as bad.
> >
> >Starting from a default pack
> > git repack -a -d -f  --depth=1000 --window=1000
> >Uses 1GB of physical memory
> >
> >Now do the command again.
> > git repack -a -d -f  --depth=1000 --window=1000
> >Uses 1.3GB of physical memory
>
> With my repo that contains a bunch of 50MB tarfiles, I've found I must
> specify --window-memory as well to keep repack from using nearly unbounded
> amounts of memory.  Perhaps it is the larger files found in gcc that
> provokes this.
>
> A window size of 1000 can take a lot of memory if the objects are large.

This is a partial solution to the problem. Adding window size =256M
took memory consumption down from 4.8GB to 2.8GB. It took an hour to
run the test.

It not the complete solution since my git process is still using 2.4GB
physical memory. I also still experiencing a lot of slow down in the
last 10%.

Does the gcc repo contain some giant objects? Why wasn't the memory
freed after their chain was processed?

Most of the last 10% is being done on a single CPU. There must be a
chain of giant objects that is unbalancing everything.

-- 
Jon Smirl
jonsmirl@gmail.com

^ permalink raw reply

* Re: [PATCH 2/3] git-help: add -w|--web option to display html man page in a browser.
From: Christian Couder @ 2007-12-08  4:00 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, Theodore Tso, Jakub Narebski, Alex Riesen, Andreas Ericsson,
	Matthieu Moy, Eric Wong
In-Reply-To: <7vfxyf7zoi.fsf@gitster.siamese.dyndns.org>

Le vendredi 7 décembre 2007, Junio C Hamano a écrit :
> Junio C Hamano <gitster@pobox.com> writes:
> > Christian Couder <chriscool@tuxfamily.org> writes:
> > ...
> >
> >>> > +init_browser_path() {
> >>> > +	browser_path=`git config browser.$1.path`
> >>> > +	test -z "$browser_path" && browser_path=$1
> >>> > +}
> >>>
> >>> Please do not contaminate the config file with something the user can
> >>> easily use a lot more standardized way (iow $PATH) to configure to
> >>> his taste.
> >>>
> >>> I'd suggest dropping this bit.
>
> Well, I changed my mind.  It is a bit funny to have both firefox and
> iceweasel as "valid-tool", but if we consider $browser to define the
> external interface and $browser_path to define the implementation, it
> sort of makes sense to have that configuration.  browser_path could be
> iceweasel for browser firefox.
>
> I'll squash the patch to update the one from the last round (as the last
> two patches are not yet accepted in 'next' yet), remove the html
> documentation path fallback, but will leave this part in.

Thanks.

> browser.*.path and web.browser configuration need to be documented, if
> not already, though.

Did you see this patch:

http://article.gmane.org/gmane.comp.version-control.git/67101

Christian.

^ permalink raw reply

* Re: Something is broken in repack
From: Harvey Harrison @ 2007-12-08  3:48 UTC (permalink / raw)
  To: Jon Smirl; +Cc: Nicolas Pitre, Git Mailing List
In-Reply-To: <9e4733910712071929h17a7d88dv37686ec7cd858c63@mail.gmail.com>

On Fri, 2007-12-07 at 22:29 -0500, Jon Smirl wrote:
> The kernel repo has the same problem but not nearly as bad.
> 
> Starting from a default pack
>  git repack -a -d -f  --depth=1000 --window=1000
> Uses 1GB of physical memory
> 
> Now do the command again.
>  git repack -a -d -f  --depth=1000 --window=1000
> Uses 1.3GB of physical memory
> 
> I suspect the gcc repo has much longer revision chains than the kernel
> one since the kernel repo is only a few years old. The Mozilla repo
> contained revision chains with over 2,000 revisions. Longer revision
> chains result in longer delta chains.

I sent out a partial delta breakdown for the gcc repo earlier, here's
the whole list.

breakdown of the gcc packfile:

Total objects
1017922

ChainLength	Objects	Cumulative
1:	103817	103817
2:	67332	171149
3:	57520	228669
4:	52570	281239
5:	43910	325149
6:	37520	362669
7:	35248	397917
8:	29819	427736
9:	27619	455355
10:	22656	478011
11:	21073	499084
12:	18738	517822
13:	16674	534496
14:	14882	549378
15:	14424	563802
16:	12765	576567
17:	11662	588229
18:	11845	600074
19:	11694	611768
20:	9625	621393
21:	9031	630424
22:	8437	638861
23:	8217	647078
24:	7927	655005
25:	7955	662960
26:	7092	670052
27:	7004	677056
28:	6724	683780
29:	6626	690406
30:	5875	696281
31:	5970	702251
32:	5726	707977
33:	6025	714002
34:	5354	719356
35:	6413	725769
36:	4933	730702
37:	4888	735590
38:	4561	740151
39:	4366	744517
40:	4166	748683
41:	4531	753214
42:	4029	757243
43:	3701	760944
44:	3647	764591
45:	3553	768144
46:	3509	771653
47:	3473	775126
48:	3442	778568
49:	3379	781947
50:	3395	785342
51:	3315	788657
52:	3168	791825
53:	3345	795170
54:	3166	798336
55:	3237	801573
56:	2795	804368
57:	2768	807136
58:	2666	809802
59:	2723	812525
60:	2547	815072
61:	2565	817637
62:	2622	820259
63:	2521	822780
64:	2492	825272
65:	2529	827801
66:	2566	830367
67:	2685	833052
68:	2458	835510
69:	2457	837967
70:	2440	840407
71:	2410	842817
72:	2337	845154
73:	2301	847455
74:	2201	849656
75:	2127	851783
76:	2256	854039
77:	2038	856077
78:	1925	858002
79:	1965	859967
80:	1929	861896
81:	1890	863786
82:	1873	865659
83:	1964	867623
84:	1898	869521
85:	1839	871360
86:	1933	873293
87:	1876	875169
88:	1851	877020
89:	1789	878809
90:	1790	880599
91:	1804	882403
92:	1696	884099
93:	1863	885962
94:	1889	887851
95:	1766	889617
96:	1731	891348
97:	1775	893123
98:	1750	894873
99:	1767	896640
100:	1644	898284
101:	1642	899926
102:	1489	901415
103:	1532	902947
104:	1564	904511
105:	1477	905988
106:	1461	907449
107:	1383	908832
108:	1422	910254
109:	1316	911570
110:	1480	913050
111:	1329	914379
112:	1375	915754
113:	1292	917046
114:	1224	918270
115:	1123	919393
116:	1216	920609
117:	1252	921861
118:	1252	923113
119:	1346	924459
120:	1320	925779
121:	1277	927056
122:	1234	928290
123:	1200	929490
124:	1255	930745
125:	1206	931951
126:	1155	933106
127:	1246	934352
128:	1226	935578
129:	1194	936772
130:	1268	938040
131:	1334	939374
132:	1146	940520
133:	1220	941740
134:	1055	942795
135:	1110	943905
136:	1095	945000
137:	1294	946294
138:	1204	947498
139:	1218	948716
140:	1101	949817
141:	993	950810
142:	975	951785
143:	1014	952799
144:	968	953767
145:	957	954724
146:	1069	955793
147:	996	956789
148:	967	957756
149:	964	958720
150:	954	959674
151:	949	960623
152:	1001	961624
153:	1042	962666
154:	1057	963723
155:	948	964671
156:	966	965637
157:	833	966470
158:	959	967429
159:	907	968336
160:	854	969190
161:	847	970037
162:	836	970873
163:	769	971642
164:	747	972389
165:	755	973144
166:	707	973851
167:	774	974625
168:	777	975402
169:	783	976185
170:	707	976892
171:	738	977630
172:	775	978405
173:	781	979186
174:	698	979884
175:	801	980685
176:	712	981397
177:	679	982076
178:	775	982851
179:	696	983547
180:	760	984307
181:	740	985047
182:	752	985799
183:	704	986503
184:	683	987186
185:	690	987876
186:	741	988617
187:	642	989259
188:	672	989931
189:	679	990610
190:	691	991301
191:	648	991949
192:	703	992652
193:	675	993327
194:	687	994014
195:	625	994639
196:	607	995246
197:	583	995829
198:	632	996461
199:	540	997001
200:	652	997653
201:	600	998253
202:	628	998881
203:	624	999505
204:	582	1000087
205:	548	1000635
206:	520	1001155
207:	648	1001803
208:	556	1002359
209:	563	1002922
210:	508	1003430
211:	570	1004000
212:	530	1004530
213:	575	1005105
214:	527	1005632
215:	521	1006153
216:	515	1006668
217:	513	1007181
218:	460	1007641
219:	491	1008132
220:	474	1008606
221:	471	1009077
222:	482	1009559
223:	485	1010044
224:	439	1010483
225:	385	1010868
226:	385	1011253
227:	403	1011656
228:	380	1012036
229:	376	1012412
230:	377	1012789
231:	415	1013204
232:	394	1013598
233:	362	1013960
234:	334	1014294
235:	366	1014660
236:	317	1014977
237:	362	1015339
238:	343	1015682
239:	392	1016074
240:	317	1016391
241:	305	1016696
242:	319	1017015
243:	276	1017291
244:	247	1017538
245:	179	1017717
246:	111	1017828
247:	61	1017889
248:	27	1017916
249:	6	1017922

Harvey

^ permalink raw reply

* Re: Something is broken in repack
From: Harvey Harrison @ 2007-12-08  3:44 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Jon Smirl, Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>


On Fri, 2007-12-07 at 20:46 -0500, Nicolas Pitre wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
> > And the 330MB gcc pack for input
> >  git repack -a -d -f  --depth=250 --window=250
> > 
> > complete seconds RAM
> > 10%  47 1GB
> > 20%  29 1Gb
> > 30%  24 1Gb
> > 40%  18 1GB
> > 50%  110 1.2GB
> > 60%  85 1.4GB
> > 70%  195 1.5GB
> > 80%  186 2.5GB
> > 90%  489 3.8GB
> > 95%  800 4.8GB
> > I killed it because it started swapping
> > 
> > The mmaps are only about 400MB in this case.
> > At the end the git process had 4.4GB of physical RAM allocated.
> > Starting with a 2GB pack of the same data my process size only grew to
> > 3GB with 2GB of mmaps.
> 
> Which is quite reasonable, even if the same issue might still be there.
> 
> So the problem seems to be related to the pack access code and not the 
> repack code.  And it must have something to do with the number of deltas 
> being replayed.  And because the repack is attempting delta compression 
> roughly from newest to oldest, and because old objects are typically in 
> a deeper delta chain, then this might explain the logarithmic slowdown.
> 
> So something must be wrong with the delta cache in sha1_file.c somehow.

All I have is a qualitative observation, but during the process of
creating the pack, there was a _huge_ slowdown between 10-15%
(hundreds/dozens per second to single object per second and a
corresponding increase in process size).  Didn't keep any numbers
at the time, but it was noticable.

I wonder if there are a bunch of huge objects somewhere in gcc's
history?

Harvey

^ permalink raw reply

* Re: Something is broken in repack
From: David Brown @ 2007-12-08  3:37 UTC (permalink / raw)
  To: Jon Smirl; +Cc: Nicolas Pitre, Git Mailing List
In-Reply-To: <9e4733910712071929h17a7d88dv37686ec7cd858c63@mail.gmail.com>

On Fri, Dec 07, 2007 at 10:29:31PM -0500, Jon Smirl wrote:
>The kernel repo has the same problem but not nearly as bad.
>
>Starting from a default pack
> git repack -a -d -f  --depth=1000 --window=1000
>Uses 1GB of physical memory
>
>Now do the command again.
> git repack -a -d -f  --depth=1000 --window=1000
>Uses 1.3GB of physical memory

With my repo that contains a bunch of 50MB tarfiles, I've found I must
specify --window-memory as well to keep repack from using nearly unbounded
amounts of memory.  Perhaps it is the larger files found in gcc that
provokes this.

A window size of 1000 can take a lot of memory if the objects are large.

Dave

^ permalink raw reply

* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08  3:29 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072124160.555@xanadu.home>

The kernel repo has the same problem but not nearly as bad.

Starting from a default pack
 git repack -a -d -f  --depth=1000 --window=1000
Uses 1GB of physical memory

Now do the command again.
 git repack -a -d -f  --depth=1000 --window=1000
Uses 1.3GB of physical memory

I suspect the gcc repo has much longer revision chains than the kernel
one since the kernel repo is only a few years old. The Mozilla repo
contained revision chains with over 2,000 revisions. Longer revision
chains result in longer delta chains.

So what is allocating the extra memory? Either a function of the
number of entries in the chain, or related to accessing the chain
since a chain with more entries will need to be accessed more times.

I have a 168MB kernel pack now after 15 minutes of four cores at 100%.

Here's another observation, the gcc objects are larger. Kernel has
650K objects in 190MB, gcc has 870K objects in 330MB. Average gcc
object is 30% larger. How should the average kernel developer
interpret this?

-- 
Jon Smirl
jonsmirl@gmail.com

^ permalink raw reply

* Re: Something is broken in repack
From: David Brown @ 2007-12-08  2:56 UTC (permalink / raw)
  To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071505y6834f040k37261d65a2d445c4@mail.gmail.com>

On Fri, Dec 07, 2007 at 06:05:38PM -0500, Jon Smirl wrote:
>Using this config:
>[pack]
>        threads = 4
>        deltacachesize = 256M
>        deltacachelimit = 0

Just out of curiousity, does adding

         [pack]
                 windowmemory = 256M

help.  I've found this to grow very large when there are large blobs.

Dave

^ permalink raw reply

* Re: git-bisect feature suggestion: "git-bisect diff"
From: Junio C Hamano @ 2007-12-08  2:54 UTC (permalink / raw)
  To: Jeff King; +Cc: Ingo Molnar, git
In-Reply-To: <20071207220738.GA23535@coredump.intra.peff.net>

Jeff King <peff@peff.net> writes:

> Right, which leads to my (perhaps subtle) point that the builtin alias
> hack is just what you said elsewhere: a cute hack. IOW, I am slightly
> NAKing inclusion of it in master (OTOH, I really don't see what it could
> _hurt_, so maybe somebody could find a use for it that we didn't think
> of).

Heh, since when one can NAK one's own change? ;-)

Yeah, I am inclined to let it rot in 'next' until 1.5.4 ships and then
decide.  Either people will forget about it (in which case we can
revert) or enough people would want it and give some magic smarts to
it.

^ permalink raw reply

* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08  2:28 UTC (permalink / raw)
  To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071804ja0a49e1m1eb209cb942bc36f@mail.gmail.com>

On Fri, 7 Dec 2007, Jon Smirl wrote:

> On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> > On Fri, 7 Dec 2007, Jon Smirl wrote:
> >
> > >  git repack -a -d -f  --depth=250 --window=250
> > >
> > > complete seconds RAM
> > > 10%  47 1GB
> > > 20%  29 1Gb
> > > 30%  24 1Gb
> > > 40%  18 1GB
> > > 50%  110 1.2GB
> > > 60%  85 1.4GB
> > > 70%  195 1.5GB
> > > 80%  186 2.5GB
> > > 90%  489 3.8GB
> > > 95%  800 4.8GB
> > > I killed it because it started swapping
> > >
> > > The mmaps are only about 400MB in this case.
> > > At the end the git process had 4.4GB of physical RAM allocated.
> >
> > That's really bad.
> >
> > > Starting from a highly compressed pack greatly aggravates the problem.
> >
> > That is really interesting though.
> >
> > > Starting with a 2GB pack of the same data my process size only grew to
> > > 3GB with 2GB of mmaps.
> >
> > Which is quite reasonable, even if the same issue might still be there.
> >
> > So the problem seems to be related to the pack access code and not the
> > repack code.  And it must have something to do with the number of deltas
> > being replayed.  And because the repack is attempting delta compression
> > roughly from newest to oldest, and because old objects are typically in
> > a deeper delta chain, then this might explain the logarithmic slowdown.
> >
> > So something must be wrong with the delta cache in sha1_file.c somehow.

Staring at the cache code I don't see anything wrong with it.

> I applied the delta accounting patch. It took about 200MB of from the
> memory use but that doesn't make a dent in 4GB of allocations.

Right.  I didn't expect much from that fix.


Nicolas

^ permalink raw reply

* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08  2:22 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>

On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> So the problem seems to be related to the pack access code and not the
> repack code.  And it must have something to do with the number of deltas
> being replayed.  And because the repack is attempting delta compression
> roughly from newest to oldest, and because old objects are typically in
> a deeper delta chain, then this might explain the logarithmic slowdown.

What could be wrongly allocating 4GB of memory? Figure that out and
you should have your answer. The slow down may be coming from having
to search through more and more objects in memory.

Memory consumption seem to be correlated to the depth of the delta
chain being accessed. It blows up tremendously right at the end. It
may even be a square of the length of the chain length. For the normal
default case the square didn't hurt, but 250*250 = 62,500 which would
eat a huge amount of memory.

-- 
Jon Smirl
jonsmirl@gmail.com

^ permalink raw reply

* Re: Git and GCC
From: J.C. Pizarro @ 2007-12-08  2:21 UTC (permalink / raw)
  To: Linus Torvalds, David Miller, gcc, git

On 2007/12/07, "Linus Torvalds" <torvalds@linux-foundation.org> wrote:
> On Fri, 7 Dec 2007, David Miller wrote:
> >
> > Also I could end up being performance limited by SHA, it's not very
> > well tuned on Sparc.  It's been on my TODO list to code up the crypto
> > unit support for Niagara-2 in the kernel, then work with Herbert Xu on
> > the userland interfaces to take advantage of that in things like
> > libssl.  Even a better C/asm version would probably improve GIT
> > performance a bit.
>
> I doubt yu can use the hardware support. Kernel-only hw support is
> inherently broken for any sane user-space usage, the setup costs are just
> way way too high. To be useful, crypto engines need to support direct user
> space access (ie a regular instruction, with all state being held in
> normal registers that get saved/restored by the kernel).
>
> > Is SHA a significant portion of the compute during these repacks?
> > I should run oprofile...
>
> SHA1 is almost totally insignificant on x86. It hardly shows up. But we
> have a good optimized version there.

If SHA1 is slow then why dont he contribute adding Haval160 (3 rounds)
that it's faster than SHA1? And to optimize still more it with SIMD instructions
in kernelspace and userland.

>
> zlib tends to be a lot more noticeable (especially the uncompression: it
> may be faster than compression, but it's done _so_ much more that it
> totally dominates).
>
> 			Linus

It's better

1.   "Don't compress this repo but compact this uncompressed repo
      using minimal spanning forest and deltas"
2.   "After, compress this whole repo with LZMA (e.g. 48MiB) from 7zip before
      burning it to DVD for backup reasons or before replicating it to
internet".

   J.C.Pizarro "the noiser"

^ permalink raw reply

* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08  2:04 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>

On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
>
> > Using this config:
> > [pack]
> >         threads = 4
> >         deltacachesize = 256M
> >         deltacachelimit = 0
>
> Since you have a different result according to the source pack used then
> those cache settings, even if there was a bug with them, are not
> significant.
>
> > And the 330MB gcc pack for input
> >  git repack -a -d -f  --depth=250 --window=250
> >
> > complete seconds RAM
> > 10%  47 1GB
> > 20%  29 1Gb
> > 30%  24 1Gb
> > 40%  18 1GB
> > 50%  110 1.2GB
> > 60%  85 1.4GB
> > 70%  195 1.5GB
> > 80%  186 2.5GB
> > 90%  489 3.8GB
> > 95%  800 4.8GB
> > I killed it because it started swapping
> >
> > The mmaps are only about 400MB in this case.
> > At the end the git process had 4.4GB of physical RAM allocated.
>
> That's really bad.
>
> > Starting from a highly compressed pack greatly aggravates the problem.
>
> That is really interesting though.
>
> > Starting with a 2GB pack of the same data my process size only grew to
> > 3GB with 2GB of mmaps.
>
> Which is quite reasonable, even if the same issue might still be there.
>
> So the problem seems to be related to the pack access code and not the
> repack code.  And it must have something to do with the number of deltas
> being replayed.  And because the repack is attempting delta compression
> roughly from newest to oldest, and because old objects are typically in
> a deeper delta chain, then this might explain the logarithmic slowdown.
>
> So something must be wrong with the delta cache in sha1_file.c somehow.

I applied the delta accounting patch. It took about 200MB of from the
memory use but that doesn't make a dent in 4GB of allocations.


-- 
Jon Smirl
jonsmirl@gmail.com

^ permalink raw reply

* Re: Git and GCC
From: David Miller @ 2007-12-08  1:55 UTC (permalink / raw)
  To: torvalds; +Cc: jonsmirl, peff, nico, dberlin, harvey.harrison, ismail, gcc, git
In-Reply-To: <alpine.LFD.0.9999.0712070919590.7274@woody.linux-foundation.org>

From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 7 Dec 2007 09:23:47 -0800 (PST)

> 
> 
> On Fri, 7 Dec 2007, David Miller wrote:
> > 
> > Also I could end up being performance limited by SHA, it's not very
> > well tuned on Sparc.  It's been on my TODO list to code up the crypto
> > unit support for Niagara-2 in the kernel, then work with Herbert Xu on
> > the userland interfaces to take advantage of that in things like
> > libssl.  Even a better C/asm version would probably improve GIT
> > performance a bit.
> 
> I doubt yu can use the hardware support. Kernel-only hw support is 
> inherently broken for any sane user-space usage, the setup costs are just 
> way way too high. To be useful, crypto engines need to support direct user 
> space access (ie a regular instruction, with all state being held in 
> normal registers that get saved/restored by the kernel).

Unfortunately they are hypervisor calls, and you have to give
the thing physical addresses for the buffer to work on, so
letting userland get at it directly isn't currently doable.

I still believe that there are cases where userland can take
advantage of in-kernel crypto devices, such as when we are
streaming the data into the kernel anyways (for a write()
or sendmsg()) and the user just wants the transformation to
be done on that stream.

As a specific case, hardware crypto SSL support works quite
well for sendmsg() user packet data.  And this the kind of API
Solaris provides to get good SSL performance with Niagara.

> > Is SHA a significant portion of the compute during these repacks?
> > I should run oprofile...
> 
> SHA1 is almost totally insignificant on x86. It hardly shows up. But we 
> have a good optimized version there.

Ok.

> zlib tends to be a lot more noticeable (especially the uncompression: it 
> may be faster than compression, but it's done _so_ much more that it 
> totally dominates).

zlib is really hard to optimize on Sparc, I've tried numerous times.
Actually compress is the real cycle killer, and in that case the inner
loop wants to dereference 2-byte shorts at a time but they are
unaligned half of the time, and any the check for alignment nullifies
the gains of avoiding the two byte loads.

Uncompress I don't think is optimized at all on any platform with
asm stuff like the compress side is.  It's a pretty straightforward
transformation and the memory accesses dominate the overhead.

I'll do some profiling to see what might be worth looking into.

^ permalink raw reply

* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08  1:46 UTC (permalink / raw)
  To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071505y6834f040k37261d65a2d445c4@mail.gmail.com>

On Fri, 7 Dec 2007, Jon Smirl wrote:

> Using this config:
> [pack]
>         threads = 4
>         deltacachesize = 256M
>         deltacachelimit = 0

Since you have a different result according to the source pack used then 
those cache settings, even if there was a bug with them, are not 
significant.

> And the 330MB gcc pack for input
>  git repack -a -d -f  --depth=250 --window=250
> 
> complete seconds RAM
> 10%  47 1GB
> 20%  29 1Gb
> 30%  24 1Gb
> 40%  18 1GB
> 50%  110 1.2GB
> 60%  85 1.4GB
> 70%  195 1.5GB
> 80%  186 2.5GB
> 90%  489 3.8GB
> 95%  800 4.8GB
> I killed it because it started swapping
> 
> The mmaps are only about 400MB in this case.
> At the end the git process had 4.4GB of physical RAM allocated.

That's really bad.

> Starting from a highly compressed pack greatly aggravates the problem.

That is really interesting though.

> Starting with a 2GB pack of the same data my process size only grew to
> 3GB with 2GB of mmaps.

Which is quite reasonable, even if the same issue might still be there.

So the problem seems to be related to the pack access code and not the 
repack code.  And it must have something to do with the number of deltas 
being replayed.  And because the repack is attempting delta compression 
roughly from newest to oldest, and because old objects are typically in 
a deeper delta chain, then this might explain the logarithmic slowdown.

So something must be wrong with the delta cache in sha1_file.c somehow.

Nicolas

^ permalink raw reply

* [PATCH 2/2] shortlog: code restruturing and clean-up
From: Junio C Hamano @ 2007-12-08  1:32 UTC (permalink / raw)
  To: git
In-Reply-To: <1197077573-14945-1-git-send-email-gitster@pobox.com>

The code tried to parse and clean-up the author name and the one line
information in three places (two callers of insert_author_oneline() and
the called function itself), whihc was a mess.

This renames the callee to insert_one_record() and make it responsible
for cleaning up the author name and one line information.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-shortlog.c |  165 +++++++++++++++++++--------------------------------
 1 files changed, 62 insertions(+), 103 deletions(-)

diff --git a/builtin-shortlog.c b/builtin-shortlog.c
index 3fe7546..b9cc134 100644
--- a/builtin-shortlog.c
+++ b/builtin-shortlog.c
@@ -27,45 +27,60 @@ static int compare_by_number(const void *a1, const void *a2)
 
 static struct path_list mailmap = {NULL, 0, 0, 0};
 
-static void insert_author_oneline(struct path_list *list,
-		const char *author, int authorlen,
-		const char *oneline, int onelinelen)
+static void insert_one_record(struct path_list *list,
+			      const char *author,
+			      const char *oneline)
 {
 	const char *dot3 = common_repo_prefix;
 	char *buffer, *p;
 	struct path_list_item *item;
 	struct path_list *onelines;
+	char namebuf[1024];
+	size_t len;
+	const char *eol;
+	const char *boemail, *eoemail;
+
+	boemail = strchr(author, '<');
+	if (!boemail)
+		return;
+	eoemail = strchr(boemail, '>');
+	if (!eoemail)
+		return;
+	if (!map_email(&mailmap, boemail+1, namebuf, sizeof(namebuf))) {
+		while (author < boemail && isspace(*author))
+			author++;
+		for (len = 0;
+		     len < sizeof(namebuf) - 1 && author + len < boemail;
+		     len++)
+			namebuf[len] = author[len];
+		while (0 < len && isspace(namebuf[len-1]))
+			len--;
+		namebuf[len] = '\0';
+	}
 
-	while (authorlen > 0 && isspace(author[authorlen - 1]))
-		authorlen--;
-
-	buffer = xmemdupz(author, authorlen);
+	buffer = xstrdup(namebuf);
 	item = path_list_insert(buffer, list);
 	if (item->util == NULL)
 		item->util = xcalloc(1, sizeof(struct path_list));
 	else
 		free(buffer);
 
+	eol = strchr(oneline, '\n');
+	if (!eol)
+		eol = oneline + strlen(oneline);
+	while (*oneline && isspace(*oneline) && *oneline != '\n')
+		oneline++;
 	if (!prefixcmp(oneline, "[PATCH")) {
 		char *eob = strchr(oneline, ']');
-
-		if (eob) {
-			while (isspace(eob[1]) && eob[1] != '\n')
-				eob++;
-			if (eob - oneline < onelinelen) {
-				onelinelen -= eob - oneline;
-				oneline = eob;
-			}
-		}
+		if (eob && (!eol || eob < eol))
+			oneline = eob + 1;
 	}
-
-	while (onelinelen > 0 && isspace(oneline[0])) {
+	while (*oneline && isspace(*oneline) && *oneline != '\n')
 		oneline++;
-		onelinelen--;
-	}
-	while (onelinelen > 0 && isspace(oneline[onelinelen - 1]))
-		onelinelen--;
-	buffer = xmemdupz(oneline, onelinelen);
+	len = eol - oneline;
+	while (len && isspace(oneline[len-1]))
+		len--;
+	buffer = xmemdupz(oneline, len);
 
 	if (dot3) {
 		int dot3len = strlen(dot3);
@@ -92,55 +107,32 @@ static void insert_author_oneline(struct path_list *list,
 
 static void read_from_stdin(struct path_list *list)
 {
-	char buffer[1024];
-
-	while (fgets(buffer, sizeof(buffer), stdin) != NULL) {
-		char *bob;
-		if ((buffer[0] == 'A' || buffer[0] == 'a') &&
-				!prefixcmp(buffer + 1, "uthor: ") &&
-				(bob = strchr(buffer + 7, '<')) != NULL) {
-			char buffer2[1024], offset = 0;
-
-			if (map_email(&mailmap, bob + 1, buffer, sizeof(buffer)))
-				bob = buffer + strlen(buffer);
-			else {
-				offset = 8;
-				while (buffer + offset < bob &&
-				       isspace(bob[-1]))
-					bob--;
-			}
-
-			while (fgets(buffer2, sizeof(buffer2), stdin) &&
-					buffer2[0] != '\n')
-				; /* chomp input */
-			if (fgets(buffer2, sizeof(buffer2), stdin)) {
-				int l2 = strlen(buffer2);
-				int i;
-				for (i = 0; i < l2; i++)
-					if (!isspace(buffer2[i]))
-						break;
-				insert_author_oneline(list,
-						buffer + offset,
-						bob - buffer - offset,
-						buffer2 + i, l2 - i);
-			}
-		}
+	char author[1024], oneline[1024];
+
+	while (fgets(author, sizeof(author), stdin) != NULL) {
+		if (!(author[0] == 'A' || author[0] == 'a') ||
+		    prefixcmp(author + 1, "uthor: "))
+			continue;
+		while (fgets(oneline, sizeof(oneline), stdin) &&
+		       oneline[0] != '\n')
+			; /* discard headers */
+		while (fgets(oneline, sizeof(oneline), stdin) &&
+		       oneline[0] == '\n')
+			; /* discard blanks */
+		insert_one_record(list, author + 8, oneline);
 	}
 }
 
 static void get_from_rev(struct rev_info *rev, struct path_list *list)
 {
-	char scratch[1024];
 	struct commit *commit;
 
 	prepare_revision_walk(rev);
 	while ((commit = get_revision(rev)) != NULL) {
-		const char *author = NULL, *oneline, *buffer;
-		int authorlen = authorlen, onelinelen;
+		const char *author = NULL, *buffer;
 
-		/* get author and oneline */
-		for (buffer = commit->buffer; buffer && *buffer != '\0' &&
-				*buffer != '\n'; ) {
+		buffer = commit->buffer;
+		while (*buffer && *buffer != '\n') {
 			const char *eol = strchr(buffer, '\n');
 
 			if (eol == NULL)
@@ -148,50 +140,17 @@ static void get_from_rev(struct rev_info *rev, struct path_list *list)
 			else
 				eol++;
 
-			if (!prefixcmp(buffer, "author ")) {
-				char *bracket = strchr(buffer, '<');
-
-				if (bracket == NULL || bracket > eol)
-					die("Invalid commit buffer: %s",
-					    sha1_to_hex(commit->object.sha1));
-
-				if (map_email(&mailmap, bracket + 1, scratch,
-							sizeof(scratch))) {
-					author = scratch;
-					authorlen = strlen(scratch);
-				} else {
-					if (bracket[-1] == ' ')
-						bracket--;
-
-					author = buffer + 7;
-					authorlen = bracket - buffer - 7;
-				}
-			}
+			if (!prefixcmp(buffer, "author "))
+				author = buffer + 7;
 			buffer = eol;
 		}
-
-		if (author == NULL)
-			die ("Missing author: %s",
-					sha1_to_hex(commit->object.sha1));
-
-		if (buffer == NULL || *buffer == '\0') {
-			oneline = "<none>";
-			onelinelen = sizeof(oneline) + 1;
-		} else {
-			char *eol;
-
-			oneline = buffer + 1;
-			eol = strchr(oneline, '\n');
-			if (eol == NULL)
-				onelinelen = strlen(oneline);
-			else
-				onelinelen = eol - oneline;
-		}
-
-		insert_author_oneline(list,
-				author, authorlen, oneline, onelinelen);
+		if (!author)
+			die("Missing author: %s",
+			    sha1_to_hex(commit->object.sha1));
+		if (*buffer)
+			buffer++;
+		insert_one_record(list, author, !*buffer ? "<none>" : buffer);
 	}
-
 }
 
 static int parse_uint(char const **arg, int comma)
-- 
1.5.3.7-2182-g108b

^ permalink raw reply related

* [PATCH 1/2] mailmap: fix bogus for() loop that happened to be safe by accident
From: Junio C Hamano @ 2007-12-08  1:32 UTC (permalink / raw)
  To: git

The empty loop pretended to have an empty statement as its body by a
phony indentation, but in fact was slurping the next statement into it.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailmap.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/mailmap.c b/mailmap.c
index 8714167..f017255 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -42,9 +42,10 @@ int read_mailmap(struct path_list *map, const char *filename, char **repo_abbrev
 			continue;
 		if (right_bracket == left_bracket + 1)
 			continue;
-		for (end_of_name = left_bracket; end_of_name != buffer
-				&& isspace(end_of_name[-1]); end_of_name--)
-			/* keep on looking */
+		for (end_of_name = left_bracket;
+		     end_of_name != buffer && isspace(end_of_name[-1]);
+		     end_of_name--)
+			; /* keep on looking */
 		if (end_of_name == buffer)
 			continue;
 		name = xmalloc(end_of_name - buffer + 1);
-- 
1.5.3.7-2182-g108b

^ permalink raw reply related

* [PATCH] pack-objects: fix delta cache size accounting
From: Nicolas Pitre @ 2007-12-08  1:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, Jon Smirl, Git Mailing List
In-Reply-To: <alpine.LFD.0.9999.0712071632490.12046@woody.linux-foundation.org>

The wrong value was substracted from delta_cache_size when replacing
a cached delta, as trg_entry->delta_size was used after the old size
had been replaced by the new size.

Noticed by Linus.

Signed-off-by: Nicolas Pitre <nico@cam.org> 
---

On Fri, 7 Dec 2007, Linus Torvalds wrote:

> The code in try_delta() that replaces a delta cache entry with another one 
> seems very buggy wrt that whole "delta_cache_size" update. It does
> 
> 	delta_cache_size -= trg_entry->delta_size;
> 
> to account for the old delta going away, but it does this *after* having 
> already replaced trg_entry->delta_size with the new delta entry.

Doh!  Mea culpa.

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 4f44658..350ece4 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1422,10 +1422,6 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		}
 	}
 
-	trg_entry->delta = src_entry;
-	trg_entry->delta_size = delta_size;
-	trg->depth = src->depth + 1;
-
 	/*
 	 * Handle memory allocation outside of the cache
 	 * accounting lock.  Compiler will optimize the strangeness
@@ -1439,7 +1435,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		trg_entry->delta_data = NULL;
 	}
 	if (delta_cacheable(src_size, trg_size, delta_size)) {
-		delta_cache_size += trg_entry->delta_size;
+		delta_cache_size += delta_size;
 		cache_unlock();
 		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
 	} else {
@@ -1447,6 +1443,10 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		free(delta_buf);
 	}
 
+	trg_entry->delta = src_entry;
+	trg_entry->delta_size = delta_size;
+	trg->depth = src->depth + 1;
+
 	return 1;
 }
 

^ permalink raw reply related

* git-svn branch naming question
From: Miklos Vajna @ 2007-12-08  1:04 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 852 bytes --]

hi,

i'm using git-svn for projects where i don't just want to commit to
trunk but to other branches, too.

for example:

git-svn clone -s svn+ssh://vmiklos@svn.gnome.org/svn/ooo-build ooo-build

then i have a local 'master' branch and all the other branches are local
branches.

so, when i want to work in the ooo-build-2-3 branch, i do a:

git checkout -b ooo-build-2-3 ooo-build-2-3

but when i do a git svn rebase, i get:

warning: refname 'ooo-build-2-3' is ambiguous.

what am i doing wrong?

in fact i suspect that in case i would use some other branch name, like
simply '2-3' then i could get rid of this warning, but that's the
problem with using the equivalent name of the remote branch when working
in a branch locally?

probably i miss some parameter to git-svn clone so that it would prefix
the refs with some 'origin'?

thanks,
- VMiklos

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox