* [PATCH] Calculate $commitsha1 in update() only when needed
From: Pavel Roskin @ 2007-12-08 5:07 UTC (permalink / raw)
To: git
This suppresses unhelpful error messages from git rev-parse during
checkout if the module doesn't exist.
Signed-off-by: Pavel Roskin <proski@gnu.org>
---
git-cvsserver.perl | 12 +++++++-----
1 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/git-cvsserver.perl b/git-cvsserver.perl
index ecded3b..409b301 100755
--- a/git-cvsserver.perl
+++ b/git-cvsserver.perl
@@ -2427,9 +2427,6 @@ sub update
# first lets get the commit list
$ENV{GIT_DIR} = $self->{git_path};
- my $commitsha1 = `git rev-parse $self->{module}`;
- chomp $commitsha1;
-
my $commitinfo = `git cat-file commit $self->{module} 2>&1`;
unless ( $commitinfo =~ /tree\s+[a-zA-Z0-9]{40}/ )
{
@@ -2440,8 +2437,13 @@ sub update
my $git_log;
my $lastcommit = $self->_get_prop("last_commit");
- if (defined $lastcommit && $lastcommit eq $commitsha1) { # up-to-date
- return 1;
+ if (defined $lastcommit) {
+ my $commitsha1 = `git rev-parse $self->{module}`;
+ chomp $commitsha1;
+
+ if ($lastcommit eq $commitsha1) { # up-to-date
+ return 1;
+ }
}
# Start exclusive lock here...
^ permalink raw reply related
* [PATCH 2/2] pack-objects: fix threaded load balancing
From: Nicolas Pitre @ 2007-12-08 5:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Jon Smirl
The current method consists of a master thread serving chunks of objects
to work threads when they're done with their previous chunk. The issue
is to determine the best chunk size: making it too large creates poor
load balancing, while making it too small has a negative effect on pack
size because of the increased number of chunk boundaries and poor delta
window utilization.
This patch implements a completely different approach by initially
splitting the work in large chunks uniformly amongst all threads, and
whenever a thread is done then it steals half of the remaining work from
another thread with the largest amount of unprocessed objects.
This has the advantage of greatly reducing the number of chunk boundaries
with an almost perfect load balancing.
Signed-off-by: Nicolas Pitre <nico@cam.org>
---
builtin-pack-objects.c | 117 +++++++++++++++++++++++++++++++++++-------------
1 files changed, 85 insertions(+), 32 deletions(-)
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 5002cc6..fcc1901 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1479,10 +1479,10 @@ static unsigned long free_unpacked(struct unpacked *n)
return freed_mem;
}
-static void find_deltas(struct object_entry **list, unsigned list_size,
+static void find_deltas(struct object_entry **list, unsigned *list_size,
int window, int depth, unsigned *processed)
{
- uint32_t i = 0, idx = 0, count = 0;
+ uint32_t i, idx = 0, count = 0;
unsigned int array_size = window * sizeof(struct unpacked);
struct unpacked *array;
unsigned long mem_usage = 0;
@@ -1490,11 +1490,23 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
array = xmalloc(array_size);
memset(array, 0, array_size);
- do {
- struct object_entry *entry = list[i++];
+ for (;;) {
+ struct object_entry *entry = *list++;
struct unpacked *n = array + idx;
int j, max_depth, best_base = -1;
+ progress_lock();
+ if (!*list_size) {
+ progress_unlock();
+ break;
+ }
+ (*list_size)--;
+ if (!entry->preferred_base) {
+ (*processed)++;
+ display_progress(progress_state, *processed);
+ }
+ progress_unlock();
+
mem_usage -= free_unpacked(n);
n->entry = entry;
@@ -1512,11 +1524,6 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
if (entry->preferred_base)
goto next;
- progress_lock();
- (*processed)++;
- display_progress(progress_state, *processed);
- progress_unlock();
-
/*
* If the current object is at pack edge, take the depth the
* objects that depend on the current object into account
@@ -1576,7 +1583,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
count++;
if (idx >= window)
idx = 0;
- } while (i < list_size);
+ }
for (i = 0; i < window; ++i) {
free_delta_index(array[i].index);
@@ -1591,6 +1598,7 @@ struct thread_params {
pthread_t thread;
struct object_entry **list;
unsigned list_size;
+ unsigned remaining;
int window;
int depth;
unsigned *processed;
@@ -1612,10 +1620,10 @@ static void *threaded_find_deltas(void *arg)
pthread_mutex_lock(&data_ready);
pthread_mutex_unlock(&data_request);
- if (!me->list_size)
+ if (!me->remaining)
return NULL;
- find_deltas(me->list, me->list_size,
+ find_deltas(me->list, &me->remaining,
me->window, me->depth, me->processed);
}
}
@@ -1624,57 +1632,102 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
int window, int depth, unsigned *processed)
{
struct thread_params *target, p[delta_search_threads];
- int i, ret;
- unsigned chunk_size;
+ int i, ret, active_threads = 0;
if (delta_search_threads <= 1) {
- find_deltas(list, list_size, window, depth, processed);
+ find_deltas(list, &list_size, window, depth, processed);
return;
}
pthread_mutex_lock(&data_provider);
pthread_mutex_lock(&data_ready);
+ /* Start work threads. */
for (i = 0; i < delta_search_threads; i++) {
p[i].window = window;
p[i].depth = depth;
p[i].processed = processed;
+ p[i].remaining = 0;
ret = pthread_create(&p[i].thread, NULL,
threaded_find_deltas, &p[i]);
if (ret)
die("unable to create thread: %s", strerror(ret));
+ active_threads++;
}
- /* this should be auto-tuned somehow */
- chunk_size = window * 1000;
+ /* Then partition the work amongst them. */
+ for (i = 0; i < delta_search_threads; i++) {
+ unsigned sub_size = list_size / (delta_search_threads - i);
- do {
- unsigned sublist_size = chunk_size;
- if (sublist_size > list_size)
- sublist_size = list_size;
+ pthread_mutex_lock(&data_provider);
+ target = data_requester;
+ if (!sub_size) {
+ pthread_mutex_unlock(&data_ready);
+ pthread_join(target->thread, NULL);
+ active_threads--;
+ continue;
+ }
/* try to split chunks on "path" boundaries */
- while (sublist_size < list_size && list[sublist_size]->hash &&
- list[sublist_size]->hash == list[sublist_size-1]->hash)
- sublist_size++;
+ while (sub_size < list_size && list[sub_size]->hash &&
+ list[sub_size]->hash == list[sub_size-1]->hash)
+ sub_size++;
+
+ target->list = list;
+ target->list_size = sub_size;
+ target->remaining = sub_size;
+ pthread_mutex_unlock(&data_ready);
+ list += sub_size;
+ list_size -= sub_size;
+ }
+
+ /*
+ * Now let's wait for work completion. Each time a thread is done
+ * with its work, we steal half of the remaining work from the
+ * thread with the largest number of unprocessed objects and give
+ * it to that newly idle thread. This ensure good load balancing
+ * until the remaining object list segments are simply too short
+ * to be worth splitting anymore.
+ */
+ do {
+ struct thread_params *victim = NULL;
+ unsigned sub_size = 0;
pthread_mutex_lock(&data_provider);
target = data_requester;
- target->list = list;
- target->list_size = sublist_size;
+
+ progress_lock();
+ for (i = 0; i < delta_search_threads; i++)
+ if (p[i].remaining > 2*window &&
+ (!victim || victim->remaining < p[i].remaining))
+ victim = &p[i];
+ if (victim) {
+ sub_size = victim->remaining / 2;
+ list = victim->list + victim->list_size - sub_size;
+ while (sub_size && list[0]->hash &&
+ list[0]->hash == list[-1]->hash) {
+ list++;
+ sub_size--;
+ }
+ target->list = list;
+ victim->list_size -= sub_size;
+ victim->remaining -= sub_size;
+ }
+ progress_unlock();
+
+ target->list_size = sub_size;
+ target->remaining = sub_size;
pthread_mutex_unlock(&data_ready);
- list += sublist_size;
- list_size -= sublist_size;
- if (!sublist_size) {
+ if (!sub_size) {
pthread_join(target->thread, NULL);
- i--;
+ active_threads--;
}
- } while (i);
+ } while (active_threads);
}
#else
-#define ll_find_deltas find_deltas
+#define ll_find_deltas(l, s, w, d, p) find_deltas(l, &s, w, d, p)
#endif
static void prepare_pack(int window, int depth)
--
1.5.3.7.2184.ge321d-dirty
^ permalink raw reply related
* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08 5:01 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: David Brown, Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072328420.555@xanadu.home>
On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
>
> > Does the gcc repo contain some giant objects? Why wasn't the memory
> > freed after their chain was processed?
>
> It should be.
>
> > Most of the last 10% is being done on a single CPU. There must be a
> > chain of giant objects that is unbalancing everything.
>
> I'm about to send a patch to fix the thread balancing for real this
> time.
Something is really broken in the last 5% of that repo. I have been
processing at 97% for 30 minutes without moving to 98%.
--
Jon Smirl
jonsmirl@gmail.com
^ permalink raw reply
* [PATCH 1/2] pack-objects: reverse the delta search sort list
From: Nicolas Pitre @ 2007-12-08 5:00 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Jon Smirl
It is currently sorted and then walked backward. Not only this doesn't
feel natural for my poor brain, but it would make the next patch less
obvious as well.
So reverse the sort order, and reverse the list walking direction,
which effectively produces the exact same end result as before.
Also bring the relevant comment nearer the actual code and adjust it
accordingly, with minor additional clarifications.
Signed-off-by: Nicolas Pitre <nico@cam.org>
---
builtin-pack-objects.c | 41 +++++++++++++++++++++--------------------
1 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 4f44658..5002cc6 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1245,28 +1245,37 @@ static void get_object_details(void)
free(sorted_by_offset);
}
+/*
+ * We search for deltas in a list sorted by type, by filename hash, and then
+ * by size, so that we see progressively smaller and smaller files.
+ * That's because we prefer deltas to be from the bigger file
+ * to the smaller -- deletes are potentially cheaper, but perhaps
+ * more importantly, the bigger file is likely the more recent
+ * one. The deepest deltas are therefore the oldest objects which are
+ * less susceptible to be accessed often.
+ */
static int type_size_sort(const void *_a, const void *_b)
{
const struct object_entry *a = *(struct object_entry **)_a;
const struct object_entry *b = *(struct object_entry **)_b;
- if (a->type < b->type)
- return -1;
if (a->type > b->type)
- return 1;
- if (a->hash < b->hash)
return -1;
- if (a->hash > b->hash)
+ if (a->type < b->type)
return 1;
- if (a->preferred_base < b->preferred_base)
+ if (a->hash > b->hash)
return -1;
- if (a->preferred_base > b->preferred_base)
+ if (a->hash < b->hash)
return 1;
- if (a->size < b->size)
+ if (a->preferred_base > b->preferred_base)
return -1;
+ if (a->preferred_base < b->preferred_base)
+ return 1;
if (a->size > b->size)
+ return -1;
+ if (a->size < b->size)
return 1;
- return a > b ? -1 : (a < b); /* newest last */
+ return a < b ? -1 : (a > b); /* newest first */
}
struct unpacked {
@@ -1317,14 +1326,6 @@ static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
-/*
- * We search for deltas _backwards_ in a list sorted by type and
- * by size, so that we see progressively smaller and smaller files.
- * That's because we prefer deltas to be from the bigger file
- * to the smaller - deletes are potentially cheaper, but perhaps
- * more importantly, the bigger file is likely the more recent
- * one.
- */
static int try_delta(struct unpacked *trg, struct unpacked *src,
unsigned max_depth, unsigned long *mem_usage)
{
@@ -1481,7 +1482,7 @@ static unsigned long free_unpacked(struct unpacked *n)
static void find_deltas(struct object_entry **list, unsigned list_size,
int window, int depth, unsigned *processed)
{
- uint32_t i = list_size, idx = 0, count = 0;
+ uint32_t i = 0, idx = 0, count = 0;
unsigned int array_size = window * sizeof(struct unpacked);
struct unpacked *array;
unsigned long mem_usage = 0;
@@ -1490,7 +1491,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
memset(array, 0, array_size);
do {
- struct object_entry *entry = list[--i];
+ struct object_entry *entry = list[i++];
struct unpacked *n = array + idx;
int j, max_depth, best_base = -1;
@@ -1575,7 +1576,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size,
count++;
if (idx >= window)
idx = 0;
- } while (i > 0);
+ } while (i < list_size);
for (i = 0; i < window; ++i) {
free_delta_index(array[i].index);
--
1.5.3.7.2184.ge321d-dirty
^ permalink raw reply related
* [PATCH] git-help: simplify and fix option parsing.
From: Christian Couder @ 2007-12-08 5:06 UTC (permalink / raw)
To: Junio Hamano; +Cc: git
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
help.c | 31 +++++++++++--------------------
1 files changed, 11 insertions(+), 20 deletions(-)
Junio wrote about "help.c" in my
"git-help: add -w|--web option to display html man page in a browser."
patch:
> Isn't this "check-help-cmd" duplication ugly, by the way?
You are right, this patch should fix it.
Thanks.
diff --git a/help.c b/help.c
index ecc8c66..78686db 100644
--- a/help.c
+++ b/help.c
@@ -241,7 +241,9 @@ void list_common_cmds_help(void)
static const char *cmd_to_page(const char *git_cmd)
{
- if (!prefixcmp(git_cmd, "git"))
+ if (!git_cmd)
+ return "git";
+ else if (!prefixcmp(git_cmd, "git"))
return git_cmd;
else {
int page_len = strlen(git_cmd) + 4;
@@ -283,38 +285,27 @@ int cmd_version(int argc, const char **argv, const char *prefix)
return 0;
}
-static void check_help_cmd(const char *help_cmd)
+int cmd_help(int argc, const char **argv, const char *prefix)
{
- if (!help_cmd) {
+ if (argc < 2) {
printf("usage: %s\n\n", git_usage_string);
list_common_cmds_help();
exit(0);
}
- else if (!strcmp(help_cmd, "--all") || !strcmp(help_cmd, "-a")) {
+ const char *help_cmd = argv[1];
+
+ if (!strcmp(help_cmd, "--all") || !strcmp(help_cmd, "-a")) {
printf("usage: %s\n\n", git_usage_string);
list_commands();
- exit(0);
}
-}
-int cmd_help(int argc, const char **argv, const char *prefix)
-{
- const char *help_cmd = argc > 1 ? argv[1] : NULL;
- check_help_cmd(help_cmd);
-
- if (!strcmp(help_cmd, "--web") || !strcmp(help_cmd, "-w")) {
- help_cmd = argc > 2 ? argv[2] : NULL;
- check_help_cmd(help_cmd);
-
- show_html_page(help_cmd);
+ else if (!strcmp(help_cmd, "--web") || !strcmp(help_cmd, "-w")) {
+ show_html_page(argc > 2 ? argv[2] : NULL);
}
else if (!strcmp(help_cmd, "--info") || !strcmp(help_cmd, "-i")) {
- help_cmd = argc > 2 ? argv[2] : NULL;
- check_help_cmd(help_cmd);
-
- show_info_page(help_cmd);
+ show_info_page(argc > 2 ? argv[2] : NULL);
}
else
--
1.5.3.7.2199.ge1512-dirty
^ permalink raw reply related
* Re: git guidance
From: Al Boldi @ 2007-12-08 4:56 UTC (permalink / raw)
To: Valdis.Kletnieks
Cc: Jakub Narebski, Andreas Ericsson, Johannes Schindelin,
Phillip Susi, Linus Torvalds, Jing Xue, linux-kernel, git
In-Reply-To: <11272.1197056185@turing-police.cc.vt.edu>
Valdis.Kletnieks@vt.edu wrote:
> On Fri, 07 Dec 2007 22:04:48 +0300, Al Boldi said:
> > Because WORKFLOW C is transparent, it won't affect other workflows. So
> > you could still use your normal WORKFLOW B in addition to WORKFLOW C,
> > gaining an additional level of version control detail at no extra cost
> > other than the git-engine scratch repository overhead.
> >
> > BTW, is git efficient enough to handle WORKFLOW C?
>
> Imagine the number of commits a 'make clean; make' will do in a kernel
> tree, as it commits all those .o files... :)
.o files???
It probably goes without saying, that gitfs should have some basic
configuration file to setup its transparent behaviour, and which would most
probably contain an include / exclude file-filter mask, and probably other
basic configuration options. But this is really secondary to the
implementation, and the question remains whether git is efficient enough.
IOW, how big is the git commit overhead as compared to a normal copy?
Thanks!
--
Al
^ permalink raw reply
* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08 4:30 UTC (permalink / raw)
To: Jon Smirl; +Cc: David Brown, Git Mailing List
In-Reply-To: <9e4733910712072022na3369caob48d4b26a56224ea@mail.gmail.com>
On Fri, 7 Dec 2007, Jon Smirl wrote:
> Does the gcc repo contain some giant objects? Why wasn't the memory
> freed after their chain was processed?
It should be.
> Most of the last 10% is being done on a single CPU. There must be a
> chain of giant objects that is unbalancing everything.
I'm about to send a patch to fix the thread balancing for real this
time.
Nicolas
^ permalink raw reply
* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08 4:22 UTC (permalink / raw)
To: David Brown, Nicolas Pitre, Git Mailing List
In-Reply-To: <20071208033722.GA27776@old.davidb.org>
On 12/7/07, David Brown <git@davidb.org> wrote:
> On Fri, Dec 07, 2007 at 10:29:31PM -0500, Jon Smirl wrote:
> >The kernel repo has the same problem but not nearly as bad.
> >
> >Starting from a default pack
> > git repack -a -d -f --depth=1000 --window=1000
> >Uses 1GB of physical memory
> >
> >Now do the command again.
> > git repack -a -d -f --depth=1000 --window=1000
> >Uses 1.3GB of physical memory
>
> With my repo that contains a bunch of 50MB tarfiles, I've found I must
> specify --window-memory as well to keep repack from using nearly unbounded
> amounts of memory. Perhaps it is the larger files found in gcc that
> provokes this.
>
> A window size of 1000 can take a lot of memory if the objects are large.
This is a partial solution to the problem. Adding window size =256M
took memory consumption down from 4.8GB to 2.8GB. It took an hour to
run the test.
It is not the complete solution since my git process is still using 2.4GB
of physical memory. I am also still experiencing a lot of slowdown in the
last 10%.
Does the gcc repo contain some giant objects? Why wasn't the memory
freed after their chain was processed?
Most of the last 10% is being done on a single CPU. There must be a
chain of giant objects that is unbalancing everything.
--
Jon Smirl
jonsmirl@gmail.com
^ permalink raw reply
* Re: [PATCH 2/3] git-help: add -w|--web option to display html man page in a browser.
From: Christian Couder @ 2007-12-08 4:00 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Theodore Tso, Jakub Narebski, Alex Riesen, Andreas Ericsson,
Matthieu Moy, Eric Wong
In-Reply-To: <7vfxyf7zoi.fsf@gitster.siamese.dyndns.org>
Le vendredi 7 décembre 2007, Junio C Hamano a écrit :
> Junio C Hamano <gitster@pobox.com> writes:
> > Christian Couder <chriscool@tuxfamily.org> writes:
> > ...
> >
> >>> > +init_browser_path() {
> >>> > + browser_path=`git config browser.$1.path`
> >>> > + test -z "$browser_path" && browser_path=$1
> >>> > +}
> >>>
> >>> Please do not contaminate the config file with something the user can
> >>> easily use a lot more standardized way (iow $PATH) to configure to
> >>> his taste.
> >>>
> >>> I'd suggest dropping this bit.
>
> Well, I changed my mind. It is a bit funny to have both firefox and
> iceweasel as "valid-tool", but if we consider $browser to define the
> external interface and $browser_path to define the implementation, it
> sort of makes sense to have that configuration. browser_path could be
> iceweasel for browser firefox.
>
> I'll squash the patch to update the one from the last round (as the last
> two patches are not yet accepted in 'next'), remove the html
> documentation path fallback, but will leave this part in.
Thanks.
> browser.*.path and web.browser configuration need to be documented, if
> not already, though.
Did you see this patch:
http://article.gmane.org/gmane.comp.version-control.git/67101
Christian.
^ permalink raw reply
* Re: Something is broken in repack
From: Harvey Harrison @ 2007-12-08 3:48 UTC (permalink / raw)
To: Jon Smirl; +Cc: Nicolas Pitre, Git Mailing List
In-Reply-To: <9e4733910712071929h17a7d88dv37686ec7cd858c63@mail.gmail.com>
On Fri, 2007-12-07 at 22:29 -0500, Jon Smirl wrote:
> The kernel repo has the same problem but not nearly as bad.
>
> Starting from a default pack
> git repack -a -d -f --depth=1000 --window=1000
> Uses 1GB of physical memory
>
> Now do the command again.
> git repack -a -d -f --depth=1000 --window=1000
> Uses 1.3GB of physical memory
>
> I suspect the gcc repo has much longer revision chains than the kernel
> one since the kernel repo is only a few years old. The Mozilla repo
> contained revision chains with over 2,000 revisions. Longer revision
> chains result in longer delta chains.
I sent out a partial delta breakdown for the gcc repo earlier, here's
the whole list.
breakdown of the gcc packfile:
Total objects
1017922
ChainLength Objects Cumulative
1: 103817 103817
2: 67332 171149
3: 57520 228669
4: 52570 281239
5: 43910 325149
6: 37520 362669
7: 35248 397917
8: 29819 427736
9: 27619 455355
10: 22656 478011
11: 21073 499084
12: 18738 517822
13: 16674 534496
14: 14882 549378
15: 14424 563802
16: 12765 576567
17: 11662 588229
18: 11845 600074
19: 11694 611768
20: 9625 621393
21: 9031 630424
22: 8437 638861
23: 8217 647078
24: 7927 655005
25: 7955 662960
26: 7092 670052
27: 7004 677056
28: 6724 683780
29: 6626 690406
30: 5875 696281
31: 5970 702251
32: 5726 707977
33: 6025 714002
34: 5354 719356
35: 6413 725769
36: 4933 730702
37: 4888 735590
38: 4561 740151
39: 4366 744517
40: 4166 748683
41: 4531 753214
42: 4029 757243
43: 3701 760944
44: 3647 764591
45: 3553 768144
46: 3509 771653
47: 3473 775126
48: 3442 778568
49: 3379 781947
50: 3395 785342
51: 3315 788657
52: 3168 791825
53: 3345 795170
54: 3166 798336
55: 3237 801573
56: 2795 804368
57: 2768 807136
58: 2666 809802
59: 2723 812525
60: 2547 815072
61: 2565 817637
62: 2622 820259
63: 2521 822780
64: 2492 825272
65: 2529 827801
66: 2566 830367
67: 2685 833052
68: 2458 835510
69: 2457 837967
70: 2440 840407
71: 2410 842817
72: 2337 845154
73: 2301 847455
74: 2201 849656
75: 2127 851783
76: 2256 854039
77: 2038 856077
78: 1925 858002
79: 1965 859967
80: 1929 861896
81: 1890 863786
82: 1873 865659
83: 1964 867623
84: 1898 869521
85: 1839 871360
86: 1933 873293
87: 1876 875169
88: 1851 877020
89: 1789 878809
90: 1790 880599
91: 1804 882403
92: 1696 884099
93: 1863 885962
94: 1889 887851
95: 1766 889617
96: 1731 891348
97: 1775 893123
98: 1750 894873
99: 1767 896640
100: 1644 898284
101: 1642 899926
102: 1489 901415
103: 1532 902947
104: 1564 904511
105: 1477 905988
106: 1461 907449
107: 1383 908832
108: 1422 910254
109: 1316 911570
110: 1480 913050
111: 1329 914379
112: 1375 915754
113: 1292 917046
114: 1224 918270
115: 1123 919393
116: 1216 920609
117: 1252 921861
118: 1252 923113
119: 1346 924459
120: 1320 925779
121: 1277 927056
122: 1234 928290
123: 1200 929490
124: 1255 930745
125: 1206 931951
126: 1155 933106
127: 1246 934352
128: 1226 935578
129: 1194 936772
130: 1268 938040
131: 1334 939374
132: 1146 940520
133: 1220 941740
134: 1055 942795
135: 1110 943905
136: 1095 945000
137: 1294 946294
138: 1204 947498
139: 1218 948716
140: 1101 949817
141: 993 950810
142: 975 951785
143: 1014 952799
144: 968 953767
145: 957 954724
146: 1069 955793
147: 996 956789
148: 967 957756
149: 964 958720
150: 954 959674
151: 949 960623
152: 1001 961624
153: 1042 962666
154: 1057 963723
155: 948 964671
156: 966 965637
157: 833 966470
158: 959 967429
159: 907 968336
160: 854 969190
161: 847 970037
162: 836 970873
163: 769 971642
164: 747 972389
165: 755 973144
166: 707 973851
167: 774 974625
168: 777 975402
169: 783 976185
170: 707 976892
171: 738 977630
172: 775 978405
173: 781 979186
174: 698 979884
175: 801 980685
176: 712 981397
177: 679 982076
178: 775 982851
179: 696 983547
180: 760 984307
181: 740 985047
182: 752 985799
183: 704 986503
184: 683 987186
185: 690 987876
186: 741 988617
187: 642 989259
188: 672 989931
189: 679 990610
190: 691 991301
191: 648 991949
192: 703 992652
193: 675 993327
194: 687 994014
195: 625 994639
196: 607 995246
197: 583 995829
198: 632 996461
199: 540 997001
200: 652 997653
201: 600 998253
202: 628 998881
203: 624 999505
204: 582 1000087
205: 548 1000635
206: 520 1001155
207: 648 1001803
208: 556 1002359
209: 563 1002922
210: 508 1003430
211: 570 1004000
212: 530 1004530
213: 575 1005105
214: 527 1005632
215: 521 1006153
216: 515 1006668
217: 513 1007181
218: 460 1007641
219: 491 1008132
220: 474 1008606
221: 471 1009077
222: 482 1009559
223: 485 1010044
224: 439 1010483
225: 385 1010868
226: 385 1011253
227: 403 1011656
228: 380 1012036
229: 376 1012412
230: 377 1012789
231: 415 1013204
232: 394 1013598
233: 362 1013960
234: 334 1014294
235: 366 1014660
236: 317 1014977
237: 362 1015339
238: 343 1015682
239: 392 1016074
240: 317 1016391
241: 305 1016696
242: 319 1017015
243: 276 1017291
244: 247 1017538
245: 179 1017717
246: 111 1017828
247: 61 1017889
248: 27 1017916
249: 6 1017922
Harvey
^ permalink raw reply
* Re: Something is broken in repack
From: Harvey Harrison @ 2007-12-08 3:44 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Jon Smirl, Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>
On Fri, 2007-12-07 at 20:46 -0500, Nicolas Pitre wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
> > And the 330MB gcc pack for input
> > git repack -a -d -f --depth=250 --window=250
> >
> > complete seconds RAM
> > 10% 47 1GB
> > 20% 29 1Gb
> > 30% 24 1Gb
> > 40% 18 1GB
> > 50% 110 1.2GB
> > 60% 85 1.4GB
> > 70% 195 1.5GB
> > 80% 186 2.5GB
> > 90% 489 3.8GB
> > 95% 800 4.8GB
> > I killed it because it started swapping
> >
> > The mmaps are only about 400MB in this case.
> > At the end the git process had 4.4GB of physical RAM allocated.
> > Starting with a 2GB pack of the same data my process size only grew to
> > 3GB with 2GB of mmaps.
>
> Which is quite reasonable, even if the same issue might still be there.
>
> So the problem seems to be related to the pack access code and not the
> repack code. And it must have something to do with the number of deltas
> being replayed. And because the repack is attempting delta compression
> roughly from newest to oldest, and because old objects are typically in
> a deeper delta chain, then this might explain the logarithmic slowdown.
>
> So something must be wrong with the delta cache in sha1_file.c somehow.
All I have is a qualitative observation, but during the process of
creating the pack, there was a _huge_ slowdown between 10-15%
(hundreds/dozens per second to single object per second and a
corresponding increase in process size). Didn't keep any numbers
at the time, but it was noticeable.
I wonder if there are a bunch of huge objects somewhere in gcc's
history?
Harvey
^ permalink raw reply
* Re: Something is broken in repack
From: David Brown @ 2007-12-08 3:37 UTC (permalink / raw)
To: Jon Smirl; +Cc: Nicolas Pitre, Git Mailing List
In-Reply-To: <9e4733910712071929h17a7d88dv37686ec7cd858c63@mail.gmail.com>
On Fri, Dec 07, 2007 at 10:29:31PM -0500, Jon Smirl wrote:
>The kernel repo has the same problem but not nearly as bad.
>
>Starting from a default pack
> git repack -a -d -f --depth=1000 --window=1000
>Uses 1GB of physical memory
>
>Now do the command again.
> git repack -a -d -f --depth=1000 --window=1000
>Uses 1.3GB of physical memory
With my repo that contains a bunch of 50MB tarfiles, I've found I must
specify --window-memory as well to keep repack from using nearly unbounded
amounts of memory. Perhaps it is the larger files found in gcc that
provokes this.
A window size of 1000 can take a lot of memory if the objects are large.
Dave
^ permalink raw reply
* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08 3:29 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072124160.555@xanadu.home>
The kernel repo has the same problem but not nearly as bad.
Starting from a default pack
git repack -a -d -f --depth=1000 --window=1000
Uses 1GB of physical memory
Now do the command again.
git repack -a -d -f --depth=1000 --window=1000
Uses 1.3GB of physical memory
I suspect the gcc repo has much longer revision chains than the kernel
one since the kernel repo is only a few years old. The Mozilla repo
contained revision chains with over 2,000 revisions. Longer revision
chains result in longer delta chains.
So what is allocating the extra memory? Either a function of the
number of entries in the chain, or related to accessing the chain
since a chain with more entries will need to be accessed more times.
I have a 168MB kernel pack now after 15 minutes of four cores at 100%.
Here's another observation, the gcc objects are larger. Kernel has
650K objects in 190MB, gcc has 870K objects in 330MB. Average gcc
object is 30% larger. How should the average kernel developer
interpret this?
--
Jon Smirl
jonsmirl@gmail.com
^ permalink raw reply
* Re: Something is broken in repack
From: David Brown @ 2007-12-08 2:56 UTC (permalink / raw)
To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071505y6834f040k37261d65a2d445c4@mail.gmail.com>
On Fri, Dec 07, 2007 at 06:05:38PM -0500, Jon Smirl wrote:
>Using this config:
>[pack]
> threads = 4
> deltacachesize = 256M
> deltacachelimit = 0
Just out of curiosity, does adding
[pack]
windowmemory = 256M
help? I've found this to grow very large when there are large blobs.
Dave
^ permalink raw reply
* Re: git-bisect feature suggestion: "git-bisect diff"
From: Junio C Hamano @ 2007-12-08 2:54 UTC (permalink / raw)
To: Jeff King; +Cc: Ingo Molnar, git
In-Reply-To: <20071207220738.GA23535@coredump.intra.peff.net>
Jeff King <peff@peff.net> writes:
> Right, which leads to my (perhaps subtle) point that the builtin alias
> hack is just what you said elsewhere: a cute hack. IOW, I am slightly
> NAKing inclusion of it in master (OTOH, I really don't see what it could
> _hurt_, so maybe somebody could find a use for it that we didn't think
> of).
Heh, since when one can NAK one's own change? ;-)
Yeah, I am inclined to let it rot in 'next' until 1.5.4 ships and then
decide. Either people will forget about it (in which case we can
revert) or enough people would want it and give some magic smarts to
it.
^ permalink raw reply
* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08 2:28 UTC (permalink / raw)
To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071804ja0a49e1m1eb209cb942bc36f@mail.gmail.com>
On Fri, 7 Dec 2007, Jon Smirl wrote:
> On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> > On Fri, 7 Dec 2007, Jon Smirl wrote:
> >
> > > git repack -a -d -f --depth=250 --window=250
> > >
> > > complete seconds RAM
> > > 10% 47 1GB
> > > 20% 29 1Gb
> > > 30% 24 1Gb
> > > 40% 18 1GB
> > > 50% 110 1.2GB
> > > 60% 85 1.4GB
> > > 70% 195 1.5GB
> > > 80% 186 2.5GB
> > > 90% 489 3.8GB
> > > 95% 800 4.8GB
> > > I killed it because it started swapping
> > >
> > > The mmaps are only about 400MB in this case.
> > > At the end the git process had 4.4GB of physical RAM allocated.
> >
> > That's really bad.
> >
> > > Starting from a highly compressed pack greatly aggravates the problem.
> >
> > That is really interesting though.
> >
> > > Starting with a 2GB pack of the same data my process size only grew to
> > > 3GB with 2GB of mmaps.
> >
> > Which is quite reasonable, even if the same issue might still be there.
> >
> > So the problem seems to be related to the pack access code and not the
> > repack code. And it must have something to do with the number of deltas
> > being replayed. And because the repack is attempting delta compression
> > roughly from newest to oldest, and because old objects are typically in
> > a deeper delta chain, then this might explain the logarithmic slowdown.
> >
> > So something must be wrong with the delta cache in sha1_file.c somehow.
Staring at the cache code I don't see anything wrong with it.
> I applied the delta accounting patch. It took about 200MB off the
> memory use but that doesn't make a dent in 4GB of allocations.
Right. I didn't expect much from that fix.
Nicolas
^ permalink raw reply
* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08 2:22 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>
On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> So the problem seems to be related to the pack access code and not the
> repack code. And it must have something to do with the number of deltas
> being replayed. And because the repack is attempting delta compression
> roughly from newest to oldest, and because old objects are typically in
> a deeper delta chain, then this might explain the logarithmic slowdown.
What could be wrongly allocating 4GB of memory? Figure that out and
you should have your answer. The slow down may be coming from having
to search through more and more objects in memory.
Memory consumption seems to be correlated to the depth of the delta
chain being accessed. It blows up tremendously right at the end. It
may even grow as the square of the chain length. For the normal
default case the square didn't hurt, but 250*250 = 62,500 which would
eat a huge amount of memory.
--
Jon Smirl
jonsmirl@gmail.com
^ permalink raw reply
* Re: Git and GCC
From: J.C. Pizarro @ 2007-12-08 2:21 UTC (permalink / raw)
To: Linus Torvalds, David Miller, gcc, git
On 2007/12/07, "Linus Torvalds" <torvalds@linux-foundation.org> wrote:
> On Fri, 7 Dec 2007, David Miller wrote:
> >
> > Also I could end up being performance limited by SHA, it's not very
> > well tuned on Sparc. It's been on my TODO list to code up the crypto
> > unit support for Niagara-2 in the kernel, then work with Herbert Xu on
> > the userland interfaces to take advantage of that in things like
> > libssl. Even a better C/asm version would probably improve GIT
> > performance a bit.
>
> I doubt you can use the hardware support. Kernel-only hw support is
> inherently broken for any sane user-space usage, the setup costs are just
> way way too high. To be useful, crypto engines need to support direct user
> space access (ie a regular instruction, with all state being held in
> normal registers that get saved/restored by the kernel).
>
> > Is SHA a significant portion of the compute during these repacks?
> > I should run oprofile...
>
> SHA1 is almost totally insignificant on x86. It hardly shows up. But we
> have a good optimized version there.
If SHA1 is slow, then why doesn't he contribute by adding Haval160 (3 rounds),
which is faster than SHA1? It could be optimized even further with SIMD
instructions in kernelspace and userland.
>
> zlib tends to be a lot more noticeable (especially the uncompression: it
> may be faster than compression, but it's done _so_ much more that it
> totally dominates).
>
> Linus
It would be better to:
1. "Don't compress this repo, but compact this uncompressed repo
using a minimal spanning forest and deltas"
2. "Afterwards, compress this whole repo with LZMA (e.g. 48MiB) from 7zip before
burning it to DVD for backup reasons or before replicating it to
the internet".
J.C.Pizarro "the noiser"
^ permalink raw reply
* Re: Something is broken in repack
From: Jon Smirl @ 2007-12-08 2:04 UTC (permalink / raw)
To: Nicolas Pitre; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.0.99999.0712072032410.555@xanadu.home>
On 12/7/07, Nicolas Pitre <nico@cam.org> wrote:
> On Fri, 7 Dec 2007, Jon Smirl wrote:
>
> > Using this config:
> > [pack]
> > threads = 4
> > deltacachesize = 256M
> > deltacachelimit = 0
>
> Since you have a different result according to the source pack used then
> those cache settings, even if there was a bug with them, are not
> significant.
>
> > And the 330MB gcc pack for input
> > git repack -a -d -f --depth=250 --window=250
> >
> > complete seconds RAM
> > 10% 47 1GB
> > 20% 29 1Gb
> > 30% 24 1Gb
> > 40% 18 1GB
> > 50% 110 1.2GB
> > 60% 85 1.4GB
> > 70% 195 1.5GB
> > 80% 186 2.5GB
> > 90% 489 3.8GB
> > 95% 800 4.8GB
> > I killed it because it started swapping
> >
> > The mmaps are only about 400MB in this case.
> > At the end the git process had 4.4GB of physical RAM allocated.
>
> That's really bad.
>
> > Starting from a highly compressed pack greatly aggravates the problem.
>
> That is really interesting though.
>
> > Starting with a 2GB pack of the same data my process size only grew to
> > 3GB with 2GB of mmaps.
>
> Which is quite reasonable, even if the same issue might still be there.
>
> So the problem seems to be related to the pack access code and not the
> repack code. And it must have something to do with the number of deltas
> being replayed. And because the repack is attempting delta compression
> roughly from newest to oldest, and because old objects are typically in
> a deeper delta chain, then this might explain the logarithmic slowdown.
>
> So something must be wrong with the delta cache in sha1_file.c somehow.
I applied the delta accounting patch. It took about 200MB off the
memory use but that doesn't make a dent in 4GB of allocations.
--
Jon Smirl
jonsmirl@gmail.com
^ permalink raw reply
* Re: Git and GCC
From: David Miller @ 2007-12-08 1:55 UTC (permalink / raw)
To: torvalds; +Cc: jonsmirl, peff, nico, dberlin, harvey.harrison, ismail, gcc, git
In-Reply-To: <alpine.LFD.0.9999.0712070919590.7274@woody.linux-foundation.org>
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 7 Dec 2007 09:23:47 -0800 (PST)
>
>
> On Fri, 7 Dec 2007, David Miller wrote:
> >
> > Also I could end up being performance limited by SHA, it's not very
> > well tuned on Sparc. It's been on my TODO list to code up the crypto
> > unit support for Niagara-2 in the kernel, then work with Herbert Xu on
> > the userland interfaces to take advantage of that in things like
> > libssl. Even a better C/asm version would probably improve GIT
> > performance a bit.
>
> I doubt you can use the hardware support. Kernel-only hw support is
> inherently broken for any sane user-space usage, the setup costs are just
> way way too high. To be useful, crypto engines need to support direct user
> space access (ie a regular instruction, with all state being held in
> normal registers that get saved/restored by the kernel).
Unfortunately they are hypervisor calls, and you have to give
the thing physical addresses for the buffer to work on, so
letting userland get at it directly isn't currently doable.
I still believe that there are cases where userland can take
advantage of in-kernel crypto devices, such as when we are
streaming the data into the kernel anyways (for a write()
or sendmsg()) and the user just wants the transformation to
be done on that stream.
As a specific case, hardware crypto SSL support works quite
well for sendmsg() user packet data. And this is the kind of API
Solaris provides to get good SSL performance with Niagara.
> > Is SHA a significant portion of the compute during these repacks?
> > I should run oprofile...
>
> SHA1 is almost totally insignificant on x86. It hardly shows up. But we
> have a good optimized version there.
Ok.
> zlib tends to be a lot more noticeable (especially the uncompression: it
> may be faster than compression, but it's done _so_ much more that it
> totally dominates).
zlib is really hard to optimize on Sparc, I've tried numerous times.
Actually compress is the real cycle killer, and in that case the inner
loop wants to dereference 2-byte shorts at a time but they are
unaligned half of the time, and any check for alignment nullifies
the gains of avoiding the two byte loads.
Uncompress I don't think is optimized at all on any platform with
asm stuff like the compress side is. It's a pretty straightforward
transformation and the memory accesses dominate the overhead.
I'll do some profiling to see what might be worth looking into.
^ permalink raw reply
* Re: Something is broken in repack
From: Nicolas Pitre @ 2007-12-08 1:46 UTC (permalink / raw)
To: Jon Smirl; +Cc: Git Mailing List
In-Reply-To: <9e4733910712071505y6834f040k37261d65a2d445c4@mail.gmail.com>
On Fri, 7 Dec 2007, Jon Smirl wrote:
> Using this config:
> [pack]
> threads = 4
> deltacachesize = 256M
> deltacachelimit = 0
Since you have a different result according to the source pack used then
those cache settings, even if there was a bug with them, are not
significant.
> And the 330MB gcc pack for input
> git repack -a -d -f --depth=250 --window=250
>
> complete seconds RAM
> 10% 47 1GB
> 20% 29 1Gb
> 30% 24 1Gb
> 40% 18 1GB
> 50% 110 1.2GB
> 60% 85 1.4GB
> 70% 195 1.5GB
> 80% 186 2.5GB
> 90% 489 3.8GB
> 95% 800 4.8GB
> I killed it because it started swapping
>
> The mmaps are only about 400MB in this case.
> At the end the git process had 4.4GB of physical RAM allocated.
That's really bad.
> Starting from a highly compressed pack greatly aggravates the problem.
That is really interesting though.
> Starting with a 2GB pack of the same data my process size only grew to
> 3GB with 2GB of mmaps.
Which is quite reasonable, even if the same issue might still be there.
So the problem seems to be related to the pack access code and not the
repack code. And it must have something to do with the number of deltas
being replayed. And because the repack is attempting delta compression
roughly from newest to oldest, and because old objects are typically in
a deeper delta chain, then this might explain the logarithmic slowdown.
So something must be wrong with the delta cache in sha1_file.c somehow.
Nicolas
^ permalink raw reply
* [PATCH 2/2] shortlog: code restructuring and clean-up
From: Junio C Hamano @ 2007-12-08 1:32 UTC (permalink / raw)
To: git
In-Reply-To: <1197077573-14945-1-git-send-email-gitster@pobox.com>
The code tried to parse and clean-up the author name and the one line
information in three places (two callers of insert_author_oneline() and
the called function itself), which was a mess.
This renames the callee to insert_one_record() and makes it responsible
for cleaning up the author name and one line information.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
builtin-shortlog.c | 165 +++++++++++++++++++--------------------------------
1 files changed, 62 insertions(+), 103 deletions(-)
diff --git a/builtin-shortlog.c b/builtin-shortlog.c
index 3fe7546..b9cc134 100644
--- a/builtin-shortlog.c
+++ b/builtin-shortlog.c
@@ -27,45 +27,60 @@ static int compare_by_number(const void *a1, const void *a2)
static struct path_list mailmap = {NULL, 0, 0, 0};
-static void insert_author_oneline(struct path_list *list,
- const char *author, int authorlen,
- const char *oneline, int onelinelen)
+static void insert_one_record(struct path_list *list,
+ const char *author,
+ const char *oneline)
{
const char *dot3 = common_repo_prefix;
char *buffer, *p;
struct path_list_item *item;
struct path_list *onelines;
+ char namebuf[1024];
+ size_t len;
+ const char *eol;
+ const char *boemail, *eoemail;
+
+ boemail = strchr(author, '<');
+ if (!boemail)
+ return;
+ eoemail = strchr(boemail, '>');
+ if (!eoemail)
+ return;
+ if (!map_email(&mailmap, boemail+1, namebuf, sizeof(namebuf))) {
+ while (author < boemail && isspace(*author))
+ author++;
+ for (len = 0;
+ len < sizeof(namebuf) - 1 && author + len < boemail;
+ len++)
+ namebuf[len] = author[len];
+ while (0 < len && isspace(namebuf[len-1]))
+ len--;
+ namebuf[len] = '\0';
+ }
- while (authorlen > 0 && isspace(author[authorlen - 1]))
- authorlen--;
-
- buffer = xmemdupz(author, authorlen);
+ buffer = xstrdup(namebuf);
item = path_list_insert(buffer, list);
if (item->util == NULL)
item->util = xcalloc(1, sizeof(struct path_list));
else
free(buffer);
+ eol = strchr(oneline, '\n');
+ if (!eol)
+ eol = oneline + strlen(oneline);
+ while (*oneline && isspace(*oneline) && *oneline != '\n')
+ oneline++;
if (!prefixcmp(oneline, "[PATCH")) {
char *eob = strchr(oneline, ']');
-
- if (eob) {
- while (isspace(eob[1]) && eob[1] != '\n')
- eob++;
- if (eob - oneline < onelinelen) {
- onelinelen -= eob - oneline;
- oneline = eob;
- }
- }
+ if (eob && (!eol || eob < eol))
+ oneline = eob + 1;
}
-
- while (onelinelen > 0 && isspace(oneline[0])) {
+ while (*oneline && isspace(*oneline) && *oneline != '\n')
oneline++;
- onelinelen--;
- }
- while (onelinelen > 0 && isspace(oneline[onelinelen - 1]))
- onelinelen--;
- buffer = xmemdupz(oneline, onelinelen);
+ len = eol - oneline;
+ while (len && isspace(oneline[len-1]))
+ len--;
+ buffer = xmemdupz(oneline, len);
if (dot3) {
int dot3len = strlen(dot3);
@@ -92,55 +107,32 @@ static void insert_author_oneline(struct path_list *list,
static void read_from_stdin(struct path_list *list)
{
- char buffer[1024];
-
- while (fgets(buffer, sizeof(buffer), stdin) != NULL) {
- char *bob;
- if ((buffer[0] == 'A' || buffer[0] == 'a') &&
- !prefixcmp(buffer + 1, "uthor: ") &&
- (bob = strchr(buffer + 7, '<')) != NULL) {
- char buffer2[1024], offset = 0;
-
- if (map_email(&mailmap, bob + 1, buffer, sizeof(buffer)))
- bob = buffer + strlen(buffer);
- else {
- offset = 8;
- while (buffer + offset < bob &&
- isspace(bob[-1]))
- bob--;
- }
-
- while (fgets(buffer2, sizeof(buffer2), stdin) &&
- buffer2[0] != '\n')
- ; /* chomp input */
- if (fgets(buffer2, sizeof(buffer2), stdin)) {
- int l2 = strlen(buffer2);
- int i;
- for (i = 0; i < l2; i++)
- if (!isspace(buffer2[i]))
- break;
- insert_author_oneline(list,
- buffer + offset,
- bob - buffer - offset,
- buffer2 + i, l2 - i);
- }
- }
+ char author[1024], oneline[1024];
+
+ while (fgets(author, sizeof(author), stdin) != NULL) {
+ if (!(author[0] == 'A' || author[0] == 'a') ||
+ prefixcmp(author + 1, "uthor: "))
+ continue;
+ while (fgets(oneline, sizeof(oneline), stdin) &&
+ oneline[0] != '\n')
+ ; /* discard headers */
+ while (fgets(oneline, sizeof(oneline), stdin) &&
+ oneline[0] == '\n')
+ ; /* discard blanks */
+ insert_one_record(list, author + 8, oneline);
}
}
static void get_from_rev(struct rev_info *rev, struct path_list *list)
{
- char scratch[1024];
struct commit *commit;
prepare_revision_walk(rev);
while ((commit = get_revision(rev)) != NULL) {
- const char *author = NULL, *oneline, *buffer;
- int authorlen = authorlen, onelinelen;
+ const char *author = NULL, *buffer;
- /* get author and oneline */
- for (buffer = commit->buffer; buffer && *buffer != '\0' &&
- *buffer != '\n'; ) {
+ buffer = commit->buffer;
+ while (*buffer && *buffer != '\n') {
const char *eol = strchr(buffer, '\n');
if (eol == NULL)
@@ -148,50 +140,17 @@ static void get_from_rev(struct rev_info *rev, struct path_list *list)
else
eol++;
- if (!prefixcmp(buffer, "author ")) {
- char *bracket = strchr(buffer, '<');
-
- if (bracket == NULL || bracket > eol)
- die("Invalid commit buffer: %s",
- sha1_to_hex(commit->object.sha1));
-
- if (map_email(&mailmap, bracket + 1, scratch,
- sizeof(scratch))) {
- author = scratch;
- authorlen = strlen(scratch);
- } else {
- if (bracket[-1] == ' ')
- bracket--;
-
- author = buffer + 7;
- authorlen = bracket - buffer - 7;
- }
- }
+ if (!prefixcmp(buffer, "author "))
+ author = buffer + 7;
buffer = eol;
}
-
- if (author == NULL)
- die ("Missing author: %s",
- sha1_to_hex(commit->object.sha1));
-
- if (buffer == NULL || *buffer == '\0') {
- oneline = "<none>";
- onelinelen = sizeof(oneline) + 1;
- } else {
- char *eol;
-
- oneline = buffer + 1;
- eol = strchr(oneline, '\n');
- if (eol == NULL)
- onelinelen = strlen(oneline);
- else
- onelinelen = eol - oneline;
- }
-
- insert_author_oneline(list,
- author, authorlen, oneline, onelinelen);
+ if (!author)
+ die("Missing author: %s",
+ sha1_to_hex(commit->object.sha1));
+ if (*buffer)
+ buffer++;
+ insert_one_record(list, author, !*buffer ? "<none>" : buffer);
}
-
}
static int parse_uint(char const **arg, int comma)
--
1.5.3.7-2182-g108b
^ permalink raw reply related
* [PATCH 1/2] mailmap: fix bogus for() loop that happened to be safe by accident
From: Junio C Hamano @ 2007-12-08 1:32 UTC (permalink / raw)
To: git
The empty loop pretended to have an empty statement as its body by a
phony indentation, but in fact was slurping the next statement into it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
mailmap.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/mailmap.c b/mailmap.c
index 8714167..f017255 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -42,9 +42,10 @@ int read_mailmap(struct path_list *map, const char *filename, char **repo_abbrev
continue;
if (right_bracket == left_bracket + 1)
continue;
- for (end_of_name = left_bracket; end_of_name != buffer
- && isspace(end_of_name[-1]); end_of_name--)
- /* keep on looking */
+ for (end_of_name = left_bracket;
+ end_of_name != buffer && isspace(end_of_name[-1]);
+ end_of_name--)
+ ; /* keep on looking */
if (end_of_name == buffer)
continue;
name = xmalloc(end_of_name - buffer + 1);
--
1.5.3.7-2182-g108b
^ permalink raw reply related
* [PATCH] pack-objects: fix delta cache size accounting
From: Nicolas Pitre @ 2007-12-08 1:27 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, Jon Smirl, Git Mailing List
In-Reply-To: <alpine.LFD.0.9999.0712071632490.12046@woody.linux-foundation.org>
The wrong value was subtracted from delta_cache_size when replacing
a cached delta, as trg_entry->delta_size was used after the old size
had been replaced by the new size.
Noticed by Linus.
Signed-off-by: Nicolas Pitre <nico@cam.org>
---
On Fri, 7 Dec 2007, Linus Torvalds wrote:
> The code in try_delta() that replaces a delta cache entry with another one
> seems very buggy wrt that whole "delta_cache_size" update. It does
>
> delta_cache_size -= trg_entry->delta_size;
>
> to account for the old delta going away, but it does this *after* having
> already replaced trg_entry->delta_size with the new delta entry.
Doh! Mea culpa.
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 4f44658..350ece4 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1422,10 +1422,6 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
}
}
- trg_entry->delta = src_entry;
- trg_entry->delta_size = delta_size;
- trg->depth = src->depth + 1;
-
/*
* Handle memory allocation outside of the cache
* accounting lock. Compiler will optimize the strangeness
@@ -1439,7 +1435,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
trg_entry->delta_data = NULL;
}
if (delta_cacheable(src_size, trg_size, delta_size)) {
- delta_cache_size += trg_entry->delta_size;
+ delta_cache_size += delta_size;
cache_unlock();
trg_entry->delta_data = xrealloc(delta_buf, delta_size);
} else {
@@ -1447,6 +1443,10 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
free(delta_buf);
}
+ trg_entry->delta = src_entry;
+ trg_entry->delta_size = delta_size;
+ trg->depth = src->depth + 1;
+
return 1;
}
^ permalink raw reply related
* git-svn branch naming question
From: Miklos Vajna @ 2007-12-08 1:04 UTC (permalink / raw)
To: git
[-- Attachment #1: Type: text/plain, Size: 852 bytes --]
hi,
i'm using git-svn for projects where i don't just want to commit to
trunk but to other branches, too.
for example:
git-svn clone -s svn+ssh://vmiklos@svn.gnome.org/svn/ooo-build ooo-build
then i have a local 'master' branch and all the other branches are local
branches.
so, when i want to work in the ooo-build-2-3 branch, i do a:
git checkout -b ooo-build-2-3 ooo-build-2-3
but when i do a git svn rebase, i get:
warning: refname 'ooo-build-2-3' is ambiguous.
what am i doing wrong?
in fact i suspect that in case i would use some other branch name, like
simply '2-3' then i could get rid of this warning, but that's the
problem with using the equivalent name of the remote branch when working
in a branch locally?
probably i miss some parameter to git-svn clone so that it would prefix
the refs with some 'origin'?
thanks,
- VMiklos
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox