* [PATCH] describe: rewrite name_rev() iteratively @ 2014-04-06 22:47 Dragos Foianu 2014-04-08 7:41 ` Eric Sunshine 2014-04-08 18:50 ` Jeff King 0 siblings, 2 replies; 3+ messages in thread From: Dragos Foianu @ 2014-04-06 22:47 UTC (permalink / raw) To: git; +Cc: Dragos Foianu The "git describe --contains" command uses the name_rev() function which is currently a recursive function. This causes a Stack Overflow when the history is large enough. Rewrite name_rev iteratively using a stack on the heap. This slightly reduces performance due to the extra operations on the heap, but the function no longer overflows the stack. Reported-by: Sylvestre Ledru <sylvestre@mozilla.com> Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com> --- builtin/name-rev.c | 176 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 128 insertions(+), 48 deletions(-) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index c824d4e..5848d81 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -19,66 +19,146 @@ static long cutoff = LONG_MAX; /* How many generations are maximally preferred over _one_ merge traversal? */ #define MERGE_TRAVERSAL_WEIGHT 65535 +typedef struct rev_data { + struct commit *commit; + const char *tip_name; + int generation; + int distance; + int deref; +} *rev_data; + +typedef struct rev_stack { + struct rev_data *rev; + struct rev_stack *next; +} *rev_stack; + +static void stack_push(rev_stack *stack, rev_data data) { + rev_stack new_node = xmalloc(sizeof(*new_node)); + + new_node->rev = data; + new_node->next = *stack; + *stack = new_node; +} + +static void stack_push_end(rev_stack *stack, rev_data data) { + rev_stack new_node = xmalloc(sizeof(*new_node)); + + while (*stack != NULL) + stack = &(*stack)->next; + new_node->rev = data; + new_node->next = *stack; + *stack = new_node; +} + +static rev_data stack_pop(rev_stack *stack) { + rev_stack next = (*stack)->next; + rev_data rev = (*stack)->rev; + free(*stack); + + *stack = next; + return rev; +} + +static rev_data make_rev_data(struct commit *commit, + const char* tip_name, int generation, int distance, + int deref) +{ + rev_data data = xmalloc(sizeof(*data)); + + data->commit = commit; + data->tip_name = tip_name; + data->generation = generation; + data->distance = distance; + data->deref = deref; + + return data; +} + static void name_rev(struct commit *commit, const char *tip_name, int generation, int distance, int deref) { - struct rev_name *name = (struct rev_name *)commit->util; - struct commit_list *parents; - int parent_number = 1; + rev_stack stack = NULL; + rev_data data, next_rev; - parse_commit(commit); + data = make_rev_data(commit, tip_name, generation, distance, deref); + stack_push(&stack, data); - if (commit->date < cutoff) - return; + while (stack != NULL) { + rev_data rev = stack_pop(&stack); - if (deref) { - char *new_name = xmalloc(strlen(tip_name)+3); - strcpy(new_name, tip_name); - strcat(new_name, "^0"); - tip_name = new_name; + struct rev_name *name = (struct rev_name *) rev->commit->util; + struct commit_list *parents; + int parent_number = 1; - if (generation) - die("generation: %d, but deref?", generation); - } + parse_commit(rev->commit); + + if (rev->commit->date < cutoff) + continue; + + if (rev->deref) { + char *new_name = xmalloc(strlen(rev->tip_name) + 3); + strcpy(new_name, rev->tip_name); + strcat(new_name, "^0"); + rev->tip_name = new_name; - if (name == NULL) { - name = xmalloc(sizeof(rev_name)); - commit->util = name; - goto copy_data; - } else if (name->distance > distance) { + if (rev->generation) + die("generation: %d, but deref?", + rev->generation); + } + + if (name == NULL) { + name = xmalloc(sizeof(rev_name)); + rev->commit->util = name; + goto copy_data; + } else if (name->distance > rev->distance) { copy_data: - name->tip_name = tip_name; - name->generation = generation; - name->distance = distance; - } else - return; - - for (parents = commit->parents; - parents; - parents = parents->next, parent_number++) { - if (parent_number > 1) { - int len = strlen(tip_name); - char *new_name = xmalloc(len + - 1 + decimal_length(generation) + /* ~<n> */ - 1 + 2 + /* ^NN */ - 1); - - if (len > 2 && !strcmp(tip_name + len - 2, "^0")) - len -= 2; - if (generation > 0) - sprintf(new_name, "%.*s~%d^%d", len, tip_name, - generation, parent_number); - else - sprintf(new_name, "%.*s^%d", len, tip_name, - parent_number); + name->tip_name = rev->tip_name; + name->generation = rev->generation; + name->distance = rev->distance; + } else + continue; - name_rev(parents->item, new_name, 0, - distance + MERGE_TRAVERSAL_WEIGHT, 0); - } else { - name_rev(parents->item, tip_name, generation + 1, - distance + 1, 0); + for (parents = rev->commit->parents; + parents; + parents = parents->next, parent_number++) { + if (parent_number > 1) { + int len = strlen(rev->tip_name); + char *new_name = xmalloc(len + + /* ~<n> */ + 1 + decimal_length(rev->generation) + + /* ^NN */ + 1 + 2 + + 1); + + if (len > 2 && + !strcmp(rev->tip_name + len - 2, "^0")) + len -= 2; + + if (rev->generation > 0) + sprintf(new_name, "%.*s~%d^%d", len, + rev->tip_name, rev->generation, + parent_number); + else + sprintf(new_name, "%.*s^%d", len, + rev->tip_name, parent_number); + + next_rev = make_rev_data(parents->item, + new_name, 0, + rev->distance + MERGE_TRAVERSAL_WEIGHT, + 0); + + stack_push_end(&stack, next_rev); + } else { + next_rev = make_rev_data(parents->item, + rev->tip_name, rev->generation + 1, + rev->distance + 1, 0); + + stack_push(&stack, next_rev); + } } + + free(rev); } } -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] describe: rewrite name_rev() iteratively 2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu @ 2014-04-08 7:41 ` Eric Sunshine 2014-04-08 18:50 ` Jeff King 1 sibling, 0 replies; 3+ messages in thread From: Eric Sunshine @ 2014-04-08 7:41 UTC (permalink / raw) To: Dragos Foianu; +Cc: Git List, Sylvestre Ledru [cc: Sylvestre Ledru <sylvestre@mozilla.com>] On Sun, Apr 6, 2014 at 6:47 PM, Dragos Foianu <dragos.foianu@gmail.com> wrote: > The "git describe --contains" command uses the name_rev() function which > is currently a recursive function. This causes a Stack Overflow when the > history is large enough. No need to capitalize "stack overflow". It might be helpful if you provide a link to the original problem report by Sylvestre [1] for context. [1]: http://thread.gmane.org/gmane.comp.version-control.git/244430 > Rewrite name_rev iteratively using a stack on the heap. This slightly > reduces performance due to the extra operations on the heap, but the > function no longer overflows the stack. > > Reported-by: Sylvestre Ledru <sylvestre@mozilla.com> It's a good idea to cc: the original reporter of the problem so that he can test the fix. (And, generally speaking, it's good etiquette to cc: people who commented on the issue.) > Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com> > --- > builtin/name-rev.c | 176 ++++++++++++++++++++++++++++++++++++++-------------- > 1 file changed, 128 insertions(+), 48 deletions(-) > > diff --git a/builtin/name-rev.c b/builtin/name-rev.c > index c824d4e..5848d81 100644 > --- a/builtin/name-rev.c > +++ b/builtin/name-rev.c > @@ -19,66 +19,146 @@ static long cutoff = LONG_MAX; > /* How many generations are maximally preferred over _one_ merge traversal? */ > #define MERGE_TRAVERSAL_WEIGHT 65535 > > +typedef struct rev_data { On this project, "typedef struct" is almost universally avoided. Just say "struct rev_data" when needed. > + struct commit *commit; > + const char *tip_name; > + int generation; > + int distance; > + int deref; 'deref' may be true only for the initial call to name_rev(); all recursive invocations unconditionally pass false, so including it in the structure is superfluous and potentially confusing for readers. More below. > +} *rev_data; > + > +typedef struct rev_stack { > + struct rev_data *rev; > + struct rev_stack *next; > +} *rev_stack; > + > +static void stack_push(rev_stack *stack, rev_data data) { > + rev_stack new_node = xmalloc(sizeof(*new_node)); > + > + new_node->rev = data; > + new_node->next = *stack; > + *stack = new_node; > +} > + > +static void stack_push_end(rev_stack *stack, rev_data data) { > + rev_stack new_node = xmalloc(sizeof(*new_node)); > + > + while (*stack != NULL) > + stack = &(*stack)->next; > + new_node->rev = data; > + new_node->next = *stack; > + *stack = new_node; > +} > + > +static rev_data stack_pop(rev_stack *stack) { > + rev_stack next = (*stack)->next; > + rev_data rev = (*stack)->rev; > + free(*stack); > + > + *stack = next; > + return rev; > +} > + > +static rev_data make_rev_data(struct commit *commit, > + const char* tip_name, int generation, int distance, > + int deref) > +{ > + rev_data data = xmalloc(sizeof(*data)); > + > + data->commit = commit; > + data->tip_name = tip_name; > + data->generation = generation; > + data->distance = distance; > + data->deref = deref; > + > + return data; > +} > + > static void name_rev(struct commit *commit, > const char *tip_name, int generation, int distance, > int deref) > { > - struct rev_name *name = (struct rev_name *)commit->util; > - struct commit_list *parents; > - int parent_number = 1; > + rev_stack stack = NULL; > + rev_data data, next_rev; > > - parse_commit(commit); > + data = make_rev_data(commit, tip_name, generation, distance, deref); > + stack_push(&stack, data); > > - if (commit->date < cutoff) > - return; > + while (stack != NULL) { > + rev_data rev = stack_pop(&stack); > > - if (deref) { > - char *new_name = xmalloc(strlen(tip_name)+3); > - strcpy(new_name, tip_name); > - strcat(new_name, "^0"); > - tip_name = new_name; > + struct rev_name *name = (struct rev_name *) rev->commit->util; > + struct commit_list *parents; > + int parent_number = 1; > > - if (generation) > - die("generation: %d, but deref?", generation); > - } > + parse_commit(rev->commit); > + > + if (rev->commit->date < cutoff) > + continue; > + > + if (rev->deref) { > + char *new_name = xmalloc(strlen(rev->tip_name) + 3); > + strcpy(new_name, rev->tip_name); > + strcat(new_name, "^0"); > + rev->tip_name = new_name; As mentioned above, 'deref' may be true only upon the first call; recursive invocations always set it to false, so the entire 'if (deref)' conditional can be promoted out of your 'while (stack != NULL)' loop. (In fact, this deref processing could be promoted out of this function altogether since its only ever done for the first commit visited, but such a change is likely fodder for a separate cleanup patch.) More below. > - if (name == NULL) { > - name = xmalloc(sizeof(rev_name)); > - commit->util = name; > - goto copy_data; > - } else if (name->distance > distance) { > + if (rev->generation) > + die("generation: %d, but deref?", > + rev->generation); > + } > + > + if (name == NULL) { > + name = xmalloc(sizeof(rev_name)); > + rev->commit->util = name; > + goto copy_data; > + } else if (name->distance > rev->distance) { > copy_data: > - name->tip_name = tip_name; > - name->generation = generation; > - name->distance = distance; > - } else > - return; > - > - for (parents = commit->parents; > - parents; > - parents = parents->next, parent_number++) { > - if (parent_number > 1) { > - int len = strlen(tip_name); > - char *new_name = xmalloc(len + > - 1 + decimal_length(generation) + /* ~<n> */ > - 1 + 2 + /* ^NN */ > - 1); > - > - if (len > 2 && !strcmp(tip_name + len - 2, "^0")) > - len -= 2; > - if (generation > 0) > - sprintf(new_name, "%.*s~%d^%d", len, tip_name, > - generation, parent_number); > - else > - sprintf(new_name, "%.*s^%d", len, tip_name, > - parent_number); > + name->tip_name = rev->tip_name; > + name->generation = rev->generation; > + name->distance = rev->distance; > + } else > + continue; > > - name_rev(parents->item, new_name, 0, > - distance + MERGE_TRAVERSAL_WEIGHT, 0); > - } else { > - name_rev(parents->item, tip_name, generation + 1, > - distance + 1, 0); > + for (parents = rev->commit->parents; > + parents; > + parents = parents->next, parent_number++) { > + if (parent_number > 1) { > + int len = strlen(rev->tip_name); > + char *new_name = xmalloc(len + > + /* ~<n> */ > + 1 + decimal_length(rev->generation) + > + /* ^NN */ > + 1 + 2 + > + 1); > + > + if (len > 2 && > + !strcmp(rev->tip_name + len - 2, "^0")) > + len -= 2; > + > + if (rev->generation > 0) > + sprintf(new_name, "%.*s~%d^%d", len, > + rev->tip_name, rev->generation, > + parent_number); > + else > + sprintf(new_name, "%.*s^%d", len, > + rev->tip_name, parent_number); > + > + next_rev = make_rev_data(parents->item, > + new_name, 0, > + rev->distance + MERGE_TRAVERSAL_WEIGHT, > + 0); > + > + stack_push_end(&stack, next_rev); > + } else { > + next_rev = make_rev_data(parents->item, > + rev->tip_name, rev->generation + 1, > + rev->distance + 1, 0); > + > + stack_push(&stack, next_rev); > + } This logic changes the order in which the commits are visited. Given a history, such as: --D--B---A --E-/ / --F--C-/ --G-/ where A's parents are (B,C), B's parents are (D,E), and C's parents are (F,G), the original code traverses commits in this order: A B D E C F G but, with your patch, they are traversed in this order: A B D C F E G > } > + > + free(rev); > } > } > > -- > 1.7.10.4 ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] describe: rewrite name_rev() iteratively 2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu 2014-04-08 7:41 ` Eric Sunshine @ 2014-04-08 18:50 ` Jeff King 1 sibling, 0 replies; 3+ messages in thread From: Jeff King @ 2014-04-08 18:50 UTC (permalink / raw) To: Dragos Foianu; +Cc: git On Mon, Apr 07, 2014 at 01:47:14AM +0300, Dragos Foianu wrote: > The "git describe --contains" command uses the name_rev() function which > is currently a recursive function. This causes a Stack Overflow when the > history is large enough. > > Rewrite name_rev iteratively using a stack on the heap. This slightly > reduces performance due to the extra operations on the heap, but the > function no longer overflows the stack. You can avoid the heap overhead by using an array for your stack, and only resizing it when necessary. Like this: struct rev_stack { int nr, alloc; struct rev_data *data; }; static struct rev_data *rev_stack_push(struct rev_stack *stack) { ALLOC_GROW(stack->data, stack->nr + 1, stack->alloc); return &stack->data[stack->nr++]; } static void rev_stack_pop(struct rev_stack *stack) { stack->nr--; } static void rev_stack_init(struct rev_stack *stack) { stack->nr = stack->alloc = 0; stack->data = NULL; } static void rev_stack_release(struct rev_stack *stack) { free(stack->data); rev_stack_init(stack); } Usage would be something like: struct rev_data *data = rev_stack_push(&stack); data->commit = commit; data->tip_name = tip_name; ... IOW, you push first to allocate the space, and then do your make_rev_data, rather than the other way around. The downside is that your allocation is always as big as the deepest recursion so far, so you hold on to the memory a little longer than necessary. I think that's a good tradeoff versus an extra malloc() for every commit. -Peff ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-04-08 18:50 UTC | newest] Thread overview: 3+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu 2014-04-08 7:41 ` Eric Sunshine 2014-04-08 18:50 ` Jeff King
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).