* [PATCH] describe: rewrite name_rev() iteratively
@ 2014-04-06 22:47 Dragos Foianu
2014-04-08 7:41 ` Eric Sunshine
2014-04-08 18:50 ` Jeff King
0 siblings, 2 replies; 3+ messages in thread
From: Dragos Foianu @ 2014-04-06 22:47 UTC (permalink / raw)
To: git; +Cc: Dragos Foianu
The "git describe --contains" command uses the name_rev() function which
is currently a recursive function. This causes a Stack Overflow when the
history is large enough.
Rewrite name_rev iteratively using a stack on the heap. This slightly
reduces performance due to the extra operations on the heap, but the
function no longer overflows the stack.
Reported-by: Sylvestre Ledru <sylvestre@mozilla.com>
Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com>
---
builtin/name-rev.c | 176 ++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 128 insertions(+), 48 deletions(-)
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index c824d4e..5848d81 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -19,66 +19,146 @@ static long cutoff = LONG_MAX;
/* How many generations are maximally preferred over _one_ merge traversal? */
#define MERGE_TRAVERSAL_WEIGHT 65535
+typedef struct rev_data {
+ struct commit *commit;
+ const char *tip_name;
+ int generation;
+ int distance;
+ int deref;
+} *rev_data;
+
+typedef struct rev_stack {
+ struct rev_data *rev;
+ struct rev_stack *next;
+} *rev_stack;
+
+static void stack_push(rev_stack *stack, rev_data data) {
+ rev_stack new_node = xmalloc(sizeof(*new_node));
+
+ new_node->rev = data;
+ new_node->next = *stack;
+ *stack = new_node;
+}
+
+static void stack_push_end(rev_stack *stack, rev_data data) {
+ rev_stack new_node = xmalloc(sizeof(*new_node));
+
+ while (*stack != NULL)
+ stack = &(*stack)->next;
+ new_node->rev = data;
+ new_node->next = *stack;
+ *stack = new_node;
+}
+
+static rev_data stack_pop(rev_stack *stack) {
+ rev_stack next = (*stack)->next;
+ rev_data rev = (*stack)->rev;
+ free(*stack);
+
+ *stack = next;
+ return rev;
+}
+
+static rev_data make_rev_data(struct commit *commit,
+ const char* tip_name, int generation, int distance,
+ int deref)
+{
+ rev_data data = xmalloc(sizeof(*data));
+
+ data->commit = commit;
+ data->tip_name = tip_name;
+ data->generation = generation;
+ data->distance = distance;
+ data->deref = deref;
+
+ return data;
+}
+
static void name_rev(struct commit *commit,
const char *tip_name, int generation, int distance,
int deref)
{
- struct rev_name *name = (struct rev_name *)commit->util;
- struct commit_list *parents;
- int parent_number = 1;
+ rev_stack stack = NULL;
+ rev_data data, next_rev;
- parse_commit(commit);
+ data = make_rev_data(commit, tip_name, generation, distance, deref);
+ stack_push(&stack, data);
- if (commit->date < cutoff)
- return;
+ while (stack != NULL) {
+ rev_data rev = stack_pop(&stack);
- if (deref) {
- char *new_name = xmalloc(strlen(tip_name)+3);
- strcpy(new_name, tip_name);
- strcat(new_name, "^0");
- tip_name = new_name;
+ struct rev_name *name = (struct rev_name *) rev->commit->util;
+ struct commit_list *parents;
+ int parent_number = 1;
- if (generation)
- die("generation: %d, but deref?", generation);
- }
+ parse_commit(rev->commit);
+
+ if (rev->commit->date < cutoff)
+ continue;
+
+ if (rev->deref) {
+ char *new_name = xmalloc(strlen(rev->tip_name) + 3);
+ strcpy(new_name, rev->tip_name);
+ strcat(new_name, "^0");
+ rev->tip_name = new_name;
- if (name == NULL) {
- name = xmalloc(sizeof(rev_name));
- commit->util = name;
- goto copy_data;
- } else if (name->distance > distance) {
+ if (rev->generation)
+ die("generation: %d, but deref?",
+ rev->generation);
+ }
+
+ if (name == NULL) {
+ name = xmalloc(sizeof(rev_name));
+ rev->commit->util = name;
+ goto copy_data;
+ } else if (name->distance > rev->distance) {
copy_data:
- name->tip_name = tip_name;
- name->generation = generation;
- name->distance = distance;
- } else
- return;
-
- for (parents = commit->parents;
- parents;
- parents = parents->next, parent_number++) {
- if (parent_number > 1) {
- int len = strlen(tip_name);
- char *new_name = xmalloc(len +
- 1 + decimal_length(generation) + /* ~<n> */
- 1 + 2 + /* ^NN */
- 1);
-
- if (len > 2 && !strcmp(tip_name + len - 2, "^0"))
- len -= 2;
- if (generation > 0)
- sprintf(new_name, "%.*s~%d^%d", len, tip_name,
- generation, parent_number);
- else
- sprintf(new_name, "%.*s^%d", len, tip_name,
- parent_number);
+ name->tip_name = rev->tip_name;
+ name->generation = rev->generation;
+ name->distance = rev->distance;
+ } else
+ continue;
- name_rev(parents->item, new_name, 0,
- distance + MERGE_TRAVERSAL_WEIGHT, 0);
- } else {
- name_rev(parents->item, tip_name, generation + 1,
- distance + 1, 0);
+ for (parents = rev->commit->parents;
+ parents;
+ parents = parents->next, parent_number++) {
+ if (parent_number > 1) {
+ int len = strlen(rev->tip_name);
+ char *new_name = xmalloc(len +
+ /* ~<n> */
+ 1 + decimal_length(rev->generation) +
+ /* ^NN */
+ 1 + 2 +
+ 1);
+
+ if (len > 2 &&
+ !strcmp(rev->tip_name + len - 2, "^0"))
+ len -= 2;
+
+ if (rev->generation > 0)
+ sprintf(new_name, "%.*s~%d^%d", len,
+ rev->tip_name, rev->generation,
+ parent_number);
+ else
+ sprintf(new_name, "%.*s^%d", len,
+ rev->tip_name, parent_number);
+
+ next_rev = make_rev_data(parents->item,
+ new_name, 0,
+ rev->distance + MERGE_TRAVERSAL_WEIGHT,
+ 0);
+
+ stack_push_end(&stack, next_rev);
+ } else {
+ next_rev = make_rev_data(parents->item,
+ rev->tip_name, rev->generation + 1,
+ rev->distance + 1, 0);
+
+ stack_push(&stack, next_rev);
+ }
}
+
+ free(rev);
}
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] describe: rewrite name_rev() iteratively
2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
@ 2014-04-08 7:41 ` Eric Sunshine
2014-04-08 18:50 ` Jeff King
1 sibling, 0 replies; 3+ messages in thread
From: Eric Sunshine @ 2014-04-08 7:41 UTC (permalink / raw)
To: Dragos Foianu; +Cc: Git List, Sylvestre Ledru
[cc: Sylvestre Ledru <sylvestre@mozilla.com>]
On Sun, Apr 6, 2014 at 6:47 PM, Dragos Foianu <dragos.foianu@gmail.com> wrote:
> The "git describe --contains" command uses the name_rev() function which
> is currently a recursive function. This causes a Stack Overflow when the
> history is large enough.
No need to capitalize "stack overflow".
It might be helpful if you provide a link to the original problem
report by Sylvestre [1] for context.
[1]: http://thread.gmane.org/gmane.comp.version-control.git/244430
> Rewrite name_rev iteratively using a stack on the heap. This slightly
> reduces performance due to the extra operations on the heap, but the
> function no longer overflows the stack.
>
> Reported-by: Sylvestre Ledru <sylvestre@mozilla.com>
It's a good idea to cc: the original reporter of the problem so that
he can test the fix. (And, generally speaking, it's good etiquette to
cc: people who commented on the issue.)
> Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com>
> ---
> builtin/name-rev.c | 176 ++++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 128 insertions(+), 48 deletions(-)
>
> diff --git a/builtin/name-rev.c b/builtin/name-rev.c
> index c824d4e..5848d81 100644
> --- a/builtin/name-rev.c
> +++ b/builtin/name-rev.c
> @@ -19,66 +19,146 @@ static long cutoff = LONG_MAX;
> /* How many generations are maximally preferred over _one_ merge traversal? */
> #define MERGE_TRAVERSAL_WEIGHT 65535
>
> +typedef struct rev_data {
On this project, "typedef struct" is almost universally avoided. Just
say "struct rev_data" when needed.
> + struct commit *commit;
> + const char *tip_name;
> + int generation;
> + int distance;
> + int deref;
'deref' may be true only for the initial call to name_rev(); all
recursive invocations unconditionally pass false, so including it in
the structure is superfluous and potentially confusing for readers.
More below.
> +} *rev_data;
> +
> +typedef struct rev_stack {
> + struct rev_data *rev;
> + struct rev_stack *next;
> +} *rev_stack;
> +
> +static void stack_push(rev_stack *stack, rev_data data) {
> + rev_stack new_node = xmalloc(sizeof(*new_node));
> +
> + new_node->rev = data;
> + new_node->next = *stack;
> + *stack = new_node;
> +}
> +
> +static void stack_push_end(rev_stack *stack, rev_data data) {
> + rev_stack new_node = xmalloc(sizeof(*new_node));
> +
> + while (*stack != NULL)
> + stack = &(*stack)->next;
> + new_node->rev = data;
> + new_node->next = *stack;
> + *stack = new_node;
> +}
> +
> +static rev_data stack_pop(rev_stack *stack) {
> + rev_stack next = (*stack)->next;
> + rev_data rev = (*stack)->rev;
> + free(*stack);
> +
> + *stack = next;
> + return rev;
> +}
> +
> +static rev_data make_rev_data(struct commit *commit,
> + const char* tip_name, int generation, int distance,
> + int deref)
> +{
> + rev_data data = xmalloc(sizeof(*data));
> +
> + data->commit = commit;
> + data->tip_name = tip_name;
> + data->generation = generation;
> + data->distance = distance;
> + data->deref = deref;
> +
> + return data;
> +}
> +
> static void name_rev(struct commit *commit,
> const char *tip_name, int generation, int distance,
> int deref)
> {
> - struct rev_name *name = (struct rev_name *)commit->util;
> - struct commit_list *parents;
> - int parent_number = 1;
> + rev_stack stack = NULL;
> + rev_data data, next_rev;
>
> - parse_commit(commit);
> + data = make_rev_data(commit, tip_name, generation, distance, deref);
> + stack_push(&stack, data);
>
> - if (commit->date < cutoff)
> - return;
> + while (stack != NULL) {
> + rev_data rev = stack_pop(&stack);
>
> - if (deref) {
> - char *new_name = xmalloc(strlen(tip_name)+3);
> - strcpy(new_name, tip_name);
> - strcat(new_name, "^0");
> - tip_name = new_name;
> + struct rev_name *name = (struct rev_name *) rev->commit->util;
> + struct commit_list *parents;
> + int parent_number = 1;
>
> - if (generation)
> - die("generation: %d, but deref?", generation);
> - }
> + parse_commit(rev->commit);
> +
> + if (rev->commit->date < cutoff)
> + continue;
> +
> + if (rev->deref) {
> + char *new_name = xmalloc(strlen(rev->tip_name) + 3);
> + strcpy(new_name, rev->tip_name);
> + strcat(new_name, "^0");
> + rev->tip_name = new_name;
As mentioned above, 'deref' may be true only upon the first call;
recursive invocations always set it to false, so the entire 'if
(deref)' conditional can be promoted out of your 'while (stack !=
NULL)' loop.
(In fact, this deref processing could be promoted out of this function
altogether since its only ever done for the first commit visited, but
such a change is likely fodder for a separate cleanup patch.)
More below.
> - if (name == NULL) {
> - name = xmalloc(sizeof(rev_name));
> - commit->util = name;
> - goto copy_data;
> - } else if (name->distance > distance) {
> + if (rev->generation)
> + die("generation: %d, but deref?",
> + rev->generation);
> + }
> +
> + if (name == NULL) {
> + name = xmalloc(sizeof(rev_name));
> + rev->commit->util = name;
> + goto copy_data;
> + } else if (name->distance > rev->distance) {
> copy_data:
> - name->tip_name = tip_name;
> - name->generation = generation;
> - name->distance = distance;
> - } else
> - return;
> -
> - for (parents = commit->parents;
> - parents;
> - parents = parents->next, parent_number++) {
> - if (parent_number > 1) {
> - int len = strlen(tip_name);
> - char *new_name = xmalloc(len +
> - 1 + decimal_length(generation) + /* ~<n> */
> - 1 + 2 + /* ^NN */
> - 1);
> -
> - if (len > 2 && !strcmp(tip_name + len - 2, "^0"))
> - len -= 2;
> - if (generation > 0)
> - sprintf(new_name, "%.*s~%d^%d", len, tip_name,
> - generation, parent_number);
> - else
> - sprintf(new_name, "%.*s^%d", len, tip_name,
> - parent_number);
> + name->tip_name = rev->tip_name;
> + name->generation = rev->generation;
> + name->distance = rev->distance;
> + } else
> + continue;
>
> - name_rev(parents->item, new_name, 0,
> - distance + MERGE_TRAVERSAL_WEIGHT, 0);
> - } else {
> - name_rev(parents->item, tip_name, generation + 1,
> - distance + 1, 0);
> + for (parents = rev->commit->parents;
> + parents;
> + parents = parents->next, parent_number++) {
> + if (parent_number > 1) {
> + int len = strlen(rev->tip_name);
> + char *new_name = xmalloc(len +
> + /* ~<n> */
> + 1 + decimal_length(rev->generation) +
> + /* ^NN */
> + 1 + 2 +
> + 1);
> +
> + if (len > 2 &&
> + !strcmp(rev->tip_name + len - 2, "^0"))
> + len -= 2;
> +
> + if (rev->generation > 0)
> + sprintf(new_name, "%.*s~%d^%d", len,
> + rev->tip_name, rev->generation,
> + parent_number);
> + else
> + sprintf(new_name, "%.*s^%d", len,
> + rev->tip_name, parent_number);
> +
> + next_rev = make_rev_data(parents->item,
> + new_name, 0,
> + rev->distance + MERGE_TRAVERSAL_WEIGHT,
> + 0);
> +
> + stack_push_end(&stack, next_rev);
> + } else {
> + next_rev = make_rev_data(parents->item,
> + rev->tip_name, rev->generation + 1,
> + rev->distance + 1, 0);
> +
> + stack_push(&stack, next_rev);
> + }
This logic changes the order in which the commits are visited. Given a
history, such as:
--D--B---A
--E-/ /
--F--C-/
--G-/
where A's parents are (B,C), B's parents are (D,E), and C's parents
are (F,G), the original code traverses commits in this order:
A B D E C F G
but, with your patch, they are traversed in this order:
A B D C F E G
> }
> +
> + free(rev);
> }
> }
>
> --
> 1.7.10.4
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] describe: rewrite name_rev() iteratively
2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
2014-04-08 7:41 ` Eric Sunshine
@ 2014-04-08 18:50 ` Jeff King
1 sibling, 0 replies; 3+ messages in thread
From: Jeff King @ 2014-04-08 18:50 UTC (permalink / raw)
To: Dragos Foianu; +Cc: git
On Mon, Apr 07, 2014 at 01:47:14AM +0300, Dragos Foianu wrote:
> The "git describe --contains" command uses the name_rev() function which
> is currently a recursive function. This causes a Stack Overflow when the
> history is large enough.
>
> Rewrite name_rev iteratively using a stack on the heap. This slightly
> reduces performance due to the extra operations on the heap, but the
> function no longer overflows the stack.
You can avoid the heap overhead by using an array for your stack, and
only resizing it when necessary. Like this:
struct rev_stack {
int nr, alloc;
struct rev_data *data;
};
static struct rev_data *rev_stack_push(struct rev_stack *stack)
{
ALLOC_GROW(stack->data, stack->nr + 1, stack->alloc);
return &stack->data[stack->nr++];
}
static void rev_stack_pop(struct rev_stack *stack)
{
stack->nr--;
}
static void rev_stack_init(struct rev_stack *stack)
{
stack->nr = stack->alloc = 0;
stack->data = NULL;
}
static void rev_stack_release(struct rev_stack *stack)
{
free(stack->data);
rev_stack_init(stack);
}
Usage would be something like:
struct rev_data *data = rev_stack_push(&stack);
data->commit = commit;
data->tip_name = tip_name;
...
IOW, you push first to allocate the space, and then do your
make_rev_data, rather than the other way around.
The downside is that your allocation is always as big as the deepest
recursion so far, so you hold on to the memory a little longer than
necessary. I think that's a good tradeoff versus an extra malloc() for
every commit.
-Peff
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-04-08 18:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
2014-04-08 7:41 ` Eric Sunshine
2014-04-08 18:50 ` Jeff King
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).