git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] describe: rewrite name_rev() iteratively
@ 2014-04-06 22:47 Dragos Foianu
  2014-04-08  7:41 ` Eric Sunshine
  2014-04-08 18:50 ` Jeff King
  0 siblings, 2 replies; 3+ messages in thread
From: Dragos Foianu @ 2014-04-06 22:47 UTC (permalink / raw)
  To: git; +Cc: Dragos Foianu

The "git describe --contains" command uses the name_rev() function which
is currently a recursive function. This causes a Stack Overflow when the
history is large enough.

Rewrite name_rev iteratively using a stack on the heap. This slightly
reduces performance due to the extra operations on the heap, but the
function no longer overflows the stack.

Reported-by: Sylvestre Ledru <sylvestre@mozilla.com>
Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com>
---
 builtin/name-rev.c |  176 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 128 insertions(+), 48 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index c824d4e..5848d81 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -19,66 +19,146 @@ static long cutoff = LONG_MAX;
 /* How many generations are maximally preferred over _one_ merge traversal? */
 #define MERGE_TRAVERSAL_WEIGHT 65535
 
+typedef struct rev_data {
+	struct commit *commit;
+	const char *tip_name;
+	int generation;
+	int distance;
+	int deref;
+} *rev_data;
+
+typedef struct rev_stack {
+	struct rev_data *rev;
+	struct rev_stack *next;
+} *rev_stack;
+
+static void stack_push(rev_stack *stack, rev_data data) {
+	rev_stack new_node = xmalloc(sizeof(*new_node));
+
+	new_node->rev = data;
+	new_node->next = *stack;
+	*stack = new_node;
+}
+
+static void stack_push_end(rev_stack *stack, rev_data data) {
+	rev_stack new_node = xmalloc(sizeof(*new_node));
+
+	while (*stack != NULL)
+		stack = &(*stack)->next;
+	new_node->rev = data;
+	new_node->next = *stack;
+	*stack = new_node;
+}
+
+static rev_data stack_pop(rev_stack *stack) {
+	rev_stack next = (*stack)->next;
+	rev_data rev = (*stack)->rev;
+	free(*stack);
+
+	*stack = next;
+	return rev;
+}
+
+static rev_data make_rev_data(struct commit *commit,
+		const char* tip_name, int generation, int distance,
+		int deref)
+{
+	rev_data data = xmalloc(sizeof(*data));
+
+	data->commit = commit;
+	data->tip_name = tip_name;
+	data->generation = generation;
+	data->distance = distance;
+	data->deref = deref;
+
+	return data;
+}
+
 static void name_rev(struct commit *commit,
 		const char *tip_name, int generation, int distance,
 		int deref)
 {
-	struct rev_name *name = (struct rev_name *)commit->util;
-	struct commit_list *parents;
-	int parent_number = 1;
+	rev_stack stack = NULL;
+	rev_data data, next_rev;
 
-	parse_commit(commit);
+	data = make_rev_data(commit, tip_name, generation, distance, deref);
+	stack_push(&stack, data);
 
-	if (commit->date < cutoff)
-		return;
+	while (stack != NULL) {
+		rev_data rev = stack_pop(&stack);
 
-	if (deref) {
-		char *new_name = xmalloc(strlen(tip_name)+3);
-		strcpy(new_name, tip_name);
-		strcat(new_name, "^0");
-		tip_name = new_name;
+		struct rev_name *name = (struct rev_name *) rev->commit->util;
+		struct commit_list *parents;
+		int parent_number = 1;
 
-		if (generation)
-			die("generation: %d, but deref?", generation);
-	}
+		parse_commit(rev->commit);
+
+		if (rev->commit->date < cutoff)
+			continue;
+
+		if (rev->deref) {
+			char *new_name = xmalloc(strlen(rev->tip_name) + 3);
+			strcpy(new_name, rev->tip_name);
+			strcat(new_name, "^0");
+			rev->tip_name = new_name;
 
-	if (name == NULL) {
-		name = xmalloc(sizeof(rev_name));
-		commit->util = name;
-		goto copy_data;
-	} else if (name->distance > distance) {
+			if (rev->generation)
+				die("generation: %d, but deref?",
+					rev->generation);
+		}
+
+		if (name == NULL) {
+			name = xmalloc(sizeof(rev_name));
+			rev->commit->util = name;
+			goto copy_data;
+		} else if (name->distance > rev->distance) {
 copy_data:
-		name->tip_name = tip_name;
-		name->generation = generation;
-		name->distance = distance;
-	} else
-		return;
-
-	for (parents = commit->parents;
-			parents;
-			parents = parents->next, parent_number++) {
-		if (parent_number > 1) {
-			int len = strlen(tip_name);
-			char *new_name = xmalloc(len +
-				1 + decimal_length(generation) +  /* ~<n> */
-				1 + 2 +				  /* ^NN */
-				1);
-
-			if (len > 2 && !strcmp(tip_name + len - 2, "^0"))
-				len -= 2;
-			if (generation > 0)
-				sprintf(new_name, "%.*s~%d^%d", len, tip_name,
-						generation, parent_number);
-			else
-				sprintf(new_name, "%.*s^%d", len, tip_name,
-						parent_number);
+			name->tip_name = rev->tip_name;
+			name->generation = rev->generation;
+			name->distance = rev->distance;
+		} else
+			continue;
 
-			name_rev(parents->item, new_name, 0,
-				distance + MERGE_TRAVERSAL_WEIGHT, 0);
-		} else {
-			name_rev(parents->item, tip_name, generation + 1,
-				distance + 1, 0);
+		for (parents = rev->commit->parents;
+				parents;
+				parents = parents->next, parent_number++) {
+			if (parent_number > 1) {
+				int len = strlen(rev->tip_name);
+				char *new_name = xmalloc(len +
+					/* ~<n> */
+					1 + decimal_length(rev->generation) +
+					/* ^NN */
+					1 + 2 +
+					1);
+
+				if (len > 2 &&
+					!strcmp(rev->tip_name + len - 2, "^0"))
+					len -= 2;
+
+				if (rev->generation > 0)
+					sprintf(new_name, "%.*s~%d^%d", len,
+						rev->tip_name, rev->generation,
+						parent_number);
+				else
+					sprintf(new_name, "%.*s^%d", len,
+						rev->tip_name, parent_number);
+
+				next_rev = make_rev_data(parents->item,
+					new_name, 0,
+					rev->distance + MERGE_TRAVERSAL_WEIGHT,
+					0);
+
+				stack_push_end(&stack, next_rev);
+			} else {
+				next_rev = make_rev_data(parents->item,
+					rev->tip_name, rev->generation + 1,
+					rev->distance + 1, 0);
+
+				stack_push(&stack, next_rev);
+			}
 		}
+
+		free(rev);
 	}
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] describe: rewrite name_rev() iteratively
  2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
@ 2014-04-08  7:41 ` Eric Sunshine
  2014-04-08 18:50 ` Jeff King
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Sunshine @ 2014-04-08  7:41 UTC (permalink / raw)
  To: Dragos Foianu; +Cc: Git List, Sylvestre Ledru

[cc: Sylvestre Ledru <sylvestre@mozilla.com>]

On Sun, Apr 6, 2014 at 6:47 PM, Dragos Foianu <dragos.foianu@gmail.com> wrote:
> The "git describe --contains" command uses the name_rev() function which
> is currently a recursive function. This causes a Stack Overflow when the
> history is large enough.

No need to capitalize "stack overflow".

It might be helpful if you provide a link to the original problem
report by Sylvestre [1] for context.

[1]: http://thread.gmane.org/gmane.comp.version-control.git/244430

> Rewrite name_rev iteratively using a stack on the heap. This slightly
> reduces performance due to the extra operations on the heap, but the
> function no longer overflows the stack.
>
> Reported-by: Sylvestre Ledru <sylvestre@mozilla.com>

It's a good idea to cc: the original reporter of the problem so that
he can test the fix. (And, generally speaking, it's good etiquette to
cc: people who commented on the issue.)

> Signed-off-by: Dragos Foianu <dragos.foianu@gmail.com>
> ---
>  builtin/name-rev.c |  176 ++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 128 insertions(+), 48 deletions(-)
>
> diff --git a/builtin/name-rev.c b/builtin/name-rev.c
> index c824d4e..5848d81 100644
> --- a/builtin/name-rev.c
> +++ b/builtin/name-rev.c
> @@ -19,66 +19,146 @@ static long cutoff = LONG_MAX;
>  /* How many generations are maximally preferred over _one_ merge traversal? */
>  #define MERGE_TRAVERSAL_WEIGHT 65535
>
> +typedef struct rev_data {

On this project, "typedef struct" is almost universally avoided. Just
say "struct rev_data" when needed.

> +       struct commit *commit;
> +       const char *tip_name;
> +       int generation;
> +       int distance;
> +       int deref;

'deref' may be true only for the initial call to name_rev(); all
recursive invocations unconditionally pass false, so including it in
the structure is superfluous and potentially confusing for readers.

More below.

> +} *rev_data;
> +
> +typedef struct rev_stack {
> +       struct rev_data *rev;
> +       struct rev_stack *next;
> +} *rev_stack;
> +
> +static void stack_push(rev_stack *stack, rev_data data) {
> +       rev_stack new_node = xmalloc(sizeof(*new_node));
> +
> +       new_node->rev = data;
> +       new_node->next = *stack;
> +       *stack = new_node;
> +}
> +
> +static void stack_push_end(rev_stack *stack, rev_data data) {
> +       rev_stack new_node = xmalloc(sizeof(*new_node));
> +
> +       while (*stack != NULL)
> +               stack = &(*stack)->next;
> +       new_node->rev = data;
> +       new_node->next = *stack;
> +       *stack = new_node;
> +}
> +
> +static rev_data stack_pop(rev_stack *stack) {
> +       rev_stack next = (*stack)->next;
> +       rev_data rev = (*stack)->rev;
> +       free(*stack);
> +
> +       *stack = next;
> +       return rev;
> +}
> +
> +static rev_data make_rev_data(struct commit *commit,
> +               const char* tip_name, int generation, int distance,
> +               int deref)
> +{
> +       rev_data data = xmalloc(sizeof(*data));
> +
> +       data->commit = commit;
> +       data->tip_name = tip_name;
> +       data->generation = generation;
> +       data->distance = distance;
> +       data->deref = deref;
> +
> +       return data;
> +}
> +
>  static void name_rev(struct commit *commit,
>                 const char *tip_name, int generation, int distance,
>                 int deref)
>  {
> -       struct rev_name *name = (struct rev_name *)commit->util;
> -       struct commit_list *parents;
> -       int parent_number = 1;
> +       rev_stack stack = NULL;
> +       rev_data data, next_rev;
>
> -       parse_commit(commit);
> +       data = make_rev_data(commit, tip_name, generation, distance, deref);
> +       stack_push(&stack, data);
>
> -       if (commit->date < cutoff)
> -               return;
> +       while (stack != NULL) {
> +               rev_data rev = stack_pop(&stack);
>
> -       if (deref) {
> -               char *new_name = xmalloc(strlen(tip_name)+3);
> -               strcpy(new_name, tip_name);
> -               strcat(new_name, "^0");
> -               tip_name = new_name;
> +               struct rev_name *name = (struct rev_name *) rev->commit->util;
> +               struct commit_list *parents;
> +               int parent_number = 1;
>
> -               if (generation)
> -                       die("generation: %d, but deref?", generation);
> -       }
> +               parse_commit(rev->commit);
> +
> +               if (rev->commit->date < cutoff)
> +                       continue;
> +
> +               if (rev->deref) {
> +                       char *new_name = xmalloc(strlen(rev->tip_name) + 3);
> +                       strcpy(new_name, rev->tip_name);
> +                       strcat(new_name, "^0");
> +                       rev->tip_name = new_name;

As mentioned above, 'deref' may be true only upon the first call;
recursive invocations always set it to false, so the entire 'if
(deref)' conditional can be promoted out of your 'while (stack !=
NULL)' loop.

(In fact, this deref processing could be promoted out of this function
altogether since its only ever done for the first commit visited, but
such a change is likely fodder for a separate cleanup patch.)

More below.

> -       if (name == NULL) {
> -               name = xmalloc(sizeof(rev_name));
> -               commit->util = name;
> -               goto copy_data;
> -       } else if (name->distance > distance) {
> +                       if (rev->generation)
> +                               die("generation: %d, but deref?",
> +                                       rev->generation);
> +               }
> +
> +               if (name == NULL) {
> +                       name = xmalloc(sizeof(rev_name));
> +                       rev->commit->util = name;
> +                       goto copy_data;
> +               } else if (name->distance > rev->distance) {
>  copy_data:
> -               name->tip_name = tip_name;
> -               name->generation = generation;
> -               name->distance = distance;
> -       } else
> -               return;
> -
> -       for (parents = commit->parents;
> -                       parents;
> -                       parents = parents->next, parent_number++) {
> -               if (parent_number > 1) {
> -                       int len = strlen(tip_name);
> -                       char *new_name = xmalloc(len +
> -                               1 + decimal_length(generation) +  /* ~<n> */
> -                               1 + 2 +                           /* ^NN */
> -                               1);
> -
> -                       if (len > 2 && !strcmp(tip_name + len - 2, "^0"))
> -                               len -= 2;
> -                       if (generation > 0)
> -                               sprintf(new_name, "%.*s~%d^%d", len, tip_name,
> -                                               generation, parent_number);
> -                       else
> -                               sprintf(new_name, "%.*s^%d", len, tip_name,
> -                                               parent_number);
> +                       name->tip_name = rev->tip_name;
> +                       name->generation = rev->generation;
> +                       name->distance = rev->distance;
> +               } else
> +                       continue;
>
> -                       name_rev(parents->item, new_name, 0,
> -                               distance + MERGE_TRAVERSAL_WEIGHT, 0);
> -               } else {
> -                       name_rev(parents->item, tip_name, generation + 1,
> -                               distance + 1, 0);
> +               for (parents = rev->commit->parents;
> +                               parents;
> +                               parents = parents->next, parent_number++) {
> +                       if (parent_number > 1) {
> +                               int len = strlen(rev->tip_name);
> +                               char *new_name = xmalloc(len +
> +                                       /* ~<n> */
> +                                       1 + decimal_length(rev->generation) +
> +                                       /* ^NN */
> +                                       1 + 2 +
> +                                       1);
> +
> +                               if (len > 2 &&
> +                                       !strcmp(rev->tip_name + len - 2, "^0"))
> +                                       len -= 2;
> +
> +                               if (rev->generation > 0)
> +                                       sprintf(new_name, "%.*s~%d^%d", len,
> +                                               rev->tip_name, rev->generation,
> +                                               parent_number);
> +                               else
> +                                       sprintf(new_name, "%.*s^%d", len,
> +                                               rev->tip_name, parent_number);
> +
> +                               next_rev = make_rev_data(parents->item,
> +                                       new_name, 0,
> +                                       rev->distance + MERGE_TRAVERSAL_WEIGHT,
> +                                       0);
> +
> +                               stack_push_end(&stack, next_rev);
> +                       } else {
> +                               next_rev = make_rev_data(parents->item,
> +                                       rev->tip_name, rev->generation + 1,
> +                                       rev->distance + 1, 0);
> +
> +                               stack_push(&stack, next_rev);
> +                       }

This logic changes the order in which the commits are visited. Given a
history, such as:

    --D--B---A
    --E-/   /
    --F--C-/
    --G-/

where A's parents are (B,C), B's parents are (D,E), and C's parents
are (F,G), the original code traverses commits in this order:

    A B D E C F G

but, with your patch, they are traversed in this order:

    A B D C F E G

>                 }
> +
> +               free(rev);
>         }
>  }
>
> --
> 1.7.10.4

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] describe: rewrite name_rev() iteratively
  2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
  2014-04-08  7:41 ` Eric Sunshine
@ 2014-04-08 18:50 ` Jeff King
  1 sibling, 0 replies; 3+ messages in thread
From: Jeff King @ 2014-04-08 18:50 UTC (permalink / raw)
  To: Dragos Foianu; +Cc: git

On Mon, Apr 07, 2014 at 01:47:14AM +0300, Dragos Foianu wrote:

> The "git describe --contains" command uses the name_rev() function which
> is currently a recursive function. This causes a Stack Overflow when the
> history is large enough.
> 
> Rewrite name_rev iteratively using a stack on the heap. This slightly
> reduces performance due to the extra operations on the heap, but the
> function no longer overflows the stack.

You can avoid the heap overhead by using an array for your stack, and
only resizing it when necessary. Like this:

    struct rev_stack {
            int nr, alloc;
            struct rev_data *data;
    };

    static struct rev_data *rev_stack_push(struct rev_stack *stack)
    {
            ALLOC_GROW(stack->data, stack->nr + 1, stack->alloc);
            return &stack->data[stack->nr++];
    }

    static void rev_stack_pop(struct rev_stack *stack)
    {
            stack->nr--;
    }

    static void rev_stack_init(struct rev_stack *stack)
    {
            stack->nr = stack->alloc = 0;
            stack->data = NULL;
    }

    static void rev_stack_release(struct rev_stack *stack)
    {
            free(stack->data);
            rev_stack_init(stack);
    }

Usage would be something like:

    struct rev_data *data = rev_stack_push(&stack);
    data->commit = commit;
    data->tip_name = tip_name;
    ...

IOW, you push first to allocate the space, and then do your
make_rev_data, rather than the other way around.

The downside is that your allocation is always as big as the deepest
recursion so far, so you hold on to the memory a little longer than
necessary. I think that's a good tradeoff versus an extra malloc() for
every commit.

-Peff

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-04-08 18:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-06 22:47 [PATCH] describe: rewrite name_rev() iteratively Dragos Foianu
2014-04-08  7:41 ` Eric Sunshine
2014-04-08 18:50 ` Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).