Git development
 help / color / mirror / Atom feed
* [PATCH v9 05/20] ref-filter: move get_head_description() from branch.c
From: Karthik Nayak @ 2016-12-27 16:23 UTC (permalink / raw)
  To: git; +Cc: jacob.keller, gitster, ramsay, Karthik Nayak
In-Reply-To: <20161227162357.28212-1-Karthik.188@gmail.com>

From: Karthik Nayak <karthik.188@gmail.com>

Move the implementation of get_head_description() from branch.c to
ref-filter.  This gives a description of the HEAD ref if called. This
is used as the refname for the HEAD ref whenever the
FILTER_REFS_DETACHED_HEAD option is used. Make it public because we
need it to calculate the length of the HEAD ref's description in
branch.c:calc_maxwidth() when we port branch.c to use ref-filter
APIs.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/branch.c | 33 ---------------------------------
 ref-filter.c     | 38 ++++++++++++++++++++++++++++++++++++--
 ref-filter.h     |  2 ++
 3 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/builtin/branch.c b/builtin/branch.c
index 9d30f55..6423ebc 100644
--- a/builtin/branch.c
+++ b/builtin/branch.c
@@ -364,39 +364,6 @@ static void add_verbose_info(struct strbuf *out, struct ref_array_item *item,
 	strbuf_release(&subject);
 }
 
-static char *get_head_description(void)
-{
-	struct strbuf desc = STRBUF_INIT;
-	struct wt_status_state state;
-	memset(&state, 0, sizeof(state));
-	wt_status_get_state(&state, 1);
-	if (state.rebase_in_progress ||
-	    state.rebase_interactive_in_progress)
-		strbuf_addf(&desc, _("(no branch, rebasing %s)"),
-			    state.branch);
-	else if (state.bisect_in_progress)
-		strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
-			    state.branch);
-	else if (state.detached_from) {
-		if (state.detached_at)
-			/* TRANSLATORS: make sure this matches
-			   "HEAD detached at " in wt-status.c */
-			strbuf_addf(&desc, _("(HEAD detached at %s)"),
-				state.detached_from);
-		else
-			/* TRANSLATORS: make sure this matches
-			   "HEAD detached from " in wt-status.c */
-			strbuf_addf(&desc, _("(HEAD detached from %s)"),
-				state.detached_from);
-	}
-	else
-		strbuf_addstr(&desc, _("(no branch)"));
-	free(state.branch);
-	free(state.onto);
-	free(state.detached_from);
-	return strbuf_detach(&desc, NULL);
-}
-
 static void format_and_print_ref_item(struct ref_array_item *item, int maxwidth,
 				      struct ref_filter *filter, const char *remote_prefix)
 {
diff --git a/ref-filter.c b/ref-filter.c
index 385fc04..5511a20 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -14,6 +14,7 @@
 #include "git-compat-util.h"
 #include "version.h"
 #include "trailer.h"
+#include "wt-status.h"
 
 typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
 typedef enum { COMPARE_EQUAL, COMPARE_UNEQUAL, COMPARE_NONE } cmp_status;
@@ -1101,6 +1102,37 @@ static void fill_remote_ref_details(struct used_atom *atom, const char *refname,
 		*s = refname;
 }
 
+char *get_head_description(void)
+{
+	struct strbuf desc = STRBUF_INIT;
+	struct wt_status_state state;
+	memset(&state, 0, sizeof(state));
+	wt_status_get_state(&state, 1);
+	if (state.rebase_in_progress ||
+	    state.rebase_interactive_in_progress)
+		strbuf_addf(&desc, _("(no branch, rebasing %s)"),
+			    state.branch);
+	else if (state.bisect_in_progress)
+		strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
+			    state.branch);
+	else if (state.detached_from) {
+		/* TRANSLATORS: make sure these match _("HEAD detached at ")
+		   and _("HEAD detached from ") in wt-status.c */
+		if (state.detached_at)
+			strbuf_addf(&desc, _("(HEAD detached at %s)"),
+				state.detached_from);
+		else
+			strbuf_addf(&desc, _("(HEAD detached from %s)"),
+				state.detached_from);
+	}
+	else
+		strbuf_addstr(&desc, _("(no branch)"));
+	free(state.branch);
+	free(state.onto);
+	free(state.detached_from);
+	return strbuf_detach(&desc, NULL);
+}
+
 /*
  * Parse the object referred by ref, and grab needed value.
  */
@@ -1140,9 +1172,11 @@ static void populate_value(struct ref_array_item *ref)
 			name++;
 		}
 
-		if (starts_with(name, "refname"))
+		if (starts_with(name, "refname")) {
 			refname = ref->refname;
-		else if (starts_with(name, "symref"))
+			if (ref->kind & FILTER_REFS_DETACHED_HEAD)
+				refname = get_head_description();
+		} else if (starts_with(name, "symref"))
 			refname = ref->symref ? ref->symref : "";
 		else if (starts_with(name, "upstream")) {
 			const char *branch_name;
diff --git a/ref-filter.h b/ref-filter.h
index fc55fa3..f78323d 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -108,5 +108,7 @@ int parse_opt_ref_sorting(const struct option *opt, const char *arg, int unset);
 struct ref_sorting *ref_default_sorting(void);
 /*  Function to parse --merged and --no-merged options */
 int parse_opt_merge_filter(const struct option *opt, const char *arg, int unset);
+/*  Get the current HEAD's description */
+char *get_head_description(void);
 
 #endif /*  REF_FILTER_H  */
-- 
2.10.2


^ permalink raw reply related

* [PATCH v9 02/20] ref-filter: include reference to 'used_atom' within 'atom_value'
From: Karthik Nayak @ 2016-12-27 16:23 UTC (permalink / raw)
  To: git; +Cc: jacob.keller, gitster, ramsay, Karthik Nayak, Karthik Nayak
In-Reply-To: <20161227162357.28212-1-Karthik.188@gmail.com>

From: Karthik Nayak <karthik.188@gmail.com>

Ensure that each 'atom_value' has a reference to its corresponding
'used_atom'. This lets us use values within 'used_atom' in the
'handler' function.

Hence we can get the %(align) atom's parameters directly from the
'used_atom', therefore removing the necessity of passing %(align) atom's
parameters to 'atom_value'.

This also acts as a preparatory patch for the upcoming patch where we
introduce %(if:equals=) and %(if:notequals=).

Signed-off-by: Karthik Nayak <Karthik.188@gmail.com>
---
 ref-filter.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 0a57872..f31c4b6 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -241,11 +241,9 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
-	union {
-		struct align align;
-	} u;
 	void (*handler)(struct atom_value *atomv, struct ref_formatting_state *state);
 	unsigned long ul; /* used for sorting when not FIELD_STR */
+	struct used_atom *atom;
 };
 
 /*
@@ -381,7 +379,7 @@ static void align_atom_handler(struct atom_value *atomv, struct ref_formatting_s
 	push_stack_element(&state->stack);
 	new = state->stack;
 	new->at_end = end_align_handler;
-	new->at_end_data = &atomv->u.align;
+	new->at_end_data = &atomv->atom->u.align;
 }
 
 static void if_then_else_handler(struct ref_formatting_stack **stack)
@@ -1090,6 +1088,7 @@ static void populate_value(struct ref_array_item *ref)
 		struct branch *branch = NULL;
 
 		v->handler = append_atom;
+		v->atom = atom;
 
 		if (*name == '*') {
 			deref = 1;
@@ -1154,7 +1153,6 @@ static void populate_value(struct ref_array_item *ref)
 				v->s = " ";
 			continue;
 		} else if (starts_with(name, "align")) {
-			v->u.align = atom->u.align;
 			v->handler = align_atom_handler;
 			continue;
 		} else if (!strcmp(name, "end")) {
-- 
2.10.2


^ permalink raw reply related

* [PATCH v9 03/20] ref-filter: implement %(if:equals=<string>) and %(if:notequals=<string>)
From: Karthik Nayak @ 2016-12-27 16:23 UTC (permalink / raw)
  To: git; +Cc: jacob.keller, gitster, ramsay, Karthik Nayak
In-Reply-To: <20161227162357.28212-1-Karthik.188@gmail.com>

From: Karthik Nayak <karthik.188@gmail.com>

Implement %(if:equals=<string>) wherein the if condition is only
satisfied if the value obtained between the %(if:...) and %(then) atom
is the same as the given '<string>'.

Similarly, implement %(if:notequals=<string>) wherein the if condition
is only satisfied if the value obtained between the %(if:...) and
%(then) atom is different from the given '<string>'.

This is done by introducing 'if_atom_parser()' which parses the given
%(if) atom and then stores the data in used_atom which is later passed
on to the used_atom of the %(then) atom, so that it can do the required
comparisons.

Add tests and Documentation for the same.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 Documentation/git-for-each-ref.txt |  3 +++
 ref-filter.c                       | 46 +++++++++++++++++++++++++++++++++-----
 t/t6302-for-each-ref-filter.sh     | 18 +++++++++++++++
 3 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 5e80c34..39aab09 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -158,6 +158,9 @@ if::
 	evaluating the string before %(then), this is useful when we
 	use the %(HEAD) atom which prints either "*" or " " and we
 	want to apply the 'if' condition only on the 'HEAD' ref.
+	Append ":equals=<string>" or ":notequals=<string>" to compare
+	the value between the %(if:...) and %(then) atoms with the
+	given string.
 
 In addition to the above, for commit and tag objects, the header
 field names (`tree`, `parent`, `object`, `type`, and `tag`) can
diff --git a/ref-filter.c b/ref-filter.c
index f31c4b6..e002629 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -16,6 +16,7 @@
 #include "trailer.h"
 
 typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
+typedef enum { COMPARE_EQUAL, COMPARE_UNEQUAL, COMPARE_NONE } cmp_status;
 
 struct align {
 	align_type position;
@@ -23,6 +24,8 @@ struct align {
 };
 
 struct if_then_else {
+	cmp_status cmp_status;
+	const char *str;
 	unsigned int then_atom_seen : 1,
 		else_atom_seen : 1,
 		condition_satisfied : 1;
@@ -50,6 +53,10 @@ static struct used_atom {
 			enum { C_BARE, C_BODY, C_BODY_DEP, C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
 			unsigned int nlines;
 		} contents;
+		struct {
+			cmp_status cmp_status;
+			const char *str;
+		} if_then_else;
 		enum { O_FULL, O_SHORT } objectname;
 	} u;
 } *used_atom;
@@ -179,6 +186,21 @@ static void align_atom_parser(struct used_atom *atom, const char *arg)
 	string_list_clear(&params, 0);
 }
 
+static void if_atom_parser(struct used_atom *atom, const char *arg)
+{
+	if (!arg) {
+		atom->u.if_then_else.cmp_status = COMPARE_NONE;
+		return;
+	} else if (skip_prefix(arg, "equals=", &atom->u.if_then_else.str)) {
+		atom->u.if_then_else.cmp_status = COMPARE_EQUAL;
+	} else if (skip_prefix(arg, "notequals=", &atom->u.if_then_else.str)) {
+		atom->u.if_then_else.cmp_status = COMPARE_UNEQUAL;
+	} else {
+		die(_("unrecognized %%(if) argument: %s"), arg);
+	}
+}
+
+
 static struct {
 	const char *name;
 	cmp_type cmp_type;
@@ -220,7 +242,7 @@ static struct {
 	{ "color", FIELD_STR, color_atom_parser },
 	{ "align", FIELD_STR, align_atom_parser },
 	{ "end" },
-	{ "if" },
+	{ "if", FIELD_STR, if_atom_parser },
 	{ "then" },
 	{ "else" },
 };
@@ -422,6 +444,9 @@ static void if_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	struct ref_formatting_stack *new;
 	struct if_then_else *if_then_else = xcalloc(sizeof(struct if_then_else), 1);
 
+	if_then_else->str = atomv->atom->u.if_then_else.str;
+	if_then_else->cmp_status = atomv->atom->u.if_then_else.cmp_status;
+
 	push_stack_element(&state->stack);
 	new = state->stack;
 	new->at_end = if_then_else_handler;
@@ -453,10 +478,17 @@ static void then_atom_handler(struct atom_value *atomv, struct ref_formatting_st
 		die(_("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
 	/*
-	 * If there exists non-empty string between the 'if' and
-	 * 'then' atom then the 'if' condition is satisfied.
+	 * If the 'equals' or 'notequals' attribute is used then
+	 * perform the required comparison. If not, only non-empty
+	 * strings satisfy the 'if' condition.
 	 */
-	if (cur->output.len && !is_empty(cur->output.buf))
+	if (if_then_else->cmp_status == COMPARE_EQUAL) {
+		if (!strcmp(if_then_else->str, cur->output.buf))
+			if_then_else->condition_satisfied = 1;
+	} else 	if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
+		if (strcmp(if_then_else->str, cur->output.buf))
+			if_then_else->condition_satisfied = 1;
+	} else if (cur->output.len && !is_empty(cur->output.buf))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 }
@@ -1158,7 +1190,11 @@ static void populate_value(struct ref_array_item *ref)
 		} else if (!strcmp(name, "end")) {
 			v->handler = end_atom_handler;
 			continue;
-		} else if (!strcmp(name, "if")) {
+		} else if (starts_with(name, "if")) {
+			const char *s;
+
+			if (skip_prefix(name, "if:", &s))
+				v->s = xstrdup(s);
 			v->handler = if_atom_handler;
 			continue;
 		} else if (!strcmp(name, "then")) {
diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh
index fed3013..a09a1a4 100755
--- a/t/t6302-for-each-ref-filter.sh
+++ b/t/t6302-for-each-ref-filter.sh
@@ -403,4 +403,22 @@ test_expect_success 'ignore spaces in %(if) atom usage' '
 	test_cmp expect actual
 '
 
+test_expect_success 'check %(if:equals=<string>)' '
+	git for-each-ref --format="%(if:equals=master)%(refname:short)%(then)Found master%(else)Not master%(end)" refs/heads/ >actual &&
+	cat >expect <<-\EOF &&
+	Found master
+	Not master
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'check %(if:notequals=<string>)' '
+	git for-each-ref --format="%(if:notequals=master)%(refname:short)%(then)Not master%(else)Found master%(end)" refs/heads/ >actual &&
+	cat >expect <<-\EOF &&
+	Found master
+	Not master
+	EOF
+	test_cmp expect actual
+'
+
 test_done
-- 
2.10.2


^ permalink raw reply related

* [PATCH v9 01/20] ref-filter: implement %(if), %(then), and %(else) atoms
From: Karthik Nayak @ 2016-12-27 16:23 UTC (permalink / raw)
  To: git; +Cc: jacob.keller, gitster, ramsay, Karthik Nayak
In-Reply-To: <20161227162357.28212-1-Karthik.188@gmail.com>

From: Karthik Nayak <karthik.188@gmail.com>

Implement %(if), %(then) and %(else) atoms. Used as
%(if)...%(then)...%(end) or %(if)...%(then)...%(else)...%(end). If the
format string between %(if) and %(then) expands to a non-empty string
that is not only whitespace, then the whole %(if)...%(end) expands to
the string following %(then). Otherwise, it expands to the string
following %(else), if any. Nesting of this construct is possible.

This is in preparation for porting over `git branch -l` to use
ref-filter APIs for printing.

Add Documentation and tests regarding the same.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 Documentation/git-for-each-ref.txt |  34 ++++++++++
 ref-filter.c                       | 134 +++++++++++++++++++++++++++++++++++--
 t/t6302-for-each-ref-filter.sh     |  76 +++++++++++++++++++++
 3 files changed, 237 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index abe13f3..5e80c34 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -149,6 +149,16 @@ align::
 	quoted, but if nested then only the topmost level performs
 	quoting.
 
+if::
+	Used as %(if)...%(then)...%(end) or
+	%(if)...%(then)...%(else)...%(end).  If there is an atom with
+	value or string literal after the %(if) then everything after
+	the %(then) is printed, else if the %(else) atom is used, then
+	everything after %(else) is printed. We ignore space when
+	evaluating the string before %(then), this is useful when we
+	use the %(HEAD) atom which prints either "*" or " " and we
+	want to apply the 'if' condition only on the 'HEAD' ref.
+
 In addition to the above, for commit and tag objects, the header
 field names (`tree`, `parent`, `object`, `type`, and `tag`) can
 be used to specify the value in the header field.
@@ -186,6 +196,14 @@ As a special case for the date-type fields, you may specify a format for
 the date by adding `:` followed by date format name (see the
 values the `--date` option to linkgit:git-rev-list[1] takes).
 
+Some atoms like %(align) and %(if) always require a matching %(end).
+We call them "opening atoms" and sometimes denote them as %($open).
+
+When a scripting language specific quoting is in effect, everything
+between a top-level opening atom and its matching %(end) is evaluated
+according to the semantics of the opening atom and its result is
+quoted.
+
 
 EXAMPLES
 --------
@@ -273,6 +291,22 @@ eval=`git for-each-ref --shell --format="$fmt" \
 eval "$eval"
 ------------
 
+
+An example to show the usage of %(if)...%(then)...%(else)...%(end).
+This prefixes the current branch with a star.
+
+------------
+git for-each-ref --format="%(if)%(HEAD)%(then)* %(else)  %(end)%(refname:short)" refs/heads/
+------------
+
+
+An example to show the usage of %(if)...%(then)...%(end).
+This prints the authorname, if present.
+
+------------
+git for-each-ref --format="%(refname)%(if)%(authorname)%(then) %(color:red)Authored by: %(authorname)%(end)"
+------------
+
 SEE ALSO
 --------
 linkgit:git-show-ref[1]
diff --git a/ref-filter.c b/ref-filter.c
index 1a97840..0a57872 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -22,6 +22,12 @@ struct align {
 	unsigned int width;
 };
 
+struct if_then_else {
+	unsigned int then_atom_seen : 1,
+		else_atom_seen : 1,
+		condition_satisfied : 1;
+};
+
 /*
  * An atom is a valid field atom listed below, possibly prefixed with
  * a "*" to denote deref_tag().
@@ -214,6 +220,9 @@ static struct {
 	{ "color", FIELD_STR, color_atom_parser },
 	{ "align", FIELD_STR, align_atom_parser },
 	{ "end" },
+	{ "if" },
+	{ "then" },
+	{ "else" },
 };
 
 #define REF_FORMATTING_STATE_INIT  { 0, NULL }
@@ -221,7 +230,7 @@ static struct {
 struct ref_formatting_stack {
 	struct ref_formatting_stack *prev;
 	struct strbuf output;
-	void (*at_end)(struct ref_formatting_stack *stack);
+	void (*at_end)(struct ref_formatting_stack **stack);
 	void *at_end_data;
 };
 
@@ -354,13 +363,14 @@ static void pop_stack_element(struct ref_formatting_stack **stack)
 	*stack = prev;
 }
 
-static void end_align_handler(struct ref_formatting_stack *stack)
+static void end_align_handler(struct ref_formatting_stack **stack)
 {
-	struct align *align = (struct align *)stack->at_end_data;
+	struct ref_formatting_stack *cur = *stack;
+	struct align *align = (struct align *)cur->at_end_data;
 	struct strbuf s = STRBUF_INIT;
 
-	strbuf_utf8_align(&s, align->position, align->width, stack->output.buf);
-	strbuf_swap(&stack->output, &s);
+	strbuf_utf8_align(&s, align->position, align->width, cur->output.buf);
+	strbuf_swap(&cur->output, &s);
 	strbuf_release(&s);
 }
 
@@ -374,6 +384,104 @@ static void align_atom_handler(struct atom_value *atomv, struct ref_formatting_s
 	new->at_end_data = &atomv->u.align;
 }
 
+static void if_then_else_handler(struct ref_formatting_stack **stack)
+{
+	struct ref_formatting_stack *cur = *stack;
+	struct ref_formatting_stack *prev = cur->prev;
+	struct if_then_else *if_then_else = (struct if_then_else *)cur->at_end_data;
+
+	if (!if_then_else->then_atom_seen)
+		die(_("format: %%(if) atom used without a %%(then) atom"));
+
+	if (if_then_else->else_atom_seen) {
+		/*
+		 * There is an %(else) atom: we need to drop one state from the
+		 * stack, either the %(else) branch if the condition is satisfied, or
+		 * the %(then) branch if it isn't.
+		 */
+		if (if_then_else->condition_satisfied) {
+			strbuf_reset(&cur->output);
+			pop_stack_element(&cur);
+		} else {
+			strbuf_swap(&cur->output, &prev->output);
+			strbuf_reset(&cur->output);
+			pop_stack_element(&cur);
+		}
+	} else if (!if_then_else->condition_satisfied) {
+		/*
+		 * No %(else) atom: just drop the %(then) branch if the
+		 * condition is not satisfied.
+		 */
+		strbuf_reset(&cur->output);
+	}
+
+	*stack = cur;
+	free(if_then_else);
+}
+
+static void if_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state)
+{
+	struct ref_formatting_stack *new;
+	struct if_then_else *if_then_else = xcalloc(sizeof(struct if_then_else), 1);
+
+	push_stack_element(&state->stack);
+	new = state->stack;
+	new->at_end = if_then_else_handler;
+	new->at_end_data = if_then_else;
+}
+
+static int is_empty(const char *s)
+{
+	while (*s != '\0') {
+		if (!isspace(*s))
+			return 0;
+		s++;
+	}
+	return 1;
+}
+
+static void then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state)
+{
+	struct ref_formatting_stack *cur = state->stack;
+	struct if_then_else *if_then_else = NULL;
+
+	if (cur->at_end == if_then_else_handler)
+		if_then_else = (struct if_then_else *)cur->at_end_data;
+	if (!if_then_else)
+		die(_("format: %%(then) atom used without an %%(if) atom"));
+	if (if_then_else->then_atom_seen)
+		die(_("format: %%(then) atom used more than once"));
+	if (if_then_else->else_atom_seen)
+		die(_("format: %%(then) atom used after %%(else)"));
+	if_then_else->then_atom_seen = 1;
+	/*
+	 * If there exists non-empty string between the 'if' and
+	 * 'then' atom then the 'if' condition is satisfied.
+	 */
+	if (cur->output.len && !is_empty(cur->output.buf))
+		if_then_else->condition_satisfied = 1;
+	strbuf_reset(&cur->output);
+}
+
+static void else_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state)
+{
+	struct ref_formatting_stack *prev = state->stack;
+	struct if_then_else *if_then_else = NULL;
+
+	if (prev->at_end == if_then_else_handler)
+		if_then_else = (struct if_then_else *)prev->at_end_data;
+	if (!if_then_else)
+		die(_("format: %%(else) atom used without an %%(if) atom"));
+	if (!if_then_else->then_atom_seen)
+		die(_("format: %%(else) atom used without a %%(then) atom"));
+	if (if_then_else->else_atom_seen)
+		die(_("format: %%(else) atom used more than once"));
+	if_then_else->else_atom_seen = 1;
+	push_stack_element(&state->stack);
+	state->stack->at_end_data = prev->at_end_data;
+	state->stack->at_end = prev->at_end;
+}
+
 static void end_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state)
 {
 	struct ref_formatting_stack *current = state->stack;
@@ -381,14 +489,17 @@ static void end_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 
 	if (!current->at_end)
 		die(_("format: %%(end) atom used without corresponding atom"));
-	current->at_end(current);
+	current->at_end(&state->stack);
+
+	/*  Stack may have been popped within at_end(), hence reset the current pointer */
+	current = state->stack;
 
 	/*
 	 * Perform quote formatting when the stack element is that of
 	 * a supporting atom. If nested then perform quote formatting
 	 * only on the topmost supporting atom.
 	 */
-	if (!state->stack->prev->prev) {
+	if (!current->prev->prev) {
 		quote_formatting(&s, current->output.buf, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
@@ -1049,6 +1160,15 @@ static void populate_value(struct ref_array_item *ref)
 		} else if (!strcmp(name, "end")) {
 			v->handler = end_atom_handler;
 			continue;
+		} else if (!strcmp(name, "if")) {
+			v->handler = if_atom_handler;
+			continue;
+		} else if (!strcmp(name, "then")) {
+			v->handler = then_atom_handler;
+			continue;
+		} else if (!strcmp(name, "else")) {
+			v->handler = else_atom_handler;
+			continue;
 		} else
 			continue;
 
diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh
index d0ab09f..fed3013 100755
--- a/t/t6302-for-each-ref-filter.sh
+++ b/t/t6302-for-each-ref-filter.sh
@@ -327,4 +327,80 @@ test_expect_success 'reverse version sort' '
 	test_cmp expect actual
 '
 
+test_expect_success 'improper usage of %(if), %(then), %(else) and %(end) atoms' '
+	test_must_fail git for-each-ref --format="%(if)" &&
+	test_must_fail git for-each-ref --format="%(then) %(end)" &&
+	test_must_fail git for-each-ref --format="%(else) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(else) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(then) %(then) %(end)" &&
+	test_must_fail git for-each-ref --format="%(then) %(else) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(else) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(then) %(else)" &&
+	test_must_fail git for-each-ref --format="%(if) %(else) %(then) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(then) %(else) %(else) %(end)" &&
+	test_must_fail git for-each-ref --format="%(if) %(end)"
+'
+
+test_expect_success 'check %(if)...%(then)...%(end) atoms' '
+	git for-each-ref --format="%(refname)%(if)%(authorname)%(then) Author: %(authorname)%(end)" >actual &&
+	cat >expect <<-\EOF &&
+	refs/heads/master Author: A U Thor
+	refs/heads/side Author: A U Thor
+	refs/odd/spot Author: A U Thor
+	refs/tags/annotated-tag
+	refs/tags/doubly-annotated-tag
+	refs/tags/doubly-signed-tag
+	refs/tags/foo1.10 Author: A U Thor
+	refs/tags/foo1.3 Author: A U Thor
+	refs/tags/foo1.6 Author: A U Thor
+	refs/tags/four Author: A U Thor
+	refs/tags/one Author: A U Thor
+	refs/tags/signed-tag
+	refs/tags/three Author: A U Thor
+	refs/tags/two Author: A U Thor
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'check %(if)...%(then)...%(else)...%(end) atoms' '
+	git for-each-ref --format="%(if)%(authorname)%(then)%(authorname)%(else)No author%(end): %(refname)" >actual &&
+	cat >expect <<-\EOF &&
+	A U Thor: refs/heads/master
+	A U Thor: refs/heads/side
+	A U Thor: refs/odd/spot
+	No author: refs/tags/annotated-tag
+	No author: refs/tags/doubly-annotated-tag
+	No author: refs/tags/doubly-signed-tag
+	A U Thor: refs/tags/foo1.10
+	A U Thor: refs/tags/foo1.3
+	A U Thor: refs/tags/foo1.6
+	A U Thor: refs/tags/four
+	A U Thor: refs/tags/one
+	No author: refs/tags/signed-tag
+	A U Thor: refs/tags/three
+	A U Thor: refs/tags/two
+	EOF
+	test_cmp expect actual
+'
+test_expect_success 'ignore spaces in %(if) atom usage' '
+	git for-each-ref --format="%(refname:short): %(if)%(HEAD)%(then)Head ref%(else)Not Head ref%(end)" >actual &&
+	cat >expect <<-\EOF &&
+	master: Head ref
+	side: Not Head ref
+	odd/spot: Not Head ref
+	annotated-tag: Not Head ref
+	doubly-annotated-tag: Not Head ref
+	doubly-signed-tag: Not Head ref
+	foo1.10: Not Head ref
+	foo1.3: Not Head ref
+	foo1.6: Not Head ref
+	four: Not Head ref
+	one: Not Head ref
+	signed-tag: Not Head ref
+	three: Not Head ref
+	two: Not Head ref
+	EOF
+	test_cmp expect actual
+'
+
 test_done
-- 
2.10.2


^ permalink raw reply related

* [PATCH v9 00/20] port branch.c to use ref-filter's printing options
From: Karthik Nayak @ 2016-12-27 16:23 UTC (permalink / raw)
  To: git; +Cc: jacob.keller, gitster, ramsay, Karthik Nayak

This is part of the unification of the commands 'git tag -l',
'git branch -l' and 'git for-each-ref'. This series ports branch.c over
to use ref-filter's printing options.

Initially posted here: $(gmane/279226). It was decided that this series
would follow up after refactoring the ref-filter parsing mechanism, which
is now merged into master (9606218b32344c5c756f7c29349d3845ef60b80c).

v1 can be found here: $(gmane/288342)
v2 can be found here: $(gmane/288863)
v3 can be found here: $(gmane/290299)
v4 can be found here: $(gmane/291106)
v5b can be found here: $(gmane/292467)
v6 can be found here: http://marc.info/?l=git&m=146330914118766&w=2
v7 can be found here: http://marc.info/?l=git&m=147863593317362&w=2
v8 can be found here: http://marc.info/?l=git&m=148112502029302&w=2

Changes in this version:
1. A few formatting errors.
2. Made quote_literal_for_format() static.
3. lstrip and rstrip no longer die when a ref has fewer components than requested.

Thanks Jacob, Junio, Jeff, Ramsay for their suggestions and help.

Karthik Nayak (20):
  ref-filter: implement %(if), %(then), and %(else) atoms
  ref-filter: include reference to 'used_atom' within 'atom_value'
  ref-filter: implement %(if:equals=<string>) and
    %(if:notequals=<string>)
  ref-filter: modify "%(objectname:short)" to take length
  ref-filter: move get_head_description() from branch.c
  ref-filter: introduce format_ref_array_item()
  ref-filter: make %(upstream:track) prints "[gone]" for invalid
    upstreams
  ref-filter: add support for %(upstream:track,nobracket)
  ref-filter: make "%(symref)" atom work with the ':short' modifier
  ref-filter: introduce refname_atom_parser_internal()
  ref-filter: introduce refname_atom_parser()
  ref-filter: make remote_ref_atom_parser() use
    refname_atom_parser_internal()
  ref-filter: rename the 'strip' option to 'lstrip'
  ref-filter: Do not abruptly die when using the 'lstrip=<N>' option
  ref-filter: modify the 'lstrip=<N>' option to work with negative '<N>'
  ref-filter: add an 'rstrip=<N>' option to atoms which deal with
    refnames
  ref-filter: allow porcelain to translate messages in the output
  branch, tag: use porcelain output
  branch: use ref-filter printing APIs
  branch: implement '--format' option

 Documentation/git-branch.txt       |   7 +-
 Documentation/git-for-each-ref.txt |  86 +++++--
 builtin/branch.c                   | 290 +++++++---------------
 builtin/tag.c                      |   6 +-
 ref-filter.c                       | 490 +++++++++++++++++++++++++++++++------
 ref-filter.h                       |   7 +
 t/t3203-branch-output.sh           |  16 +-
 t/t6040-tracking-info.sh           |   2 +-
 t/t6300-for-each-ref.sh            |  88 ++++++-
 t/t6302-for-each-ref-filter.sh     |  94 +++++++
 10 files changed, 780 insertions(+), 306 deletions(-)

Interdiff:

diff --git a/builtin/branch.c b/builtin/branch.c
index 6393c3c..4051a18 100644
--- a/builtin/branch.c
+++ b/builtin/branch.c
@@ -306,9 +306,9 @@ static int calc_maxwidth(struct ref_array *refs, int remote_bonus)
 	return max;
 }

-const char *quote_literal_for_format(const char *s)
+static const char *quote_literal_for_format(const char *s)
 {
-	struct strbuf buf = STRBUF_INIT;
+	static struct strbuf buf = STRBUF_INIT;

 	strbuf_reset(&buf);
 	while (*s) {
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -76,10 +77,10 @@ static struct used_atom {
 		struct {
 			enum { RR_REF, RR_TRACK, RR_TRACKSHORT } option;
 			struct refname_atom refname;
-			unsigned int nobracket: 1;
+			unsigned int nobracket : 1;
 		} remote_ref;
@@ -1106,7 +1126,8 @@ static const char *lstrip_ref_components(const char *refname, int len)
 		const char *p = refname;

 		/* Find total no of '/' separated path-components */
-		for (i = 0; p[i]; p[i] == '/' ? i++ : *p++);
+		for (i = 0; p[i]; p[i] == '/' ? i++ : *p++)
+			;
 		/*
 		 * The number of components we need to strip is now
 		 * the total minus the components to be left (Plus one
@@ -1116,11 +1137,10 @@ static const char *lstrip_ref_components(const char *refname, int len)
 		remaining = i + len + 1;
 	}

-	while (remaining) {
+	while (remaining > 0) {
 		switch (*start++) {
 		case '\0':
-			die(_("ref '%s' does not have %d components to :lstrip"),
-			    refname, len);
+			return "";
 		case '/':
 			remaining--;
 			break;
@@ -1140,7 +1160,8 @@ static const char *rstrip_ref_components(const char *refname, int len)
 		const char *p = refname;

 		/* Find total no of '/' separated path-components */
-		for (i = 0; p[i]; p[i] == '/' ? i++ : *p++);
+		for (i = 0; p[i]; p[i] == '/' ? i++ : *p++)
+			;
 		/*
 		 * The number of components we need to strip is now
 		 * the total minus the components to be left (Plus one
@@ -1150,11 +1171,10 @@ static const char *rstrip_ref_components(const char *refname, int len)
 		remaining = i + len + 1;
 	}

-	while (remaining--) {
+	while (remaining-- > 0) {
 		char *p = strrchr(start, '/');
 		if (p == NULL)
-			die(_("ref '%s' does not have %d components to :rstrip"),
-			  refname, len);
+			return "";
 		else
 			p[0] = '\0';
 	}
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -153,16 +153,6 @@ test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '

-test_expect_success 'stripping refnames too far gives an error' '
-	test_must_fail git for-each-ref --format="%(refname:lstrip=3)" &&
-	test_must_fail git for-each-ref --format="%(refname:lstrip=-4)"
-'
-
-test_expect_success 'stripping refnames too far gives an error' '
-	test_must_fail git for-each-ref --format="%(refname:rstrip=3)" &&
-	test_must_fail git for-each-ref --format="%(refname:rstrip=-4)"
-'
-
 test_expect_success 'Check format specifiers are ignored in naming date atoms' '
 	git for-each-ref --format="%(authordate)" refs/heads &&
 	git for-each-ref --format="%(authordate:default) %(authordate)" refs/heads &&
diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index c72baeb..81db67d 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -92,14 +95,13 @@ refname::
        The name of the ref (the part after $GIT_DIR/).
        For a non-ambiguous short name of the ref append `:short`.
        The option core.warnAmbiguousRefs is used to select the strict
-       abbreviation mode. If `lstrip=<N>` or `rstrip=<N>` option can
+       abbreviation mode. The `lstrip=<N>` or `rstrip=<N>` option can
        be appended to strip `<N>` slash-separated path components
-       from or end of the refname respectively (e.g.,
+       from the left or right of the refname respectively (e.g.,
        `%(refname:lstrip=2)` turns `refs/tags/foo` into `foo` and
        `%(refname:rstrip=2)` turns `refs/tags/foo` into `refs`).  if
        `<N>` is a negative number, then only `<N>` path components
-       are left behind.  If a displayed ref has fewer components than
-       `<N>`, the command aborts with an error.
+       are left behind.

 objecttype::
        The type of the object (`blob`, `tree`, `commit`, `tag`).



--
2.10.2

^ permalink raw reply related

* Gitview Shell Injection Vulnerability
From: Javantea @ 2016-12-27  8:29 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 1570 bytes --]

I have found a shell injection vulnerability in contrib/gitview/gitview.

Gitview Shell Injection Vulnerability

Versions affected: 8cb711c8a5-1d1bdafd64 (<=2.11.0)

Gitview executes shell commands using string concatenation with user supplied data, filenames and branch names. Running Gitview and interacting with the user interface with a malicious filename or branch name in the current repository results in malicious commands being executed as the current user.

AnnotateWindow.add_file_data(self, filename, commit_sha1, line_num):
    fp = os.popen("git cat-file blob " + commit_sha1 +":"+filename)

AnnotateWindow.annotate(self, filename, commit_sha1, line_num):
    fp = os.popen("git ls-tree "+ commit_sha1 + " -- " + filename)
    fp = os.popen("git blame --incremental -C -C -- " + filename + " " + commit_sha1)

GitView.set_branch(self, args):
    fp = os.popen("git rev-parse --sq --default HEAD " + list_to_string(args, 1))
    fp = os.popen("git rev-list  --header --topo-order --parents " + git_rev_list_cmd)

The program also has other uses of os.popen but none use values that the user can manipulate. However, the fix should definitely replace these instances so that the code might one day pass pylint and manual code review more easily.

The function os.popen has been replaced by safer functions in the subprocess module. The code can be improved easily because it requires very little change to convert the code to work with arrays of strings instead of strings.

If you have any questions or would like a patch, please let me know.

Regards,
Javantea

^ permalink raw reply

* Re: [PATCHv5 1/4] submodule.h: add extern keyword to functions
From: Junio C Hamano @ 2016-12-27  1:13 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, bmwill, David.Turner, sandals, j6t
In-Reply-To: <20161220232012.15997-2-sbeller@google.com>

Stefan Beller <sbeller@google.com> writes:

> As the upcoming series will add a lot of functions to the submodule
> header, let's first make the header consistent to the rest of the project
> by adding the extern keyword to functions.

This may be the right thing to do in the longer term but a patch
like this adds a lot of unnecessary work on the integrator when
there are _other_ topics that are in flight.  I'll see how bad the
conflicts are while merging the topic to 'pu'.

Thanks.

^ permalink raw reply

* Re: [PATCHv5 4/4] rm: absorb a submodules git dir before deletion
From: Junio C Hamano @ 2016-12-27  1:10 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, bmwill, David.Turner, sandals, j6t
In-Reply-To: <20161220232012.15997-5-sbeller@google.com>

Stefan Beller <sbeller@google.com> writes:

> @@ -342,6 +313,8 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
>  			exit(0);
>  	}
>  
> +	submodules_absorb_gitdir_if_needed(prefix);
> +
>  	/*
>  	 * If not forced, the file, the index and the HEAD (if exists)
>  	 * must match; but the file can already been removed, since
> @@ -358,9 +331,6 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
>  			oidclr(&oid);
>  		if (check_local_mod(&oid, index_only))
>  			exit(1);
> -	} else if (!index_only) {
> -		if (check_submodules_use_gitfiles())
> -			exit(1);
>  	}
>  

Hmph.  It may be a bit strange to see an "index-only" remove to
touch working tree, no?  Yet submodules_absorb_gitdir_if_needed() is
unconditionally called above, which feels somewhat unexpected. 

> @@ -389,32 +359,20 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
>  	 */
>  	if (!index_only) {
>  		int removed = 0, gitmodules_modified = 0;
>  		for (i = 0; i < list.nr; i++) {
>  			const char *path = list.entry[i].name;
>  			if (list.entry[i].is_submodule) {
> +				struct strbuf buf = STRBUF_INIT;
> +
> +				strbuf_addstr(&buf, path);
> +				if (remove_dir_recursively(&buf, 0))
> +					die(_("could not remove '%s'"), path);
> +				strbuf_release(&buf);
> +
> +				removed = 1;
> +				if (!remove_path_from_gitmodules(path))
> +					gitmodules_modified = 1;
> +				continue;
>  			}

I do not see any behaviour change from the original (not quoted
here), but it is somewhat surprising that "git rm ./submodule" does
not really check if the submodule has local modifications and files
that are not even added before remove_dir_recursively() is called.

Or perhaps I am reading the code incorrectly and such a check is
done elsewhere?

^ permalink raw reply

* Re: [PATCHv5 3/4] submodule: rename and add flags to ok_to_remove_submodule
From: Junio C Hamano @ 2016-12-27  0:53 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, bmwill, David.Turner, sandals, j6t
In-Reply-To: <20161220232012.15997-4-sbeller@google.com>

Stefan Beller <sbeller@google.com> writes:

> As only 0 is understood as false, rename the function to invert the
> meaning, i.e. the return code of 0 signals the removal of the submodule
> is fine, and other values can be used to return a more precise answer
> what went wrong.

Makes sense to rename it as that will catch all the callers that
depend on the old semantics and name.

> -	if (start_command(&cp))
> -		die(_("could not run 'git status --porcelain -u --ignore-submodules=none' in submodule %s"), path);
> +	if (start_command(&cp)) {
> +		if (flags & SUBMODULE_REMOVAL_DIE_ON_ERROR)
> +			die(_("could not start 'git status in submodule '%s'"),
> +				path);
> +		ret = -1;
> +		goto out;
> +	}

This new codepath that does not die will not leak anything, as
a failed start_command() should release its argv[] and env[].

>  	len = strbuf_read(&buf, cp.out, 1024);
>  	if (len > 2)
> -		ok_to_remove = 0;
> +		ret = 1;

Not a new problem but is it obvious why the comparison of "len" is
against "2"?  This may deserve a one-liner comment.

Otherwise looks good to me.

^ permalink raw reply

* Re: git-apply: warn/fail on *changed* end of line (eol) *only*?
From: Igor Djordjevic BugA @ 2016-12-27  0:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <xmqqvau7cqy1.fsf@gitster.mtv.corp.google.com>

Hi Junio, and thanks for sharing your thoughts.

You understood it pretty well, except for the context "scope". The example
does show a single line change, and a single line old/new comparison, but
that is for simplicity's sake in the initial example. What I was
discussing was the whole patch "hunk" instead (mentioned inside
the part you quoted, too).

> But what if the patch was like this?
> 
>      1 <CRLF>
>     -3 <CRLF>
>     +three <CRLF>
>     +four <CRLF>
>      5 <CRLF>
> 
> Do you want to warn on "four <CRLF>" because it does not have any
> "previous" corresponding line?

No, because the old hunk (single - line) has <CRLF> line ending(s), the
same as the new hunk (both + lines), so effectively there is no end of
line change between old and new _hunks_, so no warning needed.

> Extending the thought further, which line do you want to warn and
> which line do you not want to, if the patch were like this instead?
> 
>      1 <CRLF>
>     -3 <CRLF>
>     +four <CRLF>
>     +three <CRLF>
>      5 <CRLF>

Again, no warnings here, same as described above.

> Extending this thought experiment further, you would realize that
> fundamentally the concept of "previous contents" has no sensible
> definition.

This one is interesting, and maybe arguable - I certainly don`t have
enough knowledge about the overall matter (Git heuristics for diff
creation, in terms of which parts get marked as "old" (-), and which as
"new" (+), which probably even depends on user defined settings)...

... BUT, the overall idea here seems rather simple (to me :P) - if *all*
removed lines inside the old hunk have the same line endings (<CRLF>,
for example), where there is *any* (at least one) added line inside the
new hunk that has a different line ending than the ones in the old hunk
(like <LF>), then warning or failure might be preferred (optionally,
at least), as it does seem like something fishy is going on.

I appreciate and understand a need for Git being able to know "correct"
line endings, but in case where the whole previous hunk (or the whole
file, even) has "incorrect" line endings (<CRLF>, as far as Git is
concerned, as in the given example), it seems more sensible to me for
Git to warn you about _that_ first, rather than keep silent and apply a
new hunk introducing <LF> line endings without you even knowing - heck,
maybe your core end of line setting is incorrect, indeed, so actually
having someone to let you know seems nice, before potentially corrupting
your file.

It felt like - "Hey, I do have a means to know that your _whole file_
has incorrect line endings (as per my current settings), but I don`t
want to tell you, yet I`ll rather happily apply this patch which
introduces n lines with _correct_ line endings... So, now you still have
your whole incorrect file, but don`t worry, I fixed those n lines for
you behind your back, life is good, and you`re welcome."

Ok, I might be exaggerating, but it just doesn`t seem right, the end
result seems even worse - having "incorrect" and "correct" endings
silently mixed.

To prevent this, the most important question seems to be - do we have a
sensible way to tell "this is THE line ending of the old hunk (the only
line ending used)"?

And the worst case answer would probably be - you need to check the
whole (old) file, if all line endings are the same, that is THE line
ending you`ll use for comparison against line endings of the newly added
lines.

For the best case scenario, I don`t know enough of Git diff heuristics
to say if we would even be able to determine THE line endings based on
the old _hunk_ only, not to examine the whole old file.

_If_ we can find THE old hunk (or file) line endings, and patch
introduces different ones, then warn/fail option seems sensible...?

Of course, in case where old hunk/file already has mixed line endings
(like both <CRLF> and <LF>), we can either choose to warn right away (as
mixed line endings are found already), or maybe even not warn at all,
as nothing actually changes in regards to line endings no matter which
one gets added, and we`re discussing "warn/fail on eol *change* only".

Otherwise, I agree about "previous contents" sensibility, especially in
the "blame" case, but this does seem a bit different... or does it?

Regards, BugA

^ permalink raw reply

* Re: What's cooking in git.git (Dec 2016, #07; Thu, 22)
From: Stefan Beller @ 2016-12-26 20:33 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Junio C Hamano, git, Luke Diamand
In-Reply-To: <7E1C7387-4F37-423F-803D-3B5690B49D40@gmail.com>

On Fri, Dec 23, 2016 at 3:55 PM, Lars Schneider
<larsxschneider@gmail.com> wrote:
> "next" seems to generate a small error on macOS. Probably introduced in
> "worktree: check if a submodule uses worktrees" (1a248cf)

Thanks for reporting, will send a fix on Tuesday.

^ permalink raw reply

* Re: Corner case involving null sha1, alternates, cache misses, and submodule config API
From: Stefan Beller @ 2016-12-26 20:32 UTC (permalink / raw)
  To: Mike Hommey, Josh Triplett, Brandon Williams
  Cc: Heiko Voigt, Junio C Hamano, git@vger.kernel.org
In-Reply-To: <20161225022904.v2mixrnbitvlviuu@glandium.org>

On Sat, Dec 24, 2016 at 6:29 PM, Mike Hommey <mh@glandium.org> wrote:
> Hi,
>
> As you might be aware, I'm working on a mercurial remote helper for git.
> The way it stores metadata for mercurial manifests abuses "commit"
> references in trees, which are normally used for submodules.
>
> Some operations in the helper use git diff-tree on those trees to find
> files faster than just using ls-tree on every commit would.
>
> Anyways, long story short, it turns out that a combination of
> everything mentioned in the subject of this email causes running git
> diff-tree -r --stdin with a list of 300k+ pairs of commits to take 10
> minutes, when (after investigation) adding --ignore-submodules=dirty
> made it take 1 minute instead, for the exact same 3GB output.
>
> It turns out, this all starts in is_submodule_ignored(), which contains:
>
>         if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG))
>                 set_diffopt_flags_from_submodule_config(options, path);
>
> And set_diffopt_flags_from_submodule_config calls:
>
>         submodule_from_path(null_sha1, path);
>
> And because there is no actual submodule involved, at some point that
> null_sha1 ends up in the call to read_sha1_file from
> submodule-config.c's config_from, which then proceeds to try to open the
> null sha1 as a loose object in every alternate, doing multiple system
> calls in each directory for something that is bound to fail. And to add
> pain to injury, it repeats that for each and every line of input to git
> diff-tree because the object cache doesn't care about storing negatives
> (which makes perfect sense for most cases).
>
> Even worse, when read_object returns NULL because the object doesn't
> exist, read_sha1_file_extended calls has_loose_object which does
> another set of system calls.
>
> Now, while I realize my use case is very atypical, and that I should
> just use --ignore-submodules=dirty, the fact that using the null sha1 can
> trigger such behavior strikes me as a footgun that would be better
> avoided. Especially when you factor the fact that
> read_sha1_file_extended calls lookup_replace_object_extended, which
> suggests one might interfere by creating a replace object for the null
> sha1. (BTW, it's not entirely clear to me, in the context of actual
> submodules, what the various --ignore-submodules options are supposed to
> mean for trees that are not the current HEAD; also, the manual page says
> "all" is the default, but that doesn't appear to be true)

I think at this high level we should start optimizing submodules, i.e.
not HEAD / working tree -> select less aggressive submodule config.

>
> From a cursory look at the output of `git grep \\bnull_sha1` it doesn't
> look like the null sha1 is used anywhere else in a similar fashion where
> it can be attempted to be read as an object. So, one could consider this
> is something the submodule config code should handle on its own by
> treating the null_sha1 argument to submodule_from_path (really
> config_from) specially. After all, gitmodule_sha1_from_commit already
> avoids a get_sha1() call when it's given the null sha1.
>
> OTOH, it seems submodule_from_path and submodule_from_name, the only two
> public functions that end up in config_from(), are *always* called with
> either the null sha1 or a literal null pointer.

When Heiko introduced the submodule config, the consensus
was to have a flexible API to lookup submodule configuration.
The sha1 argument defines where to lookup the submodule
config (e.g. you could have moved a submodule such that the
submodule_from_path returns a different submodule for HEAD^
than HEAD).

The null_sha1 is used for current working tree configuration, i.e.
load .gitmodules and then overload with .git/config.

origin/bw/grep-submodules changes the behavior as it preloads the
null_sha1 to be an actual commit-ish (HEAD^').

I may make use of it in the checkout --recurse-submodules series.

> The *only* calls to
> these functions that don't involve a null sha1 or a null pointer are
> from test code. So all in all, I'm not entirely sure what this sha1
> argument is all about in the first place.

See Documentation/technical/api-submodule-config.txt (IIRC)
in next(?).

>
> However, an argument could be made that null_sha1 should be treated
> specially at a lower level (read_sha1_file, I guess).
>
> What would be sensible to do here?

As said above, the null_sha1 is (ab-)used to mean
"use the preloaded config" which as of now is .git/config on top
of .gitmodules in the working tree.

I would assume we want another earlier and cheaper switch
for all these functions. This may also come in handy for the
git-series, that uses gitlinks as non-submodules.

Something like

    int treat_gitlinks_as_submodule()

which can be configured to be "auto"[1]  and then
"on", "off" that can be set for tools such as mercurial
remote helper or git-series.

[1] the default that
requires some slightly more expensive guess work, e.g.
the existence of the .gitmodules file. c.f.
https://github.com/git/git/commit/1863e05af5be73d9f7b9a1d22a33ca9849726623
static void preset_submodule_default(void)
would be a starting point to cheaply estimate
if gitlinks are submodules.



>
> Mike

^ permalink raw reply

* Re: [PATCH v2 00/21] Add configuration options for split-index
From: Christian Couder @ 2016-12-26 10:32 UTC (permalink / raw)
  To: Duy Nguyen
  Cc: git, Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Christian Couder
In-Reply-To: <20161219120246.GE24125@ash>

On Mon, Dec 19, 2016 at 1:02 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> On Sat, Dec 17, 2016 at 03:55:26PM +0100, Christian Couder wrote:
>> Goal
>> ~~~~
>>
>> We want to make it possible to use the split-index feature
>> automatically by just setting a new "core.splitIndex" configuration
>> variable to true.
>>
>> This can be valuable as split-index can help significantly speed up
>> `git rebase` especially along with the work to libify `git apply`
>> that has been merged to master
>> (see https://github.com/git/git/commit/81358dc238372793b1590efa149cc1581d1fbd98)
>> and is now in v2.11.
>
> I've read through the series (*) and I think it looks good, just a few
> minor comments here and there.

Thanks for your review.

I think I addressed all the minor points left in the v3 and the emails
I just sent.

> (*) guiltily admit that I only skimmed through tests, not giving them
>     as much attention as I should have

^ permalink raw reply

* Re: [PATCH v3 00/21] Add configuration options for split-index
From: Christian Couder @ 2016-12-26 10:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

On Mon, Dec 26, 2016 at 11:22 AM, Christian Couder
<christian.couder@gmail.com> wrote:
>
> Highlevel view of the patches in the series
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Except for patch 1/21, there are 3 big steps, one for each new
> configuration variable introduced.
>
> There are only a few small differences between this patch series and the
> v2 patch series sent a few weeks ago. Only two commits have been
> changed a little, as suggested by Duy.

Here is the diff compared to v2:

diff --git a/config.c b/config.c
index 5c52cefd78..d6c8f8f3ba 100644
--- a/config.c
+++ b/config.c
@@ -1724,7 +1724,7 @@ int git_config_get_untracked_cache(void)

 int git_config_get_split_index(void)
 {
-       int val = -1;
+       int val;

        if (!git_config_get_maybe_bool("core.splitindex", &val))
                return val;
diff --git a/read-cache.c b/read-cache.c
index 772343ab25..bf0ac1ce61 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2227,18 +2227,17 @@ static unsigned long get_shared_index_expire_date(void)
        return shared_index_expire_date;
 }

-static int can_delete_shared_index(const char *shared_sha1_hex)
+static int can_delete_shared_index(const char *shared_index_path)
 {
        struct stat st;
        unsigned long expiration;
-       const char *shared_index = git_path("sharedindex.%s", shared_sha1_hex);

        /* Check timestamp */
        expiration = get_shared_index_expire_date();
        if (!expiration)
                return 0;
-       if (stat(shared_index, &st))
-               return error_errno(_("could not stat '%s"), shared_index);
+       if (stat(shared_index_path, &st))
+               return error_errno(_("could not stat '%s"), shared_index_path);
        if (st.st_mtime > expiration)
                return 0;

@@ -2255,13 +2254,15 @@ static int clean_shared_index_files(const char
*current_hex)

        while ((de = readdir(dir)) != NULL) {
                const char *sha1_hex;
+               const char *shared_index_path;
                if (!skip_prefix(de->d_name, "sharedindex.", &sha1_hex))
                        continue;
                if (!strcmp(sha1_hex, current_hex))
                        continue;
-               if (can_delete_shared_index(sha1_hex) > 0 &&
-                   unlink(git_path("%s", de->d_name)))
-                       error_errno(_("unable to unlink: %s"),
git_path("%s", de->d_name));
+               shared_index_path = git_path("%s", de->d_name);
+               if (can_delete_shared_index(shared_index_path) > 0 &&
+                   unlink(shared_index_path))
+                       error_errno(_("unable to unlink: %s"),
shared_index_path);
        }
        closedir(dir);

^ permalink raw reply related

* [PATCH v3 06/21] t1700: add tests for core.splitIndex
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 t/t1700-split-index.sh | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 292a0720fc..db8c39f446 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -200,4 +200,41 @@ EOF
 	test_cmp expect actual
 '
 
+test_expect_success 'set core.splitIndex config variable to true' '
+	git config core.splitIndex true &&
+	: >three &&
+	git update-index --add three &&
+	git ls-files --stage >ls-files.actual &&
+	cat >ls-files.expect <<EOF &&
+100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0	one
+100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0	three
+100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0	two
+EOF
+	test_cmp ls-files.expect ls-files.actual &&
+	BASE=$(test-dump-split-index .git/index | grep "^base") &&
+	test-dump-split-index .git/index | sed "/^own/d" >actual &&
+	cat >expect <<EOF &&
+$BASE
+replacements:
+deletions:
+EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'set core.splitIndex config variable to false' '
+	git config core.splitIndex false &&
+	git update-index --force-remove three &&
+	git ls-files --stage >ls-files.actual &&
+	cat >ls-files.expect <<EOF &&
+100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0	one
+100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0	two
+EOF
+	test_cmp ls-files.expect ls-files.actual &&
+	test-dump-split-index .git/index | sed "/^own/d" >actual &&
+	cat >expect <<EOF &&
+not a split index
+EOF
+	test_cmp expect actual
+'
+
 test_done
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 08/21] Documentation/git-update-index: talk about core.splitIndex config var
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/git-update-index.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index 7386c93162..e091b2a409 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -171,6 +171,12 @@ may not support it yet.
 	given again, all changes in $GIT_DIR/index are pushed back to
 	the shared index file. This mode is designed for very large
 	indexes that take a significant amount of time to read or write.
++
+These options take effect whatever the value of the `core.splitIndex`
+configuration variable (see linkgit:git-config[1]). But a warning is
+emitted when the change goes against the configured value, as the
+configured value will take effect next time the index is read and this
+will remove the intended effect of the option.
 
 --untracked-cache::
 --no-untracked-cache::
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 07/21] Documentation/config: add information for core.splitIndex
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/config.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index d51182a060..221c5982c0 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -331,6 +331,10 @@ core.trustctime::
 	crawlers and some backup systems).
 	See linkgit:git-update-index[1]. True by default.
 
+core.splitIndex::
+	If true, the split-index feature of the index will be used.
+	See linkgit:git-update-index[1]. False by default.
+
 core.untrackedCache::
 	Determines what to do about the untracked cache feature of the
 	index. It will be kept, if this variable is unset or set to
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 13/21] sha1_file: make check_and_freshen_file() non static
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

This function will be used in a commit soon, so let's make
it available globally.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 cache.h     | 3 +++
 sha1_file.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index e15b421b6f..f442f28189 100644
--- a/cache.h
+++ b/cache.h
@@ -1170,6 +1170,9 @@ extern int has_pack_index(const unsigned char *sha1);
 
 extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect);
 
+/* Helper to check and "touch" a file */
+extern int check_and_freshen_file(const char *fn, int freshen);
+
 extern const signed char hexval_table[256];
 static inline unsigned int hexval(unsigned char c)
 {
diff --git a/sha1_file.c b/sha1_file.c
index 1173071859..f5303d955a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -601,7 +601,7 @@ static int freshen_file(const char *fn)
  * either does not exist on disk, or has a stale mtime and may be subject to
  * pruning).
  */
-static int check_and_freshen_file(const char *fn, int freshen)
+int check_and_freshen_file(const char *fn, int freshen)
 {
 	if (access(fn, F_OK))
 		return 0;
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 14/21] read-cache: touch shared index files when used
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

When a split-index file is created, let's update the mtime of the
shared index file that the split-index file is referencing.

In a following commit we will make shared index files expire
depending on their mtime, so updating the mtime makes sure that
the shared index file will not be deleted soon.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 read-cache.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index a1aaec5135..9fbad2044b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1682,6 +1682,19 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	die("index file corrupt");
 }
 
+/*
+ * Signal that the shared index is used by updating its mtime.
+ *
+ * This way, shared index can be removed if they have not been used
+ * for some time. It's ok to fail to update the mtime if we are on a
+ * read only file system.
+ */
+void freshen_shared_index(char *base_sha1_hex)
+{
+	const char *shared_index = git_path("sharedindex.%s", base_sha1_hex);
+	check_and_freshen_file(shared_index, 1);
+}
+
 int read_index_from(struct index_state *istate, const char *path)
 {
 	struct split_index *split_index;
@@ -2273,6 +2286,8 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		int ret = write_shared_index(istate, lock, flags);
 		if (ret)
 			return ret;
+	} else {
+		freshen_shared_index(sha1_to_hex(si->base_sha1));
 	}
 
 	return write_split_index(istate, lock, flags);
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 16/21] read-cache: unlink old sharedindex files
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Every time split index is turned on, it creates a "sharedindex.XXXX"
file in the git directory. This change makes sure that shared index
files that haven't been used for a long time are removed when a new
shared index file is created.

The new "splitIndex.sharedIndexExpire" config variable is created
to tell the delay after which an unused shared index file can be
deleted. It defaults to "1.week.ago".

A previous commit made sure that each time a split index file is
created the mtime of the shared index file it references is updated.
This makes sure that recently used shared index files will not be
deleted.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 read-cache.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 9fbad2044b..e62a6c742d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2207,6 +2207,65 @@ static int write_split_index(struct index_state *istate,
 	return ret;
 }
 
+static const char *shared_index_expire = "1.week.ago";
+
+static unsigned long get_shared_index_expire_date(void)
+{
+	static unsigned long shared_index_expire_date;
+	static int shared_index_expire_date_prepared;
+
+	if (!shared_index_expire_date_prepared) {
+		git_config_get_expiry("splitindex.sharedindexexpire",
+				      &shared_index_expire);
+		shared_index_expire_date = approxidate(shared_index_expire);
+		shared_index_expire_date_prepared = 1;
+	}
+
+	return shared_index_expire_date;
+}
+
+/*
+ * Return 1 if this shared index file has expired and may be deleted, 0 if
+ * it must be kept (a zero expiry date keeps every file), or the value of
+ * error_errno() when stat() fails.
+ */
+static int can_delete_shared_index(const char *shared_index_path)
+{
+	struct stat st;
+	unsigned long expiration = get_shared_index_expire_date();
+
+	if (!expiration)
+		return 0;
+	if (stat(shared_index_path, &st))
+		return error_errno(_("could not stat '%s'"), shared_index_path);
+	return st.st_mtime <= expiration;
+}
+
+static int clean_shared_index_files(const char *current_hex)
+{
+	struct dirent *de;
+	DIR *dir = opendir(get_git_dir());
+
+	if (!dir)
+		return error_errno(_("unable to open git dir: %s"), get_git_dir());
+
+	while ((de = readdir(dir)) != NULL) {
+		const char *sha1_hex;
+		const char *shared_index_path;
+		if (!skip_prefix(de->d_name, "sharedindex.", &sha1_hex))
+			continue;
+		if (!strcmp(sha1_hex, current_hex))
+			continue;
+		shared_index_path = git_path("%s", de->d_name);
+		if (can_delete_shared_index(shared_index_path) > 0 &&
+		    unlink(shared_index_path))
+			error_errno(_("unable to unlink: %s"), shared_index_path);
+	}
+	closedir(dir);
+
+	return 0;
+}
+
 static struct tempfile temporary_sharedindex;
 
 static int write_shared_index(struct index_state *istate,
@@ -2228,8 +2287,11 @@ static int write_shared_index(struct index_state *istate,
 	}
 	ret = rename_tempfile(&temporary_sharedindex,
 			      git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
-	if (!ret)
+	if (!ret) {
 		hashcpy(si->base_sha1, si->base->sha1);
+		clean_shared_index_files(sha1_to_hex(si->base->sha1));
+	}
+
 	return ret;
 }
 
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 18/21] read-cache: refactor read_index_from()
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

It looks better and is simpler to review when we don't compute
the same things many times in the function.

It will also help make the following commit simpler.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 read-cache.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index e62a6c742d..98ef1323d6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1699,6 +1699,8 @@ int read_index_from(struct index_state *istate, const char *path)
 {
 	struct split_index *split_index;
 	int ret;
+	char *base_sha1_hex;
+	const char *base_path;
 
 	/* istate->initialized covers both .git/index and .git/sharedindex.xxx */
 	if (istate->initialized)
@@ -1716,15 +1718,15 @@ int read_index_from(struct index_state *istate, const char *path)
 		discard_index(split_index->base);
 	else
 		split_index->base = xcalloc(1, sizeof(*split_index->base));
-	ret = do_read_index(split_index->base,
-			    git_path("sharedindex.%s",
-				     sha1_to_hex(split_index->base_sha1)), 1);
+
+	base_sha1_hex = sha1_to_hex(split_index->base_sha1);
+	base_path = git_path("sharedindex.%s", base_sha1_hex);
+	ret = do_read_index(split_index->base, base_path, 1);
 	if (hashcmp(split_index->base_sha1, split_index->base->sha1))
 		die("broken index, expect %s in %s, got %s",
-		    sha1_to_hex(split_index->base_sha1),
-		    git_path("sharedindex.%s",
-			     sha1_to_hex(split_index->base_sha1)),
+		    base_sha1_hex, base_path,
 		    sha1_to_hex(split_index->base->sha1));
+
 	merge_base_index(istate);
 	post_read_index_from(istate);
 	return ret;
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 21/21] Documentation/git-update-index: explain splitIndex.*
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/config.txt           |  6 +++---
 Documentation/git-update-index.txt | 37 +++++++++++++++++++++++++++++--------
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 24e771d22e..87a570cf88 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2792,9 +2792,9 @@ splitIndex.sharedIndexExpire::
 	index file is created. The value "now" expires all entries
 	immediately, and "never" suppresses expiration altogether.
 	The default value is "one.week.ago".
-	Note that each time a new split-index file is created, the
-	mtime of the related shared index file is updated to the
-	current time.
+	Note that each time a split index based on a shared index file
+	is either created or read from, the mtime of the shared index
+	file is updated to the current time.
 	See linkgit:git-update-index[1].
 
 status.relativePaths::
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index e091b2a409..46c953b2f2 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -163,14 +163,10 @@ may not support it yet.
 
 --split-index::
 --no-split-index::
-	Enable or disable split index mode. If enabled, the index is
-	split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
-	Changes are accumulated in $GIT_DIR/index while the shared
-	index file contains all index entries stays unchanged. If
-	split-index mode is already enabled and `--split-index` is
-	given again, all changes in $GIT_DIR/index are pushed back to
-	the shared index file. This mode is designed for very large
-	indexes that take a significant amount of time to read or write.
+	Enable or disable split index mode. If split-index mode is
+	already enabled and `--split-index` is given again, all
+	changes in $GIT_DIR/index are pushed back to the shared index
+	file.
 +
 These options take effect whatever the value of the `core.splitIndex`
 configuration variable (see linkgit:git-config[1]). But a warning is
@@ -394,6 +390,31 @@ Although this bit looks similar to assume-unchanged bit, its goal is
 different from assume-unchanged bit's. Skip-worktree also takes
 precedence over assume-unchanged bit when both are set.
 
+Split index
+-----------
+
+This mode is designed for very large indexes that take a significant
+amount of time to read or write.
+
+In this mode, the index is split into two files, $GIT_DIR/index and
+$GIT_DIR/sharedindex.<SHA-1>. Changes are accumulated in
+$GIT_DIR/index, the split index, while the shared index file contains
+all index entries and stays unchanged.
+
+All changes in the split index are pushed back to the shared index
+file when the number of entries in the split index reaches a level
+specified by the splitIndex.maxPercentChange config variable (see
+linkgit:git-config[1]).
+
+Each time a new shared index file is created, the old shared index
+files are deleted if their mtime is older than what is specified by
+the splitIndex.sharedIndexExpire config variable (see
+linkgit:git-config[1]).
+
+To avoid deleting a shared index file that is still used, its mtime is
+updated to the current time every time a new split index based on the
+shared index file is either created or read from.
+
 Untracked cache
 ---------------
 
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 17/21] t1700: test shared index file expiration
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 t/t1700-split-index.sh | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f03addf654..f448fc13cd 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -310,4 +310,48 @@ EOF
 	test_cmp expect actual
 '
 
+test_expect_success 'shared index files expire after 7 days by default' '
+	: >ten &&
+	git update-index --add ten &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	just_under_7_days_ago=$((1-7*86400)) &&
+	test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
+	: >eleven &&
+	git update-index --add eleven &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	just_over_7_days_ago=$((-1-7*86400)) &&
+	test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+	: >twelve &&
+	git update-index --add twelve &&
+	test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
+	git config splitIndex.sharedIndexExpire "8.days.ago" &&
+	test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
+	: >thirteen &&
+	git update-index --add thirteen &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	just_over_8_days_ago=$((-1-8*86400)) &&
+	test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
+	: >fourteen &&
+	git update-index --add fourteen &&
+	test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
+test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and "now"' '
+	git config splitIndex.sharedIndexExpire never &&
+	just_10_years_ago=$((-365*10*86400)) &&
+	test-chmtime =$just_10_years_ago .git/sharedindex.* &&
+	: >fifteen &&
+	git update-index --add fifteen &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	git config splitIndex.sharedIndexExpire now &&
+	just_1_second_ago=-1 &&
+	test-chmtime =$just_1_second_ago .git/sharedindex.* &&
+	: >sixteen &&
+	git update-index --add sixteen &&
+	test $(ls .git/sharedindex.* | wc -l) = 1
+'
+
 test_done
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 20/21] Documentation/config: add splitIndex.sharedIndexExpire
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/config.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index e0f5a77980..24e771d22e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2786,6 +2786,17 @@ splitIndex.maxPercentChange::
 	than 20 percent of the total number of entries.
 	See linkgit:git-update-index[1].
 
+splitIndex.sharedIndexExpire::
+	When the split index feature is used, shared index files with
+	a mtime older than this time will be removed when a new shared
+	index file is created. The value "now" expires all entries
+	immediately, and "never" suppresses expiration altogether.
+	The default value is "one.week.ago".
+	Note that each time a new split-index file is created, the
+	mtime of the related shared index file is updated to the
+	current time.
+	See linkgit:git-update-index[1].
+
 status.relativePaths::
 	By default, linkgit:git-status[1] shows paths relative to the
 	current directory. Setting this variable to `false` shows paths
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related

* [PATCH v3 19/21] read-cache: use freshen_shared_index() in read_index_from()
From: Christian Couder @ 2016-12-26 10:22 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Nguyen Thai Ngoc Duy,
	Ævar Arnfjörð Bjarmason, Christian Couder
In-Reply-To: <20161226102222.17150-1-chriscool@tuxfamily.org>

This way a shared index file will not be garbage collected if
we still read from an index that is based on it.

As we need to read the current index before creating a new
one, the tests have to be adjusted, so that we don't expect
an old shared index file to be deleted right away when we
create a new one.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 read-cache.c           |  1 +
 t/t1700-split-index.sh | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 98ef1323d6..bf0ac1ce61 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1727,6 +1727,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		    base_sha1_hex, base_path,
 		    sha1_to_hex(split_index->base->sha1));
 
+	freshen_shared_index(base_sha1_hex);
 	merge_base_index(istate);
 	post_read_index_from(istate);
 	return ret;
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index f448fc13cd..800f84a593 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -313,17 +313,17 @@ EOF
 test_expect_success 'shared index files expire after 7 days by default' '
 	: >ten &&
 	git update-index --add ten &&
-	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
 	just_under_7_days_ago=$((1-7*86400)) &&
 	test-chmtime =$just_under_7_days_ago .git/sharedindex.* &&
 	: >eleven &&
 	git update-index --add eleven &&
-	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
 	just_over_7_days_ago=$((-1-7*86400)) &&
 	test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
 	: >twelve &&
 	git update-index --add twelve &&
-	test $(ls .git/sharedindex.* | wc -l) = 1
+	test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
@@ -331,12 +331,12 @@ test_expect_success 'check splitIndex.sharedIndexExpire set to 8 days' '
 	test-chmtime =$just_over_7_days_ago .git/sharedindex.* &&
 	: >thirteen &&
 	git update-index --add thirteen &&
-	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
 	just_over_8_days_ago=$((-1-8*86400)) &&
 	test-chmtime =$just_over_8_days_ago .git/sharedindex.* &&
 	: >fourteen &&
 	git update-index --add fourteen &&
-	test $(ls .git/sharedindex.* | wc -l) = 1
+	test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and "now"' '
@@ -345,13 +345,13 @@ test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and "now"
 	test-chmtime =$just_10_years_ago .git/sharedindex.* &&
 	: >fifteen &&
 	git update-index --add fifteen &&
-	test $(ls .git/sharedindex.* | wc -l) -gt 1 &&
+	test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
 	git config splitIndex.sharedIndexExpire now &&
 	just_1_second_ago=-1 &&
 	test-chmtime =$just_1_second_ago .git/sharedindex.* &&
 	: >sixteen &&
 	git update-index --add sixteen &&
-	test $(ls .git/sharedindex.* | wc -l) = 1
+	test $(ls .git/sharedindex.* | wc -l) -le 2
 '
 
 test_done
-- 
2.11.0.209.gda91e66374.dirty


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox