Git development
 help / color / mirror / Atom feed
* [PATCH 0/5] sequencer: allow skipping commits
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta

This series introduces a few options to the sequencer,
to allow skipping unwanted/unnecessary commits.

The first patch is just cleanup. The second fixes a potential issue
about sequencing options not being correctly remembered across
interruptions.

The next two introduce cherry-pick options to skip empty (or only
redundant) commits. The two options are introduced separately because
of the complexity associated with the possible combinations that can be
had.

The last commit allows --skip as a reset + --continue, to quickly skip
the current commit during a failed cherry-pick or revert (for example
because a better version of the commit was already merged).

Giuseppe Bilotta (5):
  sequencer: sort options load/save by struct position
  sequencer: save/load all options
  cherry-pick: option to skip empty commits
  cherry-pick: allow skipping only redundant commits
  sequencer: allow to --skip current commit

 Documentation/git-cherry-pick.txt |  10 +++
 Documentation/sequencer.txt       |  10 ++-
 builtin/revert.c                  |  24 +++++-
 sequencer.c                       | 163 ++++++++++++++++++++++++++++++--------
 sequencer.h                       |   4 +-
 5 files changed, 176 insertions(+), 35 deletions(-)

-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply

* [PATCH 5/5] sequencer: allow to --skip current commit
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta
In-Reply-To: <20170123225221.3659-1-giuseppe.bilotta@gmail.com>

If a sequencing gets interrupted (by a conflict or an empty commit or
whatever), the user can now opt to just skip it passing the `--skip`
command line option, which acts like a `--continue`, except that the
current commit gets skipped.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 Documentation/sequencer.txt | 10 +++++++++-
 builtin/revert.c            |  7 +++++--
 sequencer.c                 | 32 ++++++++++++++++++++++++++++----
 sequencer.h                 |  2 +-
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/Documentation/sequencer.txt b/Documentation/sequencer.txt
index 5747f442f2..095d6cd732 100644
--- a/Documentation/sequencer.txt
+++ b/Documentation/sequencer.txt
@@ -1,7 +1,15 @@
 --continue::
 	Continue the operation in progress using the information in
 	'.git/sequencer'.  Can be used to continue after resolving
-	conflicts in a failed cherry-pick or revert.
+	conflicts in a failed cherry-pick or revert.  Use `--skip`
+	instead if the current commit should be ignored.
+
+--skip::
+	Skips the current commit, and then continues the operation
+	in progress using the information in '.git/sequencer'.i
+	Can be used to continue to a cherry-pick or rever that was
+	interrupted by an empty commit, or by a commit that conflicts
+	and for which the resolution is to discard the commit.
 
 --quit::
 	Forget about the current operation in progress.  Can be used
diff --git a/builtin/revert.c b/builtin/revert.c
index aca8a1d9d0..dece0bebf7 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -79,6 +79,7 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 	struct option base_options[] = {
 		OPT_CMDMODE(0, "quit", &cmd, N_("end revert or cherry-pick sequence"), 'q'),
 		OPT_CMDMODE(0, "continue", &cmd, N_("resume revert or cherry-pick sequence"), 'c'),
+		OPT_CMDMODE(0, "skip", &cmd, N_("resume revert or cherry-pick sequence, skipping this commit"), 's'),
 		OPT_CMDMODE(0, "abort", &cmd, N_("cancel revert or cherry-pick sequence"), 'a'),
 		OPT_BOOL('n', "no-commit", &opts->no_commit, N_("don't automatically commit")),
 		OPT_BOOL('e', "edit", &opts->edit, N_("edit the commit message")),
@@ -127,6 +128,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 			this_operation = "--quit";
 		else if (cmd == 'c')
 			this_operation = "--continue";
+		else if (cmd == 's')
+			this_operation = "--skip";
 		else {
 			assert(cmd == 'a');
 			this_operation = "--abort";
@@ -188,8 +191,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 
 	if (cmd == 'q')
 		return sequencer_remove_state(opts);
-	if (cmd == 'c')
-		return sequencer_continue(opts);
+	if (cmd == 'c' || cmd == 's')
+		return sequencer_continue(opts, cmd);
 	if (cmd == 'a')
 		return sequencer_rollback(opts);
 	return sequencer_pick_revisions(opts);
diff --git a/sequencer.c b/sequencer.c
index 333d9112de..cfe8c06989 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1359,21 +1359,45 @@ static int continue_single_pick(void)
 	return run_command_v_opt(argv, RUN_GIT_CMD);
 }
 
-int sequencer_continue(struct replay_opts *opts)
+/*
+ * Continue the sequencing, after either committing
+ * (cmd == 'c') or skipping (cmd == 's') the current
+ * commit.
+ */
+int sequencer_continue(struct replay_opts *opts, char cmd)
 {
 	struct todo_list todo_list = TODO_LIST_INIT;
-	int res;
+	int single, res;
 
 	if (read_and_refresh_cache(opts))
 		return -1;
 
-	if (!file_exists(get_todo_path(opts)))
-		return continue_single_pick();
+	if (!file_exists(get_todo_path(opts))) {
+		if (cmd == 'c') {
+			return continue_single_pick();
+		} else {
+			assert(cmd == 's');
+			/* Skipping the only commit is equivalent to an abort */
+			return sequencer_rollback(opts);
+		}
+	}
 	if (read_populate_opts(opts))
 		return -1;
 	if ((res = read_populate_todo(&todo_list, opts)))
 		goto release_todo_list;
 
+	/* If we were asked to skip this commit, rollback
+	 * and continue with the next */
+	if (cmd == 's') {
+		if ((res = rollback_single_pick()))
+			goto release_todo_list;
+		discard_cache();
+		if ((res = read_cache()) < 0)
+			goto release_todo_list;
+		printf("index unchanged: %d\n", is_index_unchanged());
+		goto skip_this_commit;
+	}
+
 	/* check if there is something to commit */
 	res = is_index_unchanged();
 	if (res < 0)
diff --git a/sequencer.h b/sequencer.h
index f8b8bd0063..afc4bb4e6c 100644
--- a/sequencer.h
+++ b/sequencer.h
@@ -41,7 +41,7 @@ struct replay_opts {
 #define REPLAY_OPTS_INIT { -1 }
 
 int sequencer_pick_revisions(struct replay_opts *opts);
-int sequencer_continue(struct replay_opts *opts);
+int sequencer_continue(struct replay_opts *opts, char cmd);
 int sequencer_rollback(struct replay_opts *opts);
 int sequencer_remove_state(struct replay_opts *opts);
 
-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply related

* [PATCH 3/5] cherry-pick: option to skip empty commits
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta
In-Reply-To: <20170123225221.3659-1-giuseppe.bilotta@gmail.com>

This allows cherry-picking a set of commits, some of which may be
redundant, without stopping to ask for the user intervention.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 Documentation/git-cherry-pick.txt |  4 ++++
 builtin/revert.c                  |  1 +
 sequencer.c                       | 45 +++++++++++++++++++++++++++++++--------
 sequencer.h                       |  1 +
 4 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/Documentation/git-cherry-pick.txt b/Documentation/git-cherry-pick.txt
index d35d771fc8..ffced816d6 100644
--- a/Documentation/git-cherry-pick.txt
+++ b/Documentation/git-cherry-pick.txt
@@ -138,6 +138,10 @@ effect to your index in a row.
 	examine the commit. This option overrides that behavior and
 	creates an empty commit object.  Implies `--allow-empty`.
 
+--skip-empty::
+	This option causes empty and redundant cherry-picked commits to
+	be skipped without requesting the user intervention.
+
 --strategy=<strategy>::
 	Use the given merge strategy.  Should only be used once.
 	See the MERGE STRATEGIES section in linkgit:git-merge[1]
diff --git a/builtin/revert.c b/builtin/revert.c
index 4ca5b51544..ffdd367f99 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -102,6 +102,7 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 			OPT_BOOL(0, "allow-empty", &opts->allow_empty, N_("preserve initially empty commits")),
 			OPT_BOOL(0, "allow-empty-message", &opts->allow_empty_message, N_("allow commits with empty messages")),
 			OPT_BOOL(0, "keep-redundant-commits", &opts->keep_redundant_commits, N_("keep redundant, empty commits")),
+			OPT_BOOL(0, "skip-empty", &opts->skip_empty, N_("skip redundant, empty commits")),
 			OPT_END(),
 		};
 		options = parse_options_concat(options, cp_extra);
diff --git a/sequencer.c b/sequencer.c
index 3d2f61c979..9c01310162 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -550,22 +550,32 @@ static int is_original_commit_empty(struct commit *commit)
 
 /*
  * Do we run "git commit" with "--allow-empty"?
+ *
+ * Or do we just skip this empty commit?
+ *
+ * Returns 1 if a commit should be done with --allow-empty,
+ *         0 if a commit should be done without --allow-empty,
+ *         2 if no commit should be done at all (skip empty commit)
+ *         negative values in case of error
+ *
  */
-static int allow_empty(struct replay_opts *opts, struct commit *commit)
+static int allow_or_skip_empty(struct replay_opts *opts, struct commit *commit)
 {
 	int index_unchanged, empty_commit;
 
 	/*
-	 * Three cases:
+	 * Four cases:
 	 *
-	 * (1) we do not allow empty at all and error out.
+	 * (1) we do not allow empty at all and error out;
 	 *
-	 * (2) we allow ones that were initially empty, but
+	 * (2) we skip empty commits altogether;
+	 *
+	 * (3) we allow ones that were initially empty, but
 	 * forbid the ones that become empty;
 	 *
-	 * (3) we allow both.
+	 * (4) we allow both.
 	 */
-	if (!opts->allow_empty)
+	if (!opts->allow_empty && !opts->skip_empty)
 		return 0; /* let "git commit" barf as necessary */
 
 	index_unchanged = is_index_unchanged();
@@ -574,6 +584,9 @@ static int allow_empty(struct replay_opts *opts, struct commit *commit)
 	if (!index_unchanged)
 		return 0; /* we do not have to say --allow-empty */
 
+	if (opts->skip_empty)
+		return 2;
+
 	if (opts->keep_redundant_commits)
 		return 1;
 
@@ -612,7 +625,7 @@ static int do_pick_commit(enum todo_command command, struct commit *commit,
 	const char *base_label, *next_label;
 	struct commit_message msg = { NULL, NULL, NULL, NULL };
 	struct strbuf msgbuf = STRBUF_INIT;
-	int res, unborn = 0, allow;
+	int res = 0, unborn = 0, allow;
 
 	if (opts->no_commit) {
 		/*
@@ -771,12 +784,13 @@ static int do_pick_commit(enum todo_command command, struct commit *commit,
 		goto leave;
 	}
 
-	allow = allow_empty(opts, commit);
+	allow = allow_or_skip_empty(opts, commit);
 	if (allow < 0) {
 		res = allow;
 		goto leave;
 	}
-	if (!opts->no_commit)
+	/* allow == 2 means skip this commit */
+	if (allow != 2 && !opts->no_commit)
 		res = run_git_commit(opts->edit ? NULL : git_path_merge_msg(),
 				     opts, allow, opts->edit, 0, 0);
 
@@ -993,6 +1007,8 @@ static int populate_opts_cb(const char *key, const char *value, void *data)
 		opts->allow_empty_message = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.keep-redundant-commits"))
 		opts->keep_redundant_commits = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.skip-empty"))
+		opts->skip_empty = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.mainline"))
 		opts->mainline = git_config_int(key, value);
 	else if (!strcmp(key, "options.gpg-sign"))
@@ -1249,6 +1265,8 @@ static int save_opts(struct replay_opts *opts)
 		res |= git_config_set_in_file_gently(opts_file, "options.allow-empty-message", "true");
 	if (opts->keep_redundant_commits)
 		res |= git_config_set_in_file_gently(opts_file, "options.keep-redundant-commits", "true");
+	if (opts->skip_empty)
+		res |= git_config_set_in_file_gently(opts_file, "options.skip-empty", "true");
 	if (opts->mainline) {
 		struct strbuf buf = STRBUF_INIT;
 		strbuf_addf(&buf, "%d", opts->mainline);
@@ -1322,6 +1340,14 @@ int sequencer_continue(struct replay_opts *opts)
 	if ((res = read_populate_todo(&todo_list, opts)))
 		goto release_todo_list;
 
+	/* check if there is something to commit */
+	res = is_index_unchanged();
+	if (res < 0)
+		goto release_todo_list;
+
+	if (res && opts->skip_empty)
+		goto skip_this_commit;
+
 	/* Verify that the conflict has been resolved */
 	if (file_exists(git_path_cherry_pick_head()) ||
 	    file_exists(git_path_revert_head())) {
@@ -1333,6 +1359,7 @@ int sequencer_continue(struct replay_opts *opts)
 		res = error_dirty_index(opts);
 		goto release_todo_list;
 	}
+skip_this_commit:
 	todo_list.current++;
 	res = pick_commits(&todo_list, opts);
 release_todo_list:
diff --git a/sequencer.h b/sequencer.h
index 7a513c576b..c747cfcfc7 100644
--- a/sequencer.h
+++ b/sequencer.h
@@ -23,6 +23,7 @@ struct replay_opts {
 	int allow_empty;
 	int allow_empty_message;
 	int keep_redundant_commits;
+	int skip_empty;
 
 	int mainline;
 
-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply related

* [PATCH 1/5] sequencer: sort options load/save by struct position
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta
In-Reply-To: <20170123225221.3659-1-giuseppe.bilotta@gmail.com>

No functional change. The order in which options are serialized and
reloaded is now the same in which they appear in the replay_opts
structure. This makes it easier to spot when we forget to
serialize/reload an option value.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 sequencer.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 9adb7bbf1d..672c81b559 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -975,22 +975,22 @@ static int populate_opts_cb(const char *key, const char *value, void *data)
 
 	if (!value)
 		error_flag = 0;
-	else if (!strcmp(key, "options.no-commit"))
-		opts->no_commit = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.edit"))
 		opts->edit = git_config_bool_or_int(key, value, &error_flag);
-	else if (!strcmp(key, "options.signoff"))
-		opts->signoff = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.record-origin"))
 		opts->record_origin = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.no-commit"))
+		opts->no_commit = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.signoff"))
+		opts->signoff = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.allow-ff"))
 		opts->allow_ff = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.mainline"))
 		opts->mainline = git_config_int(key, value);
-	else if (!strcmp(key, "options.strategy"))
-		git_config_string_dup(&opts->strategy, key, value);
 	else if (!strcmp(key, "options.gpg-sign"))
 		git_config_string_dup(&opts->gpg_sign, key, value);
+	else if (!strcmp(key, "options.strategy"))
+		git_config_string_dup(&opts->strategy, key, value);
 	else if (!strcmp(key, "options.strategy-option")) {
 		ALLOC_GROW(opts->xopts, opts->xopts_nr + 1, opts->xopts_alloc);
 		opts->xopts[opts->xopts_nr++] = xstrdup(value);
@@ -1223,14 +1223,14 @@ static int save_opts(struct replay_opts *opts)
 	const char *opts_file = git_path_opts_file();
 	int res = 0;
 
-	if (opts->no_commit)
-		res |= git_config_set_in_file_gently(opts_file, "options.no-commit", "true");
 	if (opts->edit)
 		res |= git_config_set_in_file_gently(opts_file, "options.edit", "true");
-	if (opts->signoff)
-		res |= git_config_set_in_file_gently(opts_file, "options.signoff", "true");
 	if (opts->record_origin)
 		res |= git_config_set_in_file_gently(opts_file, "options.record-origin", "true");
+	if (opts->no_commit)
+		res |= git_config_set_in_file_gently(opts_file, "options.no-commit", "true");
+	if (opts->signoff)
+		res |= git_config_set_in_file_gently(opts_file, "options.signoff", "true");
 	if (opts->allow_ff)
 		res |= git_config_set_in_file_gently(opts_file, "options.allow-ff", "true");
 	if (opts->mainline) {
@@ -1239,10 +1239,10 @@ static int save_opts(struct replay_opts *opts)
 		res |= git_config_set_in_file_gently(opts_file, "options.mainline", buf.buf);
 		strbuf_release(&buf);
 	}
+	if (opts->gpg_sign)
+		res |= git_config_set_in_file_gently(opts_file, "options.gpg-sign", opts->gpg_sign);
 	if (opts->strategy)
 		res |= git_config_set_in_file_gently(opts_file, "options.strategy", opts->strategy);
-	if (opts->gpg_sign)
-		res |= git_config_set_in_file_gently(opts_file, "options.gpg-sign", opts->gpg_sign);
 	if (opts->xopts) {
 		int i;
 		for (i = 0; i < opts->xopts_nr; i++)
-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply related

* [PATCH 4/5] cherry-pick: allow skipping only redundant commits
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta
In-Reply-To: <20170123225221.3659-1-giuseppe.bilotta@gmail.com>

This allows the preservation of originally empty commits with the
combination of flags --allow-empty --skip-redundant-commits.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 Documentation/git-cherry-pick.txt |  8 ++++-
 builtin/revert.c                  | 18 +++++++++++-
 sequencer.c                       | 62 ++++++++++++++++++++++++++++++---------
 sequencer.h                       |  1 +
 4 files changed, 73 insertions(+), 16 deletions(-)

diff --git a/Documentation/git-cherry-pick.txt b/Documentation/git-cherry-pick.txt
index ffced816d6..147e0cde0c 100644
--- a/Documentation/git-cherry-pick.txt
+++ b/Documentation/git-cherry-pick.txt
@@ -138,9 +138,15 @@ effect to your index in a row.
 	examine the commit. This option overrides that behavior and
 	creates an empty commit object.  Implies `--allow-empty`.
 
+--skip-redundant-commits::
+	Redundant commits will be skipped altogether. This does not
+	influence commits that were originally empty (see
+	`--allow-empty` and `--skip-empty`).
+
 --skip-empty::
 	This option causes empty and redundant cherry-picked commits to
-	be skipped without requesting the user intervention.
+	be skipped without requesting the user intervention. Implies
+	`--skip-redundant-commits`.
 
 --strategy=<strategy>::
 	Use the given merge strategy.  Should only be used once.
diff --git a/builtin/revert.c b/builtin/revert.c
index ffdd367f99..aca8a1d9d0 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -102,7 +102,8 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 			OPT_BOOL(0, "allow-empty", &opts->allow_empty, N_("preserve initially empty commits")),
 			OPT_BOOL(0, "allow-empty-message", &opts->allow_empty_message, N_("allow commits with empty messages")),
 			OPT_BOOL(0, "keep-redundant-commits", &opts->keep_redundant_commits, N_("keep redundant, empty commits")),
-			OPT_BOOL(0, "skip-empty", &opts->skip_empty, N_("skip redundant, empty commits")),
+			OPT_BOOL(0, "skip-empty", &opts->skip_empty, N_("skip both redundant and initially empty commits")),
+			OPT_BOOL(0, "skip-redundant-commits", &opts->skip_redundant_commits, N_("skip redundant commits")),
 			OPT_END(),
 		};
 		options = parse_options_concat(options, cp_extra);
@@ -115,6 +116,9 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 	/* implies allow_empty */
 	if (opts->keep_redundant_commits)
 		opts->allow_empty = 1;
+	/* implies skip_redundant_commits */
+	if (opts->skip_empty)
+		opts->skip_redundant_commits = 1;
 
 	/* Check for incompatible command line arguments */
 	if (cmd) {
@@ -147,6 +151,18 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 				"--edit", opts->edit,
 				NULL);
 
+	if (opts->keep_redundant_commits)
+		verify_opt_compatible(me, "--keep-redundant-commits",
+				"--skip-empty", opts->skip_empty,
+				"--skip-redundant-commits", opts->skip_redundant_commits,
+				NULL);
+
+	if (opts->keep_redundant_commits)
+		verify_opt_compatible(me, "--skip-empty",
+				"--allow-empty", opts->allow_empty,
+				"--keep-redundant-commits", opts->keep_redundant_commits,
+				NULL);
+
 	if (cmd) {
 		opts->revs = NULL;
 	} else {
diff --git a/sequencer.c b/sequencer.c
index 9c01310162..333d9112de 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -563,40 +563,70 @@ static int allow_or_skip_empty(struct replay_opts *opts, struct commit *commit)
 {
 	int index_unchanged, empty_commit;
 
-	/*
-	 * Four cases:
+	/* We have four options:
 	 *
-	 * (1) we do not allow empty at all and error out;
+	 * --allow-empty (AE)
+	 * --keep-redundant-commits (KR)
+	 * --skip-empty (SE)
+	 * --skip-redundant-commits (SR)
 	 *
-	 * (2) we skip empty commits altogether;
+	 * Additionally, if KR, then AE. And if SE, then SR.
+	 * 
+	 * We have three possible states:
+	 * Not Empty (NE)
+	 * Originally Empty (OE)
+	 * made REdundant (RE) (originally not empty)
 	 *
-	 * (3) we allow ones that were initially empty, but
-	 * forbid the ones that become empty;
+	 * NE always gets committed. The other two depend on the combination
+	 * of flags.
 	 *
-	 * (4) we allow both.
+	 *              OE outcome | RE outcome | AE  KR  SE  SR
+	 *     Case 0:  0 (error)    0 (error)     0   0   0   0
+	 *     Case 1:  1 (allow)    0 (error)     1   0   0   0
+	 * N/A Case 2:  2 (skip)     0 (error)     0   0   1   0
+	 * N/A Case 3:  0 (error)    1 (keep)      0   1   0   0
+	 *     Case 4:  1 (allow)    1 (keep)      1   1   0   0
+	 * N/A Case 5:  2 (skip)     1 (keep)      0   1   1   0
+	 *     Case 6:  0 (error)    2 (skip)      0   0   0   1
+	 *     Case 7:  1 (allow)    2 (skip)      1   0   0   1
+	 *     Case 8:  2 (skip )    2 (skip)      0   0   1   1
+	 *
+	 * TODO should we allow Case 2? If so, how?
 	 */
-	if (!opts->allow_empty && !opts->skip_empty)
+
+	/* Case 0 */
+	if (!opts->allow_empty && !opts->skip_redundant_commits)
 		return 0; /* let "git commit" barf as necessary */
 
 	index_unchanged = is_index_unchanged();
 	if (index_unchanged < 0)
 		return index_unchanged;
+
 	if (!index_unchanged)
 		return 0; /* we do not have to say --allow-empty */
 
-	if (opts->skip_empty)
-		return 2;
+	/* Here we know that the commit is either OE or RE */
 
+	/* Case 4, we don't care, result is 'allow' for both cases */
 	if (opts->keep_redundant_commits)
 		return 1;
 
+	/* Case 8, we don't care, result is 'skip' for both cases */
+	if (opts->skip_empty)
+		return 2;
+
+	/* Now we must differentiate between OE and RE,
+	 * for Case 1, 6, 7 */
 	empty_commit = is_original_commit_empty(commit);
 	if (empty_commit < 0)
 		return empty_commit;
-	if (!empty_commit)
-		return 0;
-	else
-		return 1;
+
+	/* An OE will return 1 if AE, 0 otherwise */
+	if (empty_commit)
+		return opts->allow_empty;
+
+	/* An RE will return 2 if SR, 0 otherwise */
+	return 2*opts->skip_redundant_commits;
 }
 
 enum todo_command {
@@ -1009,6 +1039,8 @@ static int populate_opts_cb(const char *key, const char *value, void *data)
 		opts->keep_redundant_commits = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.skip-empty"))
 		opts->skip_empty = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.skip-redundant-commits"))
+		opts->skip_redundant_commits = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.mainline"))
 		opts->mainline = git_config_int(key, value);
 	else if (!strcmp(key, "options.gpg-sign"))
@@ -1267,6 +1299,8 @@ static int save_opts(struct replay_opts *opts)
 		res |= git_config_set_in_file_gently(opts_file, "options.keep-redundant-commits", "true");
 	if (opts->skip_empty)
 		res |= git_config_set_in_file_gently(opts_file, "options.skip-empty", "true");
+	if (opts->skip_redundant_commits)
+		res |= git_config_set_in_file_gently(opts_file, "options.skip-redundant-commits", "true");
 	if (opts->mainline) {
 		struct strbuf buf = STRBUF_INIT;
 		strbuf_addf(&buf, "%d", opts->mainline);
diff --git a/sequencer.h b/sequencer.h
index c747cfcfc7..f8b8bd0063 100644
--- a/sequencer.h
+++ b/sequencer.h
@@ -24,6 +24,7 @@ struct replay_opts {
 	int allow_empty_message;
 	int keep_redundant_commits;
 	int skip_empty;
+	int skip_redundant_commits;;
 
 	int mainline;
 
-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply related

* [PATCH 2/5] sequencer: save/load all options
From: Giuseppe Bilotta @ 2017-01-23 22:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Giuseppe Bilotta
In-Reply-To: <20170123225221.3659-1-giuseppe.bilotta@gmail.com>

Add the missing replay_opts to save_opts and populate_opts, so that an
interrupted cherry-pick will continue with the same setup it had before
the interruption.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 sequencer.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/sequencer.c b/sequencer.c
index 672c81b559..3d2f61c979 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -985,6 +985,14 @@ static int populate_opts_cb(const char *key, const char *value, void *data)
 		opts->signoff = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.allow-ff"))
 		opts->allow_ff = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.rerere-autoupdate"))
+		opts->allow_rerere_auto = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.allow-empty"))
+		opts->allow_empty = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.allow-empty-message"))
+		opts->allow_empty_message = git_config_bool_or_int(key, value, &error_flag);
+	else if (!strcmp(key, "options.keep-redundant-commits"))
+		opts->keep_redundant_commits = git_config_bool_or_int(key, value, &error_flag);
 	else if (!strcmp(key, "options.mainline"))
 		opts->mainline = git_config_int(key, value);
 	else if (!strcmp(key, "options.gpg-sign"))
@@ -1233,6 +1241,14 @@ static int save_opts(struct replay_opts *opts)
 		res |= git_config_set_in_file_gently(opts_file, "options.signoff", "true");
 	if (opts->allow_ff)
 		res |= git_config_set_in_file_gently(opts_file, "options.allow-ff", "true");
+	if (opts->allow_rerere_auto)
+		res |= git_config_set_in_file_gently(opts_file, "options.rerere-autoupdate", "true");
+	if (opts->allow_empty)
+		res |= git_config_set_in_file_gently(opts_file, "options.allow-empty", "true");
+	if (opts->allow_empty_message)
+		res |= git_config_set_in_file_gently(opts_file, "options.allow-empty-message", "true");
+	if (opts->keep_redundant_commits)
+		res |= git_config_set_in_file_gently(opts_file, "options.keep-redundant-commits", "true");
 	if (opts->mainline) {
 		struct strbuf buf = STRBUF_INIT;
 		strbuf_addf(&buf, "%d", opts->mainline);
-- 
2.11.0.616.gd72966cf44.dirty


^ permalink raw reply related

* Re: [PATCH] rebase: pass --signoff option to git am
From: Junio C Hamano @ 2017-01-23 23:27 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: Git List
In-Reply-To: <CAOxFTczrLmWZg3720HMUA-13q9ADi_rK5k0x+TEYyKR=xR5b_w@mail.gmail.com>

Giuseppe Bilotta <giuseppe.bilotta@gmail.com> writes:

> On Mon, Jan 23, 2017 at 9:16 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Giuseppe Bilotta <giuseppe.bilotta@gmail.com> writes:
>>
>>> On Mon, Jan 23, 2017 at 7:13 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>>>
>>>> Should we plan to extend this to the interactive backend that is
>>>> shared between rebase -i and rebase -m, too?  Or is this patch
>>>> already sufficient to cover them?
>>>
>>> AFAIK this is sufficient for both, in the sense that I've used it with
>>> git rebase -i and it works.
>>
>> That is a good news and at the same time a bit awkard one ;-)
>>
>> The mention of "passed to 'git am'" twice in the documentation and
>> help text would lead people to think "rebase -i" would not be
>> affected and (1) would need more work to do so, or (2) the user does
>> not want "rebase -i" to be unaffected for whatever reason, and gets
>> surprised to see that it actually does get affected.
>
> I'm not sure I follow. If the user doesn't want to signoff during a
> rebase, they can simply not pass --signoff. If they do, they can not
> pass it. Am I missing something?

alias.

Which also means that there needs to be --no-signoff option that can
be given to countermand an earlier --signoff, if a user did

	[alias] rb = rebase --signoff

and wants to disable it one time only with

	$ git rb --no-signoff

>
>> In any case, will queue as-is so that we won't lose the patch while
>> waiting for people to raise their opinions.
>
> Thanks.

Thanks.  The final version would also need tests, so it may be a
good time to start thinking about what aspect of this feature wants
to be protected against future breakages.

^ permalink raw reply

* Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
From: Jeff King @ 2017-01-23 23:54 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano, Johannes Schindelin
In-Reply-To: <67ac53cd-3fc0-8bd0-30f4-129281c3090f@web.de>

On Sun, Jan 22, 2017 at 06:47:18PM +0100, René Scharfe wrote:

> Use qsort_s() from C11 Annex K to make string_list_sort() safer, in
> particular when called from parallel threads.
> 
> Changes from v1:
> * Renamed HAVE_QSORT_S to HAVE_ISO_QSORT_S in Makefile to disambiguate.
> * Added basic perf test (patch 3).
> * Converted a second caller to QSORT_S, in ref-filter.c (patch 5).

This looks nicely done overall, and I appreciate the perf test.

The speed looks like a reasonable outcome. I'm torn on the qsort_r()
demo patch. I don't think it looks too bad. OTOH, I don't think I would
want to deal with the opposite-argument-order versions.

Is there any interest in people adding the ISO qsort_s() to their libc
implementations? It seems like it's been a fair number of years by now.

-Peff

^ permalink raw reply

* What's cooking in git.git (Jan 2017, #04; Mon, 23)
From: Junio C Hamano @ 2017-01-24  0:18 UTC (permalink / raw)
  To: git

Here are the topics that have been cooking.  Commits prefixed with
'-' are only in 'pu' (proposed updates) while commits prefixed with
'+' are in 'next'.  The ones marked with '.' do not appear in any of
the integration branches, but I am still holding onto them.

You can find the changes described here in the integration branches
of the repositories listed at

    http://git-blame.blogspot.com/p/git-public-repositories.html

--------------------------------------------------
[Graduated to "master"]

* ad/bisect-terms (2017-01-13) 1 commit
  (merged to 'next' on 2017-01-18 at 9f500d6cf5)
 + Documentation/bisect: improve on (bad|new) and (good|bad)

 Documentation fix.


* bw/read-blob-data-does-not-modify-index-state (2017-01-11) 1 commit
  (merged to 'next' on 2017-01-18 at f33363fb07)
 + index: improve constness for reading blob data

 Code clean-up.


* jk/grep-e-could-be-extended-beyond-posix (2017-01-11) 1 commit
  (merged to 'next' on 2017-01-18 at dd1b9d1fa8)
 + t7810: avoid assumption about invalid regex syntax

 Tighten a test to avoid mistaking an extended ERE regexp engine as
 a PRE regexp engine.


* rh/diff-orderfile-doc (2017-01-15) 2 commits
  (merged to 'next' on 2017-01-18 at cc2af9c628)
 + diff: document the format of the -O (diff.orderFile) file
 + diff: document behavior of relative diff.orderFile

 Documentation fix.


* sb/cd-then-git-can-be-written-as-git-c (2017-01-13) 1 commit
  (merged to 'next' on 2017-01-18 at 8923d2d001)
 + lib-submodule-update.sh: reduce use of subshell by using "git -C"

 Test clean-up.


* sb/submodule-config-tests (2017-01-12) 2 commits
  (merged to 'next' on 2017-01-18 at bd850f6ad3)
 + t7411: test lookup of uninitialized submodules
 + t7411: quote URLs

 Test updates.


* sb/submodule-embed-gitdir (2017-01-12) 1 commit
  (merged to 'next' on 2017-01-18 at 0a5e24a3f0)
 + submodule absorbgitdirs: mention in docstring help

 Help-text fix.


* sb/submodule-init (2017-01-12) 1 commit
  (merged to 'next' on 2017-01-18 at 2e8a38b1cc)
 + submodule update --init: display correct path from submodule

 Error message fix.


* sg/fix-versioncmp-with-common-suffix (2017-01-12) 8 commits
  (merged to 'next' on 2017-01-18 at f79c24a291)
 + versioncmp: generalize version sort suffix reordering
 + versioncmp: factor out helper for suffix matching
 + versioncmp: use earliest-longest contained suffix to determine sorting order
 + versioncmp: cope with common part overlapping with prerelease suffix
 + versioncmp: pass full tagnames to swap_prereleases()
 + t7004-tag: add version sort tests to show prerelease reordering issues
 + t7004-tag: use test_config helper
 + t7004-tag: delete unnecessary tags with test_when_finished

 The prereleaseSuffix feature of version comparison that is used in
 "git tag -l" did not correctly when two or more prereleases for the
 same release were present (e.g. when 2.0, 2.0-beta1, and 2.0-beta2
 are there and the code needs to compare 2.0-beta1 and 2.0-beta2).


* vn/diff-ihc-config (2017-01-12) 1 commit
  (merged to 'next' on 2017-01-18 at ac4915dbe6)
 + diff: add interhunk context config option

 "git diff" learned diff.interHunkContext configuration variable
 that gives the default value for its --inter-hunk-context option.


* ws/request-pull-code-cleanup (2017-01-15) 1 commit
  (merged to 'next' on 2017-01-18 at dfcb1405de)
 + request-pull: drop old USAGE stuff

 Code clean-up.

--------------------------------------------------
[New Topics]

* sh/grep-tree-obj-tweak-output (2017-01-20) 2 commits
 - grep: use '/' delimiter for paths
 - grep: only add delimiter if there isn't one already

 "git grep", when fed a tree-ish as an input, shows each hit
 prefixed with "<tree-ish>:<path>:<lineno>:".  As <tree-ish> is
 almost always either a commit or a tag that points at a commit, the
 early part of the output "<tree-ish>:<path>" can be used as the
 name of the blob and given to "git show".  When <tree-ish> is a
 tree given in the extended SHA-1 syntax (e.g. "<commit>:", or
 "<commit>:<dir>"), however, this results in a string that does not
 name a blob (e.g. "<commit>::<path>" or "<commit>:<dir>:<path>").
 "git grep" has been taught to be a bit more intelligent about these
 cases and omit a colon (in the former case) or use slash (in the
 latter case) to produce "<commit>:<path>" and
 "<commit>:<dir>/<path>" that can be used as the name of a blob.

 Waiting for the review discussion to settle, followed by a reroll.


* vp/show-ref-verify-head (2017-01-23) 5 commits
 - show-ref: remove dead `if (verify)' check
 - show-ref: detect dangling refs under --verify as well
 - show-ref: move --quiet handling into show_one()
 - show-ref: allow -d to work with --verify
 - show-ref: accept HEAD with --verify

 "git show-ref HEAD" used with "--verify" because the user is not
 interested in seeing refs/remotes/origin/HEAD, and used with
 "--head" because the user does not want HEAD to be filtered out,
 i.e. "git show-ref --head --verify HEAD", did not work as expected.

 Will merge to 'next'.


* bc/use-asciidoctor-opt (2017-01-23) 7 commits
 - Makefile: add a knob to enable the use of Asciidoctor
 - Documentation: move dblatex arguments into variable
 - Documentation: add XSLT to fix DocBook for Texinfo
 - Documentation: sort sources for gitman.texi
 - Documentation: remove unneeded argument in cat-texi.perl
 - Documentation: modernize cat-texi.perl
 - Documentation: fix warning in cat-texi.perl

 Asciidoctor, an alternative reimplementation of AsciiDoc, still
 needs some changes to work with documents meant to be formatted
 with AsciiDoc.  "make USE_ASCIIDOCTOR=YesPlease" to use it out of
 the box to document our pages is getting closer to reality.


* ls/travis-p4-on-macos (2017-01-23) 1 commit
  (merged to 'next' on 2017-01-23 at 2d51987faa)
 + travis-ci: fix Perforce install on macOS

 Update the definition of the MacOSX test environment used by
 TravisCI.

 Will merge to 'master'.


* rs/qsort-s (2017-01-23) 5 commits
 - ref-filter: use QSORT_S in ref_array_sort()
 - string-list: use QSORT_S in string_list_sort()
 - perf: add basic sort performance test
 - add QSORT_S
 - compat: add qsort_s()

 A few codepaths had to rely on a global variable when sorting
 elements of an array because sort(3) API does not allow extra data
 to be passed to the comparison function.  Use qsort_s() when
 natively available, and a fallback implementation of it when not,
 to eliminate the need, which is a prerequisite for making the
 codepath reentrant.

 Will merge to 'next'.

--------------------------------------------------
[Stalled]

* jc/diff-b-m (2015-02-23) 5 commits
 . WIPWIP
 . WIP: diff-b-m
 - diffcore-rename: allow easier debugging
 - diffcore-rename.c: add locate_rename_src()
 - diffcore-break: allow debugging

 "git diff -B -M" produced incorrect patch when the postimage of a
 completely rewritten file is similar to the preimage of a removed
 file; such a resulting file must not be expressed as a rename from
 other place.

 The fix in this patch is broken, unfortunately.

 Will discard.

--------------------------------------------------
[Cooking]

* jk/coding-guidelines-update (2017-01-17) 1 commit
  (merged to 'next' on 2017-01-23 at 8c57afa288)
 + CodingGuidelines: clarify multi-line brace style

 Developer doc update.

 Will merge to 'master'.


* jk/fsck-connectivity-check-fix (2017-01-17) 6 commits
  (merged to 'next' on 2017-01-23 at e8e9b76b84)
 + fsck: check HAS_OBJ more consistently
 + fsck: do not fallback "git fsck <bogus>" to "git fsck"
 + fsck: tighten error-checks of "git fsck <head>"
 + fsck: prepare dummy objects for --connectivity-check
 + fsck: report trees as dangling
 + t1450: clean up sub-objects in duplicate-entry test

 "git fsck --connectivity-check" was not working at all.

 Will merge to 'master'.


* js/exec-path-coverity-workaround (2017-01-09) 2 commits
  (merged to 'next' on 2017-01-23 at bf5dfbf860)
 + git_exec_path: do not return the result of getenv()
 + git_exec_path: avoid Coverity warning about unfree()d result

 Code cleanup.

 Will merge to 'master'.
 Split out of another topic.


* js/mingw-isatty (2017-01-18) 1 commit
  (merged to 'next' on 2017-01-23 at ae0f80e058)
 + mingw: follow-up to "replace isatty() hack"

 An update to a topic that is already in 'master'.

 Will merge to 'master'.


* js/sequencer-i-countdown-3 (2017-01-17) 38 commits
 - sequencer (rebase -i): write out the final message
 - sequencer (rebase -i): write the progress into files
 - sequencer (rebase -i): show the progress
 - sequencer (rebase -i): suggest --edit-todo upon unknown command
 - sequencer (rebase -i): show only failed cherry-picks' output
 - sequencer (rebase -i): show only failed `git commit`'s output
 - sequencer: use run_command() directly
 - sequencer: update reading author-script
 - sequencer (rebase -i): differentiate between comments and 'noop'
 - sequencer (rebase -i): implement the 'drop' command
 - sequencer (rebase -i): allow rescheduling commands
 - sequencer (rebase -i): respect strategy/strategy_opts settings
 - sequencer (rebase -i): respect the rebase.autostash setting
 - sequencer (rebase -i): run the post-rewrite hook, if needed
 - sequencer (rebase -i): record interrupted commits in rewritten, too
 - sequencer (rebase -i): copy commit notes at end
 - sequencer (rebase -i): set the reflog message consistently
 - sequencer (rebase -i): refactor setting the reflog message
 - sequencer (rebase -i): allow fast-forwarding for edit/reword
 - sequencer (rebase -i): implement the 'reword' command
 - sequencer (rebase -i): leave a patch upon error
 - sequencer (rebase -i): update refs after a successful rebase
 - sequencer (rebase -i): the todo can be empty when continuing
 - sequencer (rebase -i): skip some revert/cherry-pick specific code path
 - sequencer (rebase -i): remove CHERRY_PICK_HEAD when no longer needed
 - sequencer (rebase -i): allow continuing with staged changes
 - sequencer (rebase -i): write an author-script file
 - sequencer (rebase -i): implement the short commands
 - sequencer (rebase -i): add support for the 'fixup' and 'squash' commands
 - sequencer (rebase -i): write the 'done' file
 - sequencer (rebase -i): learn about the 'verbose' mode
 - sequencer (rebase -i): implement the 'exec' command
 - sequencer (rebase -i): implement the 'edit' command
 - sequencer (rebase -i): implement the 'noop' command
 - sequencer: support a new action: 'interactive rebase'
 - sequencer: use a helper to find the commit message
 - sequencer: move "else" keyword onto the same line as preceding brace
 - sequencer: avoid unnecessary curly braces

 The sequencer machinery has been further enhanced so that a later
 set of patches can start using it to reimplement "rebase -i".

 Will merge to 'next'.
 I think I've said everything that needs to be said on this topic.


* jk/clear-delta-base-cache-fix (2017-01-19) 1 commit
  (merged to 'next' on 2017-01-23 at 5f4af2b0a5)
 + clear_delta_base_cache(): don't modify hashmap while iterating

 A crashing bug introduced in v2.11 timeframe has been found (it is
 triggerable only in fast-import) and fixed.

 Will merge to 'master'.


* jk/describe-omit-some-refs (2017-01-19) 6 commits
 - SQUASH???
 - describe: teach describe negative pattern matches
 - describe: teach --match to accept multiple patterns
 - name-rev: add support to exclude refs by pattern match
 - name-rev: extend --refs to accept multiple patterns
 - doc: add documentation for OPT_STRING_LIST

 "git describe" and "git name-rev" have been taught to take more
 than one refname patterns to restrict the set of refs to base their
 naming output on, and also learned to take negative patterns to
 name refs not to be used for naming via their "--exclude" option.

 Will merge to 'next' after making sure SQUASH??? is correctly
 squashed in.


* js/remote-rename-with-half-configured-remote (2017-01-19) 2 commits
  (merged to 'next' on 2017-01-23 at a1b655dbac)
 + remote rename: more carefully determine whether a remote is configured
 + remote rename: demonstrate a bogus "remote exists" bug

 With anticipatory tweaking for remotes defined in ~/.gitconfig
 (e.g. "remote.origin.prune" set to true, even though there may or
 may not actually be "origin" remote defined in a particular Git
 repository), "git remote rename" and other commands misinterpreted
 and behaved as if such a non-existing remote actually existed.

 Will merge to 'master'.


* sb/in-core-index-doc (2017-01-19) 4 commits
  (merged to 'next' on 2017-01-23 at 30224463e8)
 + documentation: retire unfinished documentation
 + cache.h: document add_[file_]to_index
 + cache.h: document remove_index_entry_at
 + cache.h: document index_name_pos

 Documentation and in-code comments updates.

 Will merge to 'master'.


* sb/retire-convert-objects-from-contrib (2017-01-19) 1 commit
  (merged to 'next' on 2017-01-23 at decc1e237d)
 + contrib: remove git-convert-objects

 Remove an ancient tool left in contrib/.

 Will merge to 'master'.


* ep/commit-static-buf-cleanup (2017-01-13) 2 commits
 - builtin/commit.c: switch to xstrfmt(), instead of snprintf()
 - builtin/commit.c: remove the PATH_MAX limitation via dynamic allocation

 Code clean-up.

 Expecting a reroll.
 The tip one would instead be done with strbuf.
 cf. <CA+EOSB=4-TKpi6mr-yVbwRsFrVzE=vo4Y9Qm3VMm7pn=UB1_hQ@mail.gmail.com>


* jk/vreport-sanitize (2017-01-11) 2 commits
  (merged to 'next' on 2017-01-18 at 4bbf370981)
 + vreport: sanitize ASCII control chars
 + Revert "vreportf: avoid intermediate buffer"

 An error message with an ASCII control character like '\r' in it
 can alter the message to hide its early part, which is problematic
 when a remote side gives such an error message that the local side
 will relay with a "remote: " prefix.

 Will merge to 'master'.


* sb/unpack-trees-super-prefix (2017-01-12) 5 commits
 - SQUASH
 - unpack-trees: support super-prefix option
 - t1001: modernize style
 - t1000: modernize style
 - read-tree: use OPT_BOOL instead of OPT_SET_INT

 "git read-tree" and its underlying unpack_trees() machinery learned
 to report problematic paths prefixed with the --super-prefix option.

 Expecting a reroll.
 The first three are in good shape.  The last one needs a better
 explanation and possibly an update to its test.
 cf. <CAGZ79kaHDnVLQr8WLoaD5KKs7EqeW=KbkptF=iHpU5t054Xcdw@mail.gmail.com>


* sb/submodule-doc (2017-01-12) 3 commits
 - submodules: add a background story
 - submodule update documentation: don't repeat ourselves
 - submodule documentation: add options to the subcommand

 Needs review.


* bw/attr (2017-01-23) 27 commits
 - attr: reformat git_attr_set_direction() function
 - attr: push the bare repo check into read_attr()
 - attr: store attribute stack in attr_check structure
 - attr: tighten const correctness with git_attr and match_attr
 - attr: remove maybe-real, maybe-macro from git_attr
 - attr: eliminate global check_all_attr array
 - attr: use hashmap for attribute dictionary
 - attr: change validity check for attribute names to use positive logic
 - attr: pass struct attr_check to collect_some_attrs
 - attr: retire git_check_attrs() API
 - attr: convert git_check_attrs() callers to use the new API
 - attr: convert git_all_attrs() to use "struct attr_check"
 - attr: (re)introduce git_check_attr() and struct attr_check
 - attr: rename function and struct related to checking attributes
 - attr.c: outline the future plans by heavily commenting
 - Documentation/gitattributes.txt: fix a typo
 - attr.c: add push_stack() helper
 - attr: support quoting pathname patterns in C style
 - attr.c: plug small leak in parse_attr_line()
 - attr.c: tighten constness around "git_attr" structure
 - attr.c: simplify macroexpand_one()
 - attr.c: mark where #if DEBUG ends more clearly
 - attr.c: complete a sentence in a comment
 - attr.c: explain the lack of attr-name syntax check in parse_attr()
 - attr.c: update a stale comment on "struct match_attr"
 - attr.c: use strchrnul() to scan for one line
 - commit.c: use strchrnul() to scan for one line

 The gitattributes machinery is being taught to work better in a
 multi-threaded environment.

 Expecting a reroll.


* jk/loose-object-fsck (2017-01-15) 6 commits
  (merged to 'next' on 2017-01-23 at 4302ad090d)
 + fsck: detect trailing garbage in all object types
 + fsck: parse loose object paths directly
 + sha1_file: add read_loose_object() function
 + t1450: test fsck of packed objects
 + sha1_file: fix error message for alternate objects
 + t1450: refactor loose-object removal

 "git fsck" inspects loose objects more carefully now.

 Will merge to 'master'.


* vn/xdiff-func-context (2017-01-15) 1 commit
 - xdiff -W: relax end-of-file function detection

 "git diff -W" has been taught to handle the case where a new
 function is added at the end of the file better.

 Will hold.
 An follow-up change to go back from the line that matches the
 funcline to show comments before the function definition is still
 being discussed.


* mh/ref-remove-empty-directory (2017-01-07) 23 commits
 - files_transaction_commit(): clean up empty directories
 - try_remove_empty_parents(): teach to remove parents of reflogs, too
 - try_remove_empty_parents(): don't trash argument contents
 - try_remove_empty_parents(): rename parameter "name" -> "refname"
 - delete_ref_loose(): inline function
 - delete_ref_loose(): derive loose reference path from lock
 - log_ref_write_1(): inline function
 - log_ref_setup(): manage the name of the reflog file internally
 - log_ref_write_1(): don't depend on logfile argument
 - log_ref_setup(): pass the open file descriptor back to the caller
 - log_ref_setup(): improve robustness against races
 - log_ref_setup(): separate code for create vs non-create
 - log_ref_write(): inline function
 - rename_tmp_log(): improve error reporting
 - rename_tmp_log(): use raceproof_create_file()
 - lock_ref_sha1_basic(): use raceproof_create_file()
 - lock_ref_sha1_basic(): inline constant
 - raceproof_create_file(): new function
 - safe_create_leading_directories(): set errno on SCLD_EXISTS
 - safe_create_leading_directories_const(): preserve errno
 - t5505: use "for-each-ref" to test for the non-existence of references
 - refname_is_safe(): correct docstring
 - files_rename_ref(): tidy up whitespace

 Deletion of a branch "foo/bar" could remove .git/refs/heads/foo
 once there no longer is any other branch whose name begins with
 "foo/", but we didn't do so so far.  Now we do.

 Expecting a reroll.
 cf. <5051c78e-51f9-becd-e1a6-9c0b781d6912@alum.mit.edu>


* ls/filter-process-delayed (2017-01-08) 1 commit
 . convert: add "status=delayed" to filter process protocol

 Ejected, as does not build when merged to 'pu'.


* nd/worktree-move (2017-01-09) 6 commits
 - worktree remove: new command
 - worktree move: refuse to move worktrees with submodules
 - worktree move: accept destination as directory
 - worktree move: new command
 - worktree.c: add update_worktree_location()
 - worktree.c: add validate_worktree()

 "git worktree" learned move and remove subcommands.

 Needs review.


* nd/log-graph-configurable-colors (2017-01-19) 4 commits
 - SQUASH???
 - log --graph: customize the graph lines with config log.graphColors
 - color.c: trim leading spaces in color_parse_mem()
 - color.c: fix color_parse_mem() with value_len == 0

 Some people feel the default set of colors used by "git log --graph"
 rather limiting.  A mechanism to customize the set of colors has
 been introduced.

 Will merge to 'next' after making sure SQUASH??? is correctly
 squashed in.


* cc/split-index-config (2016-12-26) 21 commits
 - Documentation/git-update-index: explain splitIndex.*
 - Documentation/config: add splitIndex.sharedIndexExpire
 - read-cache: use freshen_shared_index() in read_index_from()
 - read-cache: refactor read_index_from()
 - t1700: test shared index file expiration
 - read-cache: unlink old sharedindex files
 - config: add git_config_get_expiry() from gc.c
 - read-cache: touch shared index files when used
 - sha1_file: make check_and_freshen_file() non static
 - Documentation/config: add splitIndex.maxPercentChange
 - t1700: add tests for splitIndex.maxPercentChange
 - read-cache: regenerate shared index if necessary
 - config: add git_config_get_max_percent_split_change()
 - Documentation/git-update-index: talk about core.splitIndex config var
 - Documentation/config: add information for core.splitIndex
 - t1700: add tests for core.splitIndex
 - update-index: warn in case of split-index incoherency
 - read-cache: add and then use tweak_split_index()
 - split-index: add {add,remove}_split_index() functions
 - config: add git_config_get_split_index()
 - config: mark an error message up for translation

 The experimental "split index" feature has gained a few
 configuration variables to make it easier to use.

 Waiting for review comments to be addressed.
 cf. <20161226102222.17150-1-chriscool@tuxfamily.org>
 cf. <a1a44640-ff6c-2294-72ac-46322eff8505@ramsayjones.plus.com>


* bw/push-submodule-only (2016-12-20) 3 commits
  (merged to 'next' on 2017-01-23 at d6cd1c60ae)
 + push: add option to push only submodules
 + submodules: add RECURSE_SUBMODULES_ONLY value
 + transport: reformat flag #defines to be more readable

 "git submodule push" learned "--recurse-submodules=only option to
 push submodules out without pushing the top-level superproject.

 Will merge to 'master'.


* ls/p4-path-encoding (2016-12-18) 1 commit
 - git-p4: fix git-p4.pathEncoding for removed files

 When "git p4" imports changelist that removes paths, it failed to
 convert pathnames when the p4 used encoding different from the one
 used on the Git side.  This has been corrected.

 Will be rerolled.
 cf. <7E1C7387-4F37-423F-803D-3B5690B49D40@gmail.com>


* js/difftool-builtin (2017-01-19) 3 commits
  (merged to 'next' on 2017-01-23 at 6f4810dbd9)
 + difftool: retire the scripted version
 + difftool: implement the functionality in the builtin
 + difftool: add a skeleton for the upcoming builtin

 Rewrite a scripted porcelain "git difftool" in C.

 Will merge to 'master'.


* sb/push-make-submodule-check-the-default (2016-11-29) 2 commits
  (merged to 'next' on 2016-12-12 at 1863e05af5)
 + push: change submodule default to check when submodules exist
 + submodule add: extend force flag to add existing repos

 Turn the default of "push.recurseSubmodules" to "check" when
 submodules seem to be in use.

 Will cook in 'next'.


* kn/ref-filter-branch-list (2017-01-10) 21 commits
 - SQUASH???
 - branch: implement '--format' option
 - branch: use ref-filter printing APIs
 - branch, tag: use porcelain output
 - ref-filter: allow porcelain to translate messages in the output
 - ref-filter: add an 'rstrip=<N>' option to atoms which deal with refnames
 - ref-filter: modify the 'lstrip=<N>' option to work with negative '<N>'
 - ref-filter: Do not abruptly die when using the 'lstrip=<N>' option
 - ref-filter: rename the 'strip' option to 'lstrip'
 - ref-filter: make remote_ref_atom_parser() use refname_atom_parser_internal()
 - ref-filter: introduce refname_atom_parser()
 - ref-filter: introduce refname_atom_parser_internal()
 - ref-filter: make "%(symref)" atom work with the ':short' modifier
 - ref-filter: add support for %(upstream:track,nobracket)
 - ref-filter: make %(upstream:track) prints "[gone]" for invalid upstreams
 - ref-filter: introduce format_ref_array_item()
 - ref-filter: move get_head_description() from branch.c
 - ref-filter: modify "%(objectname:short)" to take length
 - ref-filter: implement %(if:equals=<string>) and %(if:notequals=<string>)
 - ref-filter: include reference to 'used_atom' within 'atom_value'
 - ref-filter: implement %(if), %(then), and %(else) atoms

 The code to list branches in "git branch" has been consolidated
 with the more generic ref-filter API.

 I think this is almost ready.  Will wait for a few days, squash
 fixes in if needed and merge to 'next'.


* jk/no-looking-at-dotgit-outside-repo-final (2016-10-26) 1 commit
  (merged to 'next' on 2016-12-05 at 0c77e39cd5)
 + setup_git_env: avoid blind fall-back to ".git"

 Originally merged to 'next' on 2016-10-26

 This is the endgame of the topic to avoid blindly falling back to
 ".git" when the setup sequence said we are _not_ in Git repository.
 A corner case that happens to work right now may be broken by a
 call to die("BUG").

 Will cook in 'next'.


* pb/bisect (2016-10-18) 27 commits
 - bisect--helper: remove the dequote in bisect_start()
 - bisect--helper: retire `--bisect-auto-next` subcommand
 - bisect--helper: retire `--bisect-autostart` subcommand
 - bisect--helper: retire `--bisect-write` subcommand
 - bisect--helper: `bisect_replay` shell function in C
 - bisect--helper: `bisect_log` shell function in C
 - bisect--helper: retire `--write-terms` subcommand
 - bisect--helper: retire `--check-expected-revs` subcommand
 - bisect--helper: `bisect_state` & `bisect_head` shell function in C
 - bisect--helper: `bisect_autostart` shell function in C
 - bisect--helper: retire `--next-all` subcommand
 - bisect--helper: retire `--bisect-clean-state` subcommand
 - bisect--helper: `bisect_next` and `bisect_auto_next` shell function in C
 - t6030: no cleanup with bad merge base
 - bisect--helper: `bisect_start` shell function partially in C
 - bisect--helper: `get_terms` & `bisect_terms` shell function in C
 - bisect--helper: `bisect_next_check` & bisect_voc shell function in C
 - bisect--helper: `check_and_set_terms` shell function in C
 - bisect--helper: `bisect_write` shell function in C
 - bisect--helper: `is_expected_rev` & `check_expected_revs` shell function in C
 - bisect--helper: `bisect_reset` shell function in C
 - wrapper: move is_empty_file() and rename it as is_empty_or_missing_file()
 - t6030: explicitly test for bisection cleanup
 - bisect--helper: `bisect_clean_state` shell function in C
 - bisect--helper: `write_terms` shell function in C
 - bisect: rewrite `check_term_format` shell function in C
 - bisect--helper: use OPT_CMDMODE instead of OPT_BOOL

 Move more parts of "git bisect" to C.

 Expecting a reroll.
 cf. <CAFZEwPPXPPHi8KiEGS9ggzNHDCGhuqMgH9Z8-Pf9GLshg8+LPA@mail.gmail.com>
 cf. <CAFZEwPM9RSTGN54dzaw9gO9iZmsYjJ_d1SjUD4EzSDDbmh-XuA@mail.gmail.com>


* st/verify-tag (2017-01-18) 6 commits
  (merged to 'next' on 2017-01-23 at 2810959427)
 + t/t7004-tag: Add --format specifier tests
 + t/t7030-verify-tag: Add --format specifier tests
 + builtin/tag: add --format argument for tag -v
 + builtin/verify-tag: add --format to verify-tag
 + ref-filter: add function to print single ref_array_item
 + gpg-interface, tag: add GPG_VERIFY_OMIT_STATUS flag

 "git tag" and "git verify-tag" learned to put GPG verification
 status in their "--format=<placeholders>" output format.

 Will merge to 'master'.


* jc/merge-drop-old-syntax (2015-04-29) 1 commit
  (merged to 'next' on 2016-12-05 at 041946dae0)
 + merge: drop 'git merge <message> HEAD <commit>' syntax

 Originally merged to 'next' on 2016-10-11

 Stop supporting "git merge <message> HEAD <commit>" syntax that has
 been deprecated since October 2007, and issues a deprecation
 warning message since v2.5.0.

 Will cook in 'next'.

--------------------------------------------------
[Discarded]

* jc/bundle (2016-03-03) 6 commits
 . index-pack: --clone-bundle option
 . Merge branch 'jc/index-pack' into jc/bundle
 . bundle v3: the beginning
 . bundle: keep a copy of bundle file name in the in-core bundle header
 . bundle: plug resource leak
 . bundle doc: 'verify' is not about verifying the bundle

 The beginning of "split bundle", which could be one of the
 ingredients to allow "git clone" traffic off of the core server
 network to CDN.

 While I think it would make it easier for people to experiment and
 build on if the topic is merged to 'next', I am at the same time a
 bit reluctant to merge an unproven new topic that introduces a new
 file format, which we may end up having to support til the end of
 time.  It is likely that to support a "prime clone from CDN", it
 would need a lot more than just "these are the heads and the pack
 data is over there", so this may not be sufficient.


* jk/nofollow-attr-ignore (2016-11-02) 5 commits
 . exclude: do not respect symlinks for in-tree .gitignore
 . attr: do not respect symlinks for in-tree .gitattributes
 . exclude: convert "check_index" into a flags field
 . attr: convert "macro_ok" into a flags field
 . add open_nofollow() helper

 As we do not follow symbolic links when reading control files like
 .gitignore and .gitattributes from the index, match the behaviour
 and not follow symbolic links when reading them from the working
 tree.  This also tightens security a bit by not leaking contents of
 an unrelated file in the error messages when it is pointed at by
 one of these files that is a symbolic link.

 Perhaps we want to cover .gitmodules too with the same mechanism?

^ permalink raw reply

* [PATCH 0/12] reducing resource usage of for_each_alternate_ref
From: Jeff King @ 2017-01-24  0:37 UTC (permalink / raw)
  To: git

As I've mentioned before, I have some alternates repositories with
absurd numbers of refs, most of which are duplicates of each other[1].
There are a couple of problems I've seen:

 1. the way that for_each_alternate_ref() works has very high peak memory
    usage for this case

 2. the way that receive-pack de-duplicates the refs has high peak memory
    usage

 3. we access the alternate refs twice in fetch-pack

This fixes all three, along with a few other minor bugfixes, cleanups,
and optimizations. I've tried to order the series to keep bugfixes and
less-contentious changes near the front.

Just to give some numbers, on my worst-case repository (see [1]), this
drops peak RSS for "git clone --reference" from over 25GB to about 40MB.
Sort of, anyway.  You still pay a big CPU and RSS cost on the process in
the alternates repo that accesses packed-refs, but the 25GB came on top
of that. So this is a first pass at the low-hanging fruit.

I'll be the first to admit that this setup is insane. And some of the
optimizations are tradeoffs that help particularly the case where your
refs aren't unique. But for the most part should help _every_ case. And
in the cases where your refs are unique, either you don't have many (so
the tradeoffs are OK) or you have so many that you are pretty much
screwed no matter what (if your fetch is looking at 30 million unique
ref tips, the object storage is your problem, not looking at the refs).

A brief overview of the patches:

  [01/12]: for_each_alternate_ref: handle failure from real_pathdup()
  [02/12]: for_each_alternate_ref: stop trimming trailing slashes
  [03/12]: for_each_alternate_ref: use strbuf for path allocation

    Bugfixes and cleanups (the first one is actually a recent-ish
    regression).

  [04/12]: for_each_alternate_ref: pass name/oid instead of ref struct
  [05/12]: for_each_alternate_ref: replace transport code with for-each-ref
  [06/12]: clone: disable save_commit_buffer

    This gives the 25GB->40MB benefit. There are a bunch of caveats in
    patch 05, but I _think_ it's the right direction for the reasons I
    outlined there.

  [07/12]: fetch-pack: cache results of for_each_alternate_ref

    Just running for-each-ref in the giant alternates repo is about a
    minute of CPU, plus 10GB heap, and fetch-pack wants to do it twice.
    This drops it to once.

  [08/12]: add oidset API
  [09/12]: receive-pack: use oidset to de-duplicate .have lines

    These give another ~12GB RSS improvement when receive-pack looks at
    the alternates in my worst-case repo.

    This is less tested than the earlier ones, as we've disabled
    receive-pack looking at alternates in production (see [1] below).
    I just did them for completeness upstream.

  [10/12]: receive-pack: fix misleading namespace/.have comment
  [11/12]: receive-pack: treat namespace .have lines like alternates
  [12/12]: receive-pack: avoid duplicates between our refs and alternates

    These are optimizations to avoid more duplicate .have lines, and
    should benefit even non-insane cases. As with 8-12, not as well
    tested by me.

 Makefile               |  1 +
 builtin/clone.c        |  1 +
 builtin/receive-pack.c | 41 +++++++++++++++-------------
 fetch-pack.c           | 48 ++++++++++++++++++++++++++++-----
 object.h               |  2 +-
 oidset.c               | 49 ++++++++++++++++++++++++++++++++++
 oidset.h               | 45 +++++++++++++++++++++++++++++++
 t/t5400-send-pack.sh   | 38 ++++++++++++++++++++++++++
 transport.c            | 72 +++++++++++++++++++++++++++++++++++---------------
 transport.h            |  2 +-
 10 files changed, 250 insertions(+), 49 deletions(-)
 create mode 100644 oidset.c
 create mode 100644 oidset.h

-Peff

[1] Background, if you care:

    I've mentioned before that GitHub's repository storage uses a
    fork-network layout. Each user gets their own "fork" repository, and
    it points to "network.git" as shared storage. The network.git
    repository has a ref for each fork that is updated periodically via
    "git fetch".

    So the network.git repo contains O(nr_forks * nr_refs_in_fork) refs.
    Quite a few of these point to the same tip sha1s, as each fork has
    the same tags. One of the most pathological cases is a popular
    public repo that has ~44K tags in it. The network repo has ~80
    million refs, of which ~60K are unique. You can imagine that it
    takes some time to access the refs. Basically anything that loads
    the network.git packed-refs file takes an extra minute of CPU and
    needs ~10G RSS to store the internal cache of `packed-refs`.

    Most operations don't care about this; they work on the fork repo,
    and never look at the network refs. But we _do_ sometimes peek at
    alternate refs from receive-pack and fetch-pack to tell the other
    side about tips we have.

    These optimizations backfire completely in such a setting. Besides
    the CPU and memory spikes on the server, even just the unique refs
    add over 3MB to the ref advertisement. So for the time being, we've
    disabled them. I have patches that add a receive.advertiseAlternates
    config option, if anybody is interested.

    So why do I care?  There is one exception: when we are cloning a
    fork, we turn on the fetch-pack side of the optimizations. This is
    worth it for a large repository, as it saves us having to transfer
    any objects at all (and therefore not process them with index-pack,
    which is expensive).

    This series is also a first step towards other optimizations, like
    asking for just the alternate refs from _one_ of the forks. I hope
    one day to turn alternates optimizations back on everywhere.

^ permalink raw reply

* [PATCH 01/12] for_each_alternate_ref: handle failure from real_pathdup()
From: Jeff King @ 2017-01-24  0:38 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

In older versions of git, if real_path() failed to resolve
the alternate object store path, we would die() with an
error. However, since 4ac9006f8 (real_path: have callers use
real_pathdup and strbuf_realpath, 2016-12-12) we use the
real_pathdup() function, which may return NULL. Since we
don't check the return value, we can segfault.

This is hard to trigger in practice, since we check that the
path is accessible before creating the alternate_object_database
struct. But it could be removed racily, or we could see a
transient filesystem error.

We could restore the original behavior by switching back to
xstrdup(real_path()).  However, dying is probably not the
best option here. This whole function is best-effort
already; there might not even be a repository around the
shared objects at all. And if the alternate store has gone
away, there are no objects to show.

So let's just quietly return, as we would if we failed to
open "refs/", or if upload-pack failed to start, etc.

Signed-off-by: Jeff King <peff@peff.net>
---
 transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/transport.c b/transport.c
index c86ba2eb8..74d0e45bd 100644
--- a/transport.c
+++ b/transport.c
@@ -1215,6 +1215,8 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 	struct alternate_refs_data *cb = data;
 
 	other = real_pathdup(e->path);
+	if (!other)
+		return 0;
 	len = strlen(other);
 
 	while (other[len-1] == '/')
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 02/12] for_each_alternate_ref: stop trimming trailing slashes
From: Jeff King @ 2017-01-24  0:39 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

The real_pathdup() function will have removed extra slashes
for us already (on top of the normalize_path() done when we
created the alternate_object_database struct in the first
place).

Incidentally, this also fixes the case where the path is
just "/", which would read off the start of the array.
That doesn't seem possible to trigger in practice, though,
as link_alt_odb_entry() blindly eats trailing slashes,
including a bare "/".

Signed-off-by: Jeff King <peff@peff.net>
---
I think the "/" thing in link_alt_odb_entry() is buggy, and it's an easy
one-liner fix.  But I notice some of the rest of the code isn't ready to
handle "/" (mostly it just duplicates "/" when appending to the path),
so I left it for now (and I doubt anybody cares anyway).

 transport.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/transport.c b/transport.c
index 74d0e45bd..6b4b3ed31 100644
--- a/transport.c
+++ b/transport.c
@@ -1219,8 +1219,6 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 		return 0;
 	len = strlen(other);
 
-	while (other[len-1] == '/')
-		other[--len] = '\0';
 	if (len < 8 || memcmp(other + len - 8, "/objects", 8))
 		goto out;
 	/* Is this a git repository with refs? */
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 03/12] for_each_alternate_ref: use strbuf for path allocation
From: Jeff King @ 2017-01-24  0:40 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

We have a string with ".../objects" pointing to the
alternate object store, and overwrite bits of it to look at
other paths in the (potential) git repository holding it.
This works because the only path we care about is "refs",
which is shorter than "objects".

Using a strbuf to hold the path lets us get rid of some
magic numbers, and makes it more obvious that the memory
operations are safe.

Signed-off-by: Jeff King <peff@peff.net>
---
I had thought I was going to need to generate more paths in the
alternate repo, but I ended up not needing to. So this was originally
done to make that easier, but I think it stands on its own as a cleanup.

 transport.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/transport.c b/transport.c
index 6b4b3ed31..594533d88 100644
--- a/transport.c
+++ b/transport.c
@@ -1207,34 +1207,34 @@ struct alternate_refs_data {
 static int refs_from_alternate_cb(struct alternate_object_database *e,
 				  void *data)
 {
-	char *other;
-	size_t len;
+	struct strbuf path = STRBUF_INIT;
+	size_t base_len;
 	struct remote *remote;
 	struct transport *transport;
 	const struct ref *extra;
 	struct alternate_refs_data *cb = data;
 
-	other = real_pathdup(e->path);
-	if (!other)
-		return 0;
-	len = strlen(other);
-
-	if (len < 8 || memcmp(other + len - 8, "/objects", 8))
+	if (!strbuf_realpath(&path, e->path, 0))
+		goto out;
+	if (!strbuf_strip_suffix(&path, "/objects"))
 		goto out;
+	base_len = path.len;
+
 	/* Is this a git repository with refs? */
-	memcpy(other + len - 8, "/refs", 6);
-	if (!is_directory(other))
+	strbuf_addstr(&path, "/refs");
+	if (!is_directory(path.buf))
 		goto out;
-	other[len - 8] = '\0';
-	remote = remote_get(other);
-	transport = transport_get(remote, other);
+	strbuf_setlen(&path, base_len);
+
+	remote = remote_get(path.buf);
+	transport = transport_get(remote, path.buf);
 	for (extra = transport_get_remote_refs(transport);
 	     extra;
 	     extra = extra->next)
 		cb->fn(extra, cb->data);
 	transport_disconnect(transport);
 out:
-	free(other);
+	strbuf_release(&path);
 	return 0;
 }
 
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 04/12] for_each_alternate_ref: pass name/oid instead of ref struct
From: Jeff King @ 2017-01-24  0:40 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

Breaking down the fields in the interface makes it easier to
change the backend of for_each_alternate_ref to something
that doesn't use "struct ref" internally.

The only field that callers actually look at is the oid,
anyway. The refname is kept in the interface as a plausible
thing for future code to want.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/receive-pack.c |  6 ++++--
 fetch-pack.c           | 12 ++++++++----
 transport.c            |  2 +-
 transport.h            |  2 +-
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 6b97cbdbe..b9f2c0cc5 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -277,10 +277,12 @@ static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
 	return 0;
 }
 
-static void collect_one_alternate_ref(const struct ref *ref, void *data)
+static void collect_one_alternate_ref(const char *refname,
+				      const struct object_id *oid,
+				      void *data)
 {
 	struct sha1_array *sa = data;
-	sha1_array_append(sa, ref->old_oid.hash);
+	sha1_array_append(sa, oid->hash);
 }
 
 static void write_head_info(void)
diff --git a/fetch-pack.c b/fetch-pack.c
index 601f0779a..54f84c573 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -253,9 +253,11 @@ static void send_request(struct fetch_pack_args *args,
 		write_or_die(fd, buf->buf, buf->len);
 }
 
-static void insert_one_alternate_ref(const struct ref *ref, void *unused)
+static void insert_one_alternate_ref(const char *refname,
+				     const struct object_id *oid,
+				     void *unused)
 {
-	rev_list_insert_ref(NULL, ref->old_oid.hash);
+	rev_list_insert_ref(NULL, oid->hash);
 }
 
 #define INITIAL_FLUSH 16
@@ -619,9 +621,11 @@ static void filter_refs(struct fetch_pack_args *args,
 	*refs = newlist;
 }
 
-static void mark_alternate_complete(const struct ref *ref, void *unused)
+static void mark_alternate_complete(const char *refname,
+				    const struct object_id *oid,
+				    void *unused)
 {
-	mark_complete(ref->old_oid.hash);
+	mark_complete(oid->hash);
 }
 
 static int everything_local(struct fetch_pack_args *args,
diff --git a/transport.c b/transport.c
index 594533d88..983d8fec1 100644
--- a/transport.c
+++ b/transport.c
@@ -1231,7 +1231,7 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 	for (extra = transport_get_remote_refs(transport);
 	     extra;
 	     extra = extra->next)
-		cb->fn(extra, cb->data);
+		cb->fn(extra->name, &extra->old_oid, cb->data);
 	transport_disconnect(transport);
 out:
 	strbuf_release(&path);
diff --git a/transport.h b/transport.h
index 9820f10b8..b7bb07d2c 100644
--- a/transport.h
+++ b/transport.h
@@ -254,6 +254,6 @@ int transport_refs_pushed(struct ref *ref);
 void transport_print_push_status(const char *dest, struct ref *refs,
 		  int verbose, int porcelain, unsigned int *reject_reasons);
 
-typedef void alternate_ref_fn(const struct ref *, void *);
+typedef void alternate_ref_fn(const char *refname, const struct object_id *oid, void *);
 extern void for_each_alternate_ref(alternate_ref_fn, void *);
 #endif
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 05/12] for_each_alternate_ref: replace transport code with for-each-ref
From: Jeff King @ 2017-01-24  0:44 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

The current method for getting the refs from an alternate is
to run upload-pack in the alternate and parse its output
using the normal transport code.  This works and is
reasonably short, but it has a very bad memory footprint
when there are a lot of refs in the alternate. There are two
problems:

  1. It reads in all of the refs before passing any back to
     us. Which means that our peak memory usage has to store
     every ref (including duplicates for peeled variants),
     even if our callback could determine that some are not
     interesting (e.g., because they point to the same sha1
     as another ref).

  2. It allocates a "struct ref" for each one. Among other
     things, this contains 3 separate 20-byte oids, along
     with the name and various pointers.  That can add up,
     especially if the callback is only interested in the
     sha1 (which it can store in a sha1_array as just 20
     bytes).

On a particularly pathological case, where the alternate had
over 80 million refs pointing to only around 60,000 unique
objects, the peak heap usage of "git clone --reference" grew
to over 25GB.

This patch instead calls git-for-each-ref in the alternate
repository, and passes each line to the callback as we read
it. That drops the peak heap of the same command to 50MB.

I considered and rejected a few alternatives.

We could read all of the refs in the alternate using our own
ref code, just as we do with submodules.  However, as memory
footprint is one of the concerns here, we want to avoid
loading those refs into our own memory as a whole.

It's possible that this will be a better technique in the
future when the ref code can more easily iterate without
loading all of packed-refs into memory.

Another option is to keep calling upload-pack, and just
parse its output ourselves in a streaming fashion. Besides
for-each-ref being simpler (we get to define the format
ourselves, and don't have to deal with speaking the git
protocol), it's more flexible for possible future changes.

For instance, it might be useful for the caller to be able
to limit the set of "interesting" alternate refs.  The
motivating example is one where many "forks" of a particular
repository share object storage, and the shared storage has
refs for each fork (which is why so many of the refs are
duplicates; each fork has the same tags).  A plausible
future optimization would be to ask for the alternate refs
for just _one_ fork (if you had some out-of-band way of
knowing which was the most interesting or important for the
current operation).

Similarly, no callbacks actually care about the symref value
of alternate refs, and as before, this patch ignores them
entirely.  However, if we wanted to add them, for-each-ref's
"%(symref)" is going to be more flexible than upload-pack,
because the latter only handles the HEAD symref due to
historical constraints.

There is one potential downside, though: unlike upload-pack,
our for-each-ref command doesn't report the peeled value of
refs. The existing code calls the alternate_ref_fn callback
twice for tags: once for the tag, and once for the peeled
value with the refname set to "ref^{}".

For the callers in fetch-pack, this doesn't matter at all.
We immediately peel each tag down to a commit either way (so
there's a slight improvement, as do not bother passing the
redundant data over the pipe). For the caller in
receive-pack, it means we will not advertise the peeled
values of tags in our alternate. However, we also don't
advertise peeled values for our _own_ tags, so this is
actually making things more consistent.

It's unclear whether receive-pack advertising peeled values
is a win or not. On one hand, giving more information to the
other side may let it omit some objects from the push. On
the other hand, for tags which both sides have, they simply
bloat the advertisement. The upload-pack advertisement of
git.git is about 30% larger than the receive-pack
advertisement due to its peeled information.

This patch omits the peeled information from
for_each_alternate_ref entirely, and leaves it up to the
caller whether they want to dig up the information.

Signed-off-by: Jeff King <peff@peff.net>
---
I also tried adding "%(*objectname)" to for-each-ref to just
grab the peeled information, but the peel implementation in
ref-filter is _really_ slow. It doesn't use the packed-ref
peel information, and it doesn't recognize duplicates (so in
the 80 million case, it really parses 80 million tags).

 transport.c | 48 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/transport.c b/transport.c
index 983d8fec1..ef8e09298 100644
--- a/transport.c
+++ b/transport.c
@@ -1199,6 +1199,42 @@ char *transport_anonymize_url(const char *url)
 	return xstrdup(url);
 }
 
+static void read_alternate_refs(const char *path,
+				alternate_ref_fn *cb,
+				void *data)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	struct strbuf line = STRBUF_INIT;
+	FILE *fh;
+
+	cmd.git_cmd = 1;
+	argv_array_pushf(&cmd.args, "--git-dir=%s", path);
+	argv_array_push(&cmd.args, "for-each-ref");
+	argv_array_push(&cmd.args, "--format=%(objectname) %(refname)");
+	cmd.env = local_repo_env;
+	cmd.out = -1;
+
+	if (start_command(&cmd))
+		return;
+
+	fh = xfdopen(cmd.out, "r");
+	while (strbuf_getline_lf(&line, fh) != EOF) {
+		struct object_id oid;
+
+		if (get_oid_hex(line.buf, &oid) ||
+		    line.buf[GIT_SHA1_HEXSZ] != ' ') {
+			warning("invalid line while parsing alternate refs: %s",
+				line.buf);
+			break;
+		}
+
+		cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data);
+	}
+
+	fclose(fh);
+	finish_command(&cmd);
+}
+
 struct alternate_refs_data {
 	alternate_ref_fn *fn;
 	void *data;
@@ -1209,9 +1245,6 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 {
 	struct strbuf path = STRBUF_INIT;
 	size_t base_len;
-	struct remote *remote;
-	struct transport *transport;
-	const struct ref *extra;
 	struct alternate_refs_data *cb = data;
 
 	if (!strbuf_realpath(&path, e->path, 0))
@@ -1226,13 +1259,8 @@ static int refs_from_alternate_cb(struct alternate_object_database *e,
 		goto out;
 	strbuf_setlen(&path, base_len);
 
-	remote = remote_get(path.buf);
-	transport = transport_get(remote, path.buf);
-	for (extra = transport_get_remote_refs(transport);
-	     extra;
-	     extra = extra->next)
-		cb->fn(extra->name, &extra->old_oid, cb->data);
-	transport_disconnect(transport);
+	read_alternate_refs(path.buf, cb->fn, cb->data);
+
 out:
 	strbuf_release(&path);
 	return 0;
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 06/12] clone: disable save_commit_buffer
From: Jeff King @ 2017-01-24  0:45 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

Normally git caches the raw commit object contents in
"struct commit". This makes it fast to run parse_commit()
followed by a pretty-print operation.

For commands which don't actually pretty-print the commits,
the caching is wasteful (and may use quite a lot of memory
if git accesses a large number of commits).

For fetching operations like clone, we already disable
save_commit_buffer in everything_local(), where we may
traverse. But clone also parses commits before it even gets
there; it looks at commits reachable from its refs, and from
alternate refs.

In one real-world case with a large number of tags, this
cut about 10MB off of clone's heap usage. Not spectacular,
but there's really no downside.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/clone.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/builtin/clone.c b/builtin/clone.c
index 5ef81927a..3fca45e7e 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -858,6 +858,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	struct refspec *refspec;
 	const char *fetch_pattern;
 
+	save_commit_buffer = 0;
 	packet_trace_identity("clone");
 	argc = parse_options(argc, argv, prefix, builtin_clone_options,
 			     builtin_clone_usage, 0);
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 07/12] fetch-pack: cache results of for_each_alternate_ref
From: Jeff King @ 2017-01-24  0:45 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

We may run for_each_alternate_ref() twice, once in
find_common() and once in everything_local(). This operation
can be expensive, because it involves running a sub-process
which must freshly load all of the alternate's refs from
disk.

Let's cache and reuse the results between the two calls. We
can make some optimizations based on the particular use
pattern in fetch-pack to keep our memory usage down.

The first is that we only care about the sha1s, not the refs
themselves. So it's OK to store only the sha1s, and to
suppress duplicates. The natural fit would therefore be a
sha1_array.

However, sha1_array's de-duplication happens only after it
has read and sorted all entries. It still stores each
duplicate. For an alternate with a large number of refs
pointing to the same commits, this is a needless expense.

Instead, we'd prefer to eliminate duplicates before putting
them in the cache, which implies using a hash. We can
further note that fetch-pack will call parse_object() on
each alternate sha1. We can therefore keep our cache as a
set of pointers to "struct object". That gives us a place to
put our "already seen" bit with an optimized hash lookup.
And as a bonus, the object stores the sha1 for us, so
pointer-to-object is all we need.

There are two extra optimizations I didn't do here:

  - we actually store an array of pointer-to-object.
    Technically we could just walk the obj_hash table
    looking for entries with the ALTERNATE flag set (because
    our use case doesn't care about the order here).

    But that hash table may be mostly composed of
    non-ALTERNATE entries, so we'd waste time walking over
    them. So it would be a slight win in memory use, but a
    loss in CPU.

  - the items we pull out of the cache are actual "struct
    object"s, but then we feed "obj->sha1" to our
    sub-functions, which promptly call parse_object().

    This second parse is cheap, because it starts with
    lookup_object() and will bail immediately when it sees
    we've already parsed the object. We could save the extra
    hash lookup, but it would involve refactoring the
    functions we call. It may or may not be worth the
    trouble.

Signed-off-by: Jeff King <peff@peff.net>
---
 fetch-pack.c | 52 ++++++++++++++++++++++++++++++++++++++++++----------
 object.h     |  2 +-
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 54f84c573..e0f5d5ce8 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -35,6 +35,7 @@ static const char *alternate_shallow_file;
 #define COMMON_REF	(1U << 2)
 #define SEEN		(1U << 3)
 #define POPPED		(1U << 4)
+#define ALTERNATE	(1U << 5)
 
 static int marked;
 
@@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args,
 	fputc('\n', stderr);
 }
 
+struct alternate_object_cache {
+	struct object **items;
+	size_t nr, alloc;
+};
+
+static void cache_one_alternate(const char *refname,
+				const struct object_id *oid,
+				void *vcache)
+{
+	struct alternate_object_cache *cache = vcache;
+	struct object *obj = parse_object(oid->hash);
+
+	if (!obj || (obj->flags & ALTERNATE))
+		return;
+
+	obj->flags |= ALTERNATE;
+	ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
+	cache->items[cache->nr++] = obj;
+}
+
+static void for_each_cached_alternate(void (*cb)(struct object *))
+{
+	static int initialized;
+	static struct alternate_object_cache cache;
+	size_t i;
+
+	if (!initialized) {
+		for_each_alternate_ref(cache_one_alternate, &cache);
+		initialized = 1;
+	}
+
+	for (i = 0; i < cache.nr; i++)
+		cb(cache.items[i]);
+}
+
 static void rev_list_push(struct commit *commit, int mark)
 {
 	if (!(commit->object.flags & mark)) {
@@ -253,11 +289,9 @@ static void send_request(struct fetch_pack_args *args,
 		write_or_die(fd, buf->buf, buf->len);
 }
 
-static void insert_one_alternate_ref(const char *refname,
-				     const struct object_id *oid,
-				     void *unused)
+static void insert_one_alternate_object(struct object *obj)
 {
-	rev_list_insert_ref(NULL, oid->hash);
+	rev_list_insert_ref(NULL, obj->oid.hash);
 }
 
 #define INITIAL_FLUSH 16
@@ -300,7 +334,7 @@ static int find_common(struct fetch_pack_args *args,
 	marked = 1;
 
 	for_each_ref(rev_list_insert_ref_oid, NULL);
-	for_each_alternate_ref(insert_one_alternate_ref, NULL);
+	for_each_cached_alternate(insert_one_alternate_object);
 
 	fetching = 0;
 	for ( ; refs ; refs = refs->next) {
@@ -621,11 +655,9 @@ static void filter_refs(struct fetch_pack_args *args,
 	*refs = newlist;
 }
 
-static void mark_alternate_complete(const char *refname,
-				    const struct object_id *oid,
-				    void *unused)
+static void mark_alternate_complete(struct object *obj)
 {
-	mark_complete(oid->hash);
+	mark_complete(obj->oid.hash);
 }
 
 static int everything_local(struct fetch_pack_args *args,
@@ -661,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args,
 
 	if (!args->deepen) {
 		for_each_ref(mark_complete_oid, NULL);
-		for_each_alternate_ref(mark_alternate_complete, NULL);
+		for_each_cached_alternate(mark_alternate_complete);
 		commit_list_sort_by_date(&complete);
 		if (cutoff)
 			mark_recent_complete_commits(args, cutoff);
diff --git a/object.h b/object.h
index 614a00675..f52957dcb 100644
--- a/object.h
+++ b/object.h
@@ -29,7 +29,7 @@ struct object_array {
 /*
  * object flag allocation:
  * revision.h:      0---------10                                26
- * fetch-pack.c:    0---4
+ * fetch-pack.c:    0---5
  * walker.c:        0-2
  * upload-pack.c:       4       11----------------19
  * builtin/blame.c:               12-13
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 08/12] add oidset API
From: Jeff King @ 2017-01-24  0:46 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

This is similar to many of our uses of sha1-array, but it
overcomes one limitation of a sha1-array: when you are
de-duplicating a large input with relatively few unique
entries, sha1-array uses 20 bytes per non-unique entry.
Whereas this set will use memory linear in the number of
unique entries (albeit a few more than 20 bytes due to
hashmap overhead).

Signed-off-by: Jeff King <peff@peff.net>
---
This may be overkill. You can get roughly the same thing by making
actual object structs via lookup_unknown_object(). But see the next
patch for some comments on that.

 Makefile |  1 +
 oidset.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 oidset.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 oidset.c
 create mode 100644 oidset.h

diff --git a/Makefile b/Makefile
index 27afd0f37..e41efc2d8 100644
--- a/Makefile
+++ b/Makefile
@@ -774,6 +774,7 @@ LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
+LIB_OBJS += oidset.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
 LIB_OBJS += pack-check.o
diff --git a/oidset.c b/oidset.c
new file mode 100644
index 000000000..6094cff8c
--- /dev/null
+++ b/oidset.c
@@ -0,0 +1,49 @@
+#include "cache.h"
+#include "oidset.h"
+
+struct oidset_entry {
+	struct hashmap_entry hash;
+	struct object_id oid;
+};
+
+int oidset_hashcmp(const void *va, const void *vb,
+			  const void *vkey)
+{
+	const struct oidset_entry *a = va, *b = vb;
+	const struct object_id *key = vkey;
+	return oidcmp(&a->oid, key ? key : &b->oid);
+}
+
+int oidset_contains(const struct oidset *set, const struct object_id *oid)
+{
+	struct hashmap_entry key;
+
+	if (!set->map.cmpfn)
+		return 0;
+
+	hashmap_entry_init(&key, sha1hash(oid->hash));
+	return !!hashmap_get(&set->map, &key, oid);
+}
+
+int oidset_insert(struct oidset *set, const struct object_id *oid)
+{
+	struct oidset_entry *entry;
+
+	if (!set->map.cmpfn)
+		hashmap_init(&set->map, oidset_hashcmp, 0);
+
+	if (oidset_contains(set, oid))
+		return 1;
+
+	entry = xmalloc(sizeof(*entry));
+	hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
+	oidcpy(&entry->oid, oid);
+
+	hashmap_add(&set->map, entry);
+	return 0;
+}
+
+void oidset_clear(struct oidset *set)
+{
+	hashmap_free(&set->map, 1);
+}
diff --git a/oidset.h b/oidset.h
new file mode 100644
index 000000000..b7eaab5b8
--- /dev/null
+++ b/oidset.h
@@ -0,0 +1,45 @@
+#ifndef OIDSET_H
+#define OIDSET_H
+
+/**
+ * This API is similar to sha1-array, in that it maintains a set of object ids
+ * in a memory-efficient way. The major differences are:
+ *
+ *   1. It uses a hash, so we can do online duplicate removal, rather than
+ *      sort-and-uniq at the end. This can reduce memory footprint if you have
+ *      a large list of oids with many duplicates.
+ *
+ *   2. The per-unique-oid memory footprint is slightly higher due to hash
+ *      table overhead.
+ */
+
+/**
+ * A single oidset; should be zero-initialized (or use OIDSET_INIT).
+ */
+struct oidset {
+	struct hashmap map;
+};
+
+#define OIDSET_INIT { { NULL } }
+
+/**
+ * Returns true iff `set` contains `oid`.
+ */
+int oidset_contains(const struct oidset *set, const struct object_id *oid);
+
+/**
+ * Insert the oid into the set; a copy is made, so "oid" does not need
+ * to persist after this function is called.
+ *
+ * Returns 1 if the oid was already in the set, 0 otherwise. This can be used
+ * to perform an efficient check-and-add.
+ */
+int oidset_insert(struct oidset *set, const struct object_id *oid);
+
+/**
+ * Remove all entries from the oidset, freeing any resources associated with
+ * it.
+ */
+void oidset_clear(struct oidset *set);
+
+#endif /* OIDSET_H */
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 10/12] receive-pack: fix misleading namespace/.have comment
From: Jeff King @ 2017-01-24  0:47 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

The comment claims that we handle alternate ".have" lines
through this function, but that hasn't been the case since
85f251045 (write_head_info(): handle "extra refs" locally,
2012-01-06).

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/receive-pack.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 27bb52988..8f8762e4a 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -261,10 +261,7 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
 	/*
 	 * Advertise refs outside our current namespace as ".have"
 	 * refs, so that the client can use them to minimize data
-	 * transfer but will otherwise ignore them. This happens to
-	 * cover ".have" that are thrown in by add_one_alternate_ref()
-	 * to mark histories that are complete in our alternates as
-	 * well.
+	 * transfer but will otherwise ignore them.
 	 */
 	if (!path)
 		path = ".have";
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 09/12] receive-pack: use oidset to de-duplicate .have lines
From: Jeff King @ 2017-01-24  0:47 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

If you have an alternate object store with a very large
number of refs, the peak memory usage of the sha1_array can
grow high, even if most of them are duplicates that end up
not being printed at all.

The similar for_each_alternate_ref() code-paths in
fetch-pack solve this by using flags in "struct object" to
de-duplicate (and so are relying on obj_hash at the core).

But we don't have a "struct object" at all in this case. We
could call lookup_unknown_object() to get one, but if our
goal is reducing memory footprint, it's not great:

 - an unknown object is as large as the largest object type
   (a commit), which is bigger than an oidset entry

 - we can free the memory after our ref advertisement, but
   "struct object" entries persist forever (and the
   receive-pack may hang around for a long time, as the
   bottleneck is often client upload bandwidth).

So let's use an oidset. Note that unlike a sha1-array it
doesn't sort the output as a side effect. However, our
output is at least stable, because for_each_alternate_ref()
will give us the sha1s in ref-sorted order.

In one particularly pathological case with an alternate that
has 60,000 unique refs out of 80 million total, this reduced
the peak heap usage of "git receive-pack . </dev/null" from
13GB to 14MB.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/receive-pack.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index b9f2c0cc5..27bb52988 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -21,6 +21,7 @@
 #include "sigchain.h"
 #include "fsck.h"
 #include "tmp-objdir.h"
+#include "oidset.h"
 
 static const char * const receive_pack_usage[] = {
 	N_("git receive-pack <git-dir>"),
@@ -271,27 +272,24 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
 	return 0;
 }
 
-static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused)
+static void show_one_alternate_ref(const char *refname,
+				   const struct object_id *oid,
+				   void *data)
 {
-	show_ref(".have", sha1);
-	return 0;
-}
+	struct oidset *seen = data;
 
-static void collect_one_alternate_ref(const char *refname,
-				      const struct object_id *oid,
-				      void *data)
-{
-	struct sha1_array *sa = data;
-	sha1_array_append(sa, oid->hash);
+	if (oidset_insert(seen, oid))
+		return;
+
+	show_ref(".have", oid->hash);
 }
 
 static void write_head_info(void)
 {
-	struct sha1_array sa = SHA1_ARRAY_INIT;
+	static struct oidset seen = OIDSET_INIT;
 
-	for_each_alternate_ref(collect_one_alternate_ref, &sa);
-	sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL);
-	sha1_array_clear(&sa);
+	for_each_alternate_ref(show_one_alternate_ref, &seen);
+	oidset_clear(&seen);
 	for_each_ref(show_ref_cb, NULL);
 	if (!sent_capabilities)
 		show_ref("capabilities^{}", null_sha1);
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* [PATCH 12/12] receive-pack: avoid duplicates between our refs and alternates
From: Jeff King @ 2017-01-24  0:48 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

We de-duplicate ".have" refs among themselves, but never
check if they are duplicates of our local refs. It's not
unreasonable that they would be if we are a "--shared" or
"--reference" clone of a similar repository; we'd have all
the same tags.

We can handle this by inserting our local refs into the
oidset, but obviously not suppressing duplicates (since the
refnames are important).

Note that this also switches the order in which we advertise
refs, processing ours first and then any alternates. The
order shouldn't matter (and arguably showing our refs first
makes more sense).

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/receive-pack.c |  4 +++-
 t/t5400-send-pack.sh   | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index c55e2f993..bc7ce0ea2 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -268,6 +268,8 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
 		if (oidset_insert(seen, oid))
 			return 0;
 		path = ".have";
+	} else {
+		oidset_insert(seen, oid);
 	}
 	show_ref(path, oid->hash);
 	return 0;
@@ -289,9 +291,9 @@ static void write_head_info(void)
 {
 	static struct oidset seen = OIDSET_INIT;
 
+	for_each_ref(show_ref_cb, &seen);
 	for_each_alternate_ref(show_one_alternate_ref, &seen);
 	oidset_clear(&seen);
-	for_each_ref(show_ref_cb, &seen);
 	if (!sent_capabilities)
 		show_ref("capabilities^{}", null_sha1);
 
diff --git a/t/t5400-send-pack.sh b/t/t5400-send-pack.sh
index 305ca7a93..3331e0f53 100755
--- a/t/t5400-send-pack.sh
+++ b/t/t5400-send-pack.sh
@@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' '
 	)
 '
 
+extract_ref_advertisement () {
+	perl -lne '
+		# \\ is there to skip capabilities after \0
+		/push< ([^\\]+)/ or next;
+		exit 0 if $1 eq "0000";
+		print $1;
+	'
+}
+
+test_expect_success 'receive-pack de-dupes .have lines' '
+	git init shared &&
+	git -C shared commit --allow-empty -m both &&
+	git clone -s shared fork &&
+	(
+		cd shared &&
+		git checkout -b only-shared &&
+		git commit --allow-empty -m only-shared &&
+		git update-ref refs/heads/foo HEAD
+	) &&
+
+	# Notable things in this expectation:
+	#  - local refs are not de-duped
+	#  - .have does not duplicate locals
+	#  - .have does not duplicate itself
+	local=$(git -C fork rev-parse HEAD) &&
+	shared=$(git -C shared rev-parse only-shared) &&
+	cat >expect <<-EOF &&
+	$local refs/heads/master
+	$local refs/remotes/origin/HEAD
+	$local refs/remotes/origin/master
+	$shared .have
+	EOF
+
+	GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo &&
+	extract_ref_advertisement <trace >refs &&
+	test_cmp expect refs
+'
+
 test_done
-- 
2.11.0.765.g454d2182f

^ permalink raw reply related

* [PATCH 11/12] receive-pack: treat namespace .have lines like alternates
From: Jeff King @ 2017-01-24  0:48 UTC (permalink / raw)
  To: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

Namely, de-duplicate them. We use the same set as the
alternates, since we call them both ".have" (i.e., there is
no value in showing one versus the other).

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/receive-pack.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 8f8762e4a..c55e2f993 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -251,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1)
 }
 
 static int show_ref_cb(const char *path_full, const struct object_id *oid,
-		       int flag, void *unused)
+		       int flag, void *data)
 {
+	struct oidset *seen = data;
 	const char *path = strip_namespace(path_full);
 
 	if (ref_is_hidden(path, path_full))
@@ -263,8 +264,11 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid,
 	 * refs, so that the client can use them to minimize data
 	 * transfer but will otherwise ignore them.
 	 */
-	if (!path)
+	if (!path) {
+		if (oidset_insert(seen, oid))
+			return 0;
 		path = ".have";
+	}
 	show_ref(path, oid->hash);
 	return 0;
 }
@@ -287,7 +291,7 @@ static void write_head_info(void)
 
 	for_each_alternate_ref(show_one_alternate_ref, &seen);
 	oidset_clear(&seen);
-	for_each_ref(show_ref_cb, NULL);
+	for_each_ref(show_ref_cb, &seen);
 	if (!sent_capabilities)
 		show_ref("capabilities^{}", null_sha1);
 
-- 
2.11.0.765.g454d2182f


^ permalink raw reply related

* Re: [PATCH v2 25/27] attr: store attribute stack in attr_check structure
From: Brandon Williams @ 2017-01-24  1:11 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sbeller, pclouds
In-Reply-To: <20170123220614.GA187696@google.com>

On 01/23, Brandon Williams wrote:
> As we discussed off-line I'll also do the rework to break up the
> question and result.  That way two threads can be executing using the
> same attr_check structure.

Thinking about this I don't really see what we would gain by breaking
them up.

Right now most callers have a static attr_check struct which holds the
question and answer (and in my series a buffer of all_attrs used during
the collection process).  If this struct is broken up into question and
answer then the only part of it that can be shared with multiple threads
is the question, which ends up being an array with 2 maybe 3 entries on
average.  The result and the array of all_attrs would then need to be
allocated each time calling into the attribute system since they can't
be shared.  Since this allocation is already going to happen wouldn't it
just make sense to drop the static modifier on the check structure (or
have a per-thread check structure) if you really wanted a particular
function thread safe?  It seems like breaking the question and answer up
doesn't buy you much in terms of reducing allocation churn and instead
complicates the API with needing to keep track of two structures instead
of a one.

-- 
Brandon Williams

^ permalink raw reply

* Re: [PATCH 3/3] git-prompt.sh: fix for submodule 'dirty' indicator
From: brian m. carlson @ 2017-01-24  1:17 UTC (permalink / raw)
  To: Benjamin Fuchs; +Cc: git, szeder.dev, sbeller
In-Reply-To: <1485113421-22264-1-git-send-email-email@benjaminfuchs.de>

[-- Attachment #1: Type: text/plain, Size: 1924 bytes --]

On Sun, Jan 22, 2017 at 08:30:21PM +0100, Benjamin Fuchs wrote:
> Fixing wrong git diff line.

This patch says 3/3, but I don't see 1 and 2.  Also, this description
doesn't tell me what the problem is, or why this fix is useful.  Such
information helps us down the line when looking at the history, and it
also helps reviewers determine whether your change makes sense.

Right now I can only guess that there's an issue with spaces or slashes
somehow.  You might want to take a look at
Documentation/SubmittingPatches, especially point 2.

> ---
>  contrib/completion/git-prompt.sh | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/contrib/completion/git-prompt.sh b/contrib/completion/git-prompt.sh
> index c44b9a2..43b28e9 100644
> --- a/contrib/completion/git-prompt.sh
> +++ b/contrib/completion/git-prompt.sh
> @@ -306,9 +306,9 @@ __git_ps1_submodule ()
>  	local submodule_name="$(basename "$git_dir")"
>  	if [ "$submodule_name" != ".git" ] && [ "$submodule_name" != "." ]; then
>  		local parent_top="${git_dir%.git*}"
> -		local submodule_top="${git_dir#*modules}"
> +		local submodule_top="${git_dir#*modules/}"
>  		local status=""
> -		git diff -C "$parent_top" --no-ext-diff --ignore-submodules=dirty --quiet -- "$submodule_top" 2>/dev/null || status="+"
> +		git -C "$parent_top" diff --no-ext-diff --ignore-submodules=dirty --quiet -- "$submodule_top" 2>/dev/null || status="+"
>  		printf "$status$submodule_name:"
>  	fi
>  }
> @@ -544,7 +544,7 @@ __git_ps1 ()
>  
>  	local sub=""
>  	if [ -n "${GIT_PS1_SHOWSUBMODULE}" ]; then
> -		sub="$(__git_ps1_submodule $g)"
> +		sub="$(__git_ps1_submodule "$g")"
>  	fi
>  
>  	local f="$w$i$s$u"
> -- 
> 2.7.4
> 

-- 
brian m. carlson / brian with sandals: Houston, Texas, US
+1 832 623 2791 | https://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: https://keybase.io/bk2204

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 868 bytes --]

^ permalink raw reply

* Re: [PATCH 0/12] reducing resource usage of for_each_alternate_ref
From: Brandon Williams @ 2017-01-24  1:33 UTC (permalink / raw)
  To: Jeff King; +Cc: git
In-Reply-To: <20170124003729.j4ygjcgypdq7hceg@sigill.intra.peff.net>

On 01/23, Jeff King wrote:
> 
> A brief overview of the patches:
> 
>   [01/12]: for_each_alternate_ref: handle failure from real_pathdup()
>   [02/12]: for_each_alternate_ref: stop trimming trailing slashes
>   [03/12]: for_each_alternate_ref: use strbuf for path allocation
> 
>     Bugfixes and cleanups (the first one is actually a recent-ish
>     regression).

Which is most likely my fault, Sorry! :)

I think the old behavior was to die and not return NULL.

-- 
Brandon Williams

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox