* [PATCH 1/3] git submodule update: have a dedicated helper for cloning
2015-10-23 18:44 [PATCH 0/3] expose parallelism for submodule {update, clone} Stefan Beller
@ 2015-10-23 18:44 ` Stefan Beller
2015-10-23 18:44 ` [PATCH 2/3] submodule update: Expose parallelism to the user Stefan Beller
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Stefan Beller @ 2015-10-23 18:44 UTC (permalink / raw)
To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller
This introduces a new helper function in git submodule--helper
which takes care of cloning all submodules, which we want to
parallelize eventually.
Some tests (such as empty URL, update_mode=none) are required in the
helper to make the decision for cloning. These checks have been
moved into the C function as well (no need to repeat them in the
shell script).
As we can only access the stderr channel from within the parallel
processing engine, we need to reroute the error message for
specified but initialized submodules to stderr. As it is an error
message, this should have gone to stderr in the first place, so it
is a bug fix along the way.
Signed-off-by: Stefan Beller <sbeller@google.com>
---
builtin/submodule--helper.c | 225 ++++++++++++++++++++++++++++++++++++++++++++
git-submodule.sh | 45 +++------
t/t7400-submodule-basic.sh | 4 +-
3 files changed, 238 insertions(+), 36 deletions(-)
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f4c3eff..e6bce76 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -255,6 +255,230 @@ static int module_clone(int argc, const char **argv, const char *prefix)
return 0;
}
+static int git_submodule_config(const char *var, const char *value, void *cb)
+{
+ return parse_submodule_config_option(var, value);
+}
+
+struct submodule_update_clone {
+ int count;
+ int quiet;
+ int print_unmatched;
+ char *reference;
+ char *depth;
+ char *update;
+ const char *recursive_prefix;
+ const char *prefix;
+ struct module_list list;
+ struct string_list projectlines;
+ struct pathspec pathspec;
+};
+#define SUBMODULE_UPDATE_CLONE_INIT {0, 0, 0, NULL, NULL, NULL, NULL, NULL, MODULE_LIST_INIT, STRING_LIST_INIT_DUP}
+
+static void fill_clone_command(struct child_process *cp, int quiet,
+ const char *prefix, const char *path,
+ const char *name, const char *url,
+ const char *reference, const char *depth)
+{
+ cp->git_cmd = 1;
+ cp->no_stdin = 1;
+ cp->stdout_to_stderr = 1;
+ cp->err = -1;
+ argv_array_push(&cp->args, "submodule--helper");
+ argv_array_push(&cp->args, "clone");
+ if (quiet)
+ argv_array_push(&cp->args, "--quiet");
+
+ if (prefix) {
+ argv_array_push(&cp->args, "--prefix");
+ argv_array_push(&cp->args, prefix);
+ }
+ argv_array_push(&cp->args, "--path");
+ argv_array_push(&cp->args, path);
+
+ argv_array_push(&cp->args, "--name");
+ argv_array_push(&cp->args, name);
+
+ argv_array_push(&cp->args, "--url");
+ argv_array_push(&cp->args, url);
+ if (reference)
+ argv_array_push(&cp->args, reference);
+ if (depth)
+ argv_array_push(&cp->args, depth);
+}
+
+static int update_clone_get_next_task(void **pp_task_cb,
+ struct child_process *cp,
+ struct strbuf *err,
+ void *pp_cb)
+{
+ struct submodule_update_clone *pp = pp_cb;
+
+ for (; pp->count < pp->list.nr; pp->count++) {
+ const struct submodule *sub = NULL;
+ const char *displaypath = NULL;
+ const struct cache_entry *ce = pp->list.entries[pp->count];
+ struct strbuf sb = STRBUF_INIT;
+ const char *update_module = NULL;
+ const char *url = NULL;
+ int just_cloned = 0;
+
+ if (ce_stage(ce)) {
+ if (pp->recursive_prefix)
+ strbuf_addf(err, "Skipping unmerged submodule %s/%s\n",
+ pp->recursive_prefix, ce->name);
+ else
+ strbuf_addf(err, "Skipping unmerged submodule %s\n",
+ ce->name);
+ continue;
+ }
+
+ sub = submodule_from_path(null_sha1, ce->name);
+ if (pp->recursive_prefix)
+ displaypath = relative_path(pp->recursive_prefix, ce->name, &sb);
+ else
+ displaypath = ce->name;
+
+ if (pp->update)
+ update_module = pp->update;
+ if (!update_module)
+ update_module = sub->update;
+ if (!update_module)
+ update_module = "checkout";
+ if (!strcmp(update_module, "none")) {
+ strbuf_addf(err, "Skipping submodule '%s'\n", displaypath);
+ continue;
+ }
+
+ /*
+ * Looking up the url in .git/config.
+ * We cannot fall back to .gitmodules as we only want to process
+ * configured submodules. This renders the submodule lookup API
+ * useless, as it cannot lookup without fallback.
+ */
+ strbuf_reset(&sb);
+ strbuf_addf(&sb, "submodule.%s.url", sub->name);
+ git_config_get_string_const(sb.buf, &url);
+ if (!url) {
+ /*
+ * Only mention uninitialized submodules when its
+ * path have been specified
+ */
+ if (pp->pathspec.nr)
+ strbuf_addf(err, _("Submodule path '%s' not initialized\n"
+ "Maybe you want to use 'update --init'?"), displaypath);
+ continue;
+ }
+
+ strbuf_reset(&sb);
+ strbuf_addf(&sb, "%s/.git", ce->name);
+ just_cloned = !file_exists(sb.buf);
+
+ strbuf_reset(&sb);
+ strbuf_addf(&sb, "%06o %s %d %d\t%s\n", ce->ce_mode,
+ sha1_to_hex(ce->sha1), ce_stage(ce),
+ just_cloned, ce->name);
+ string_list_append(&pp->projectlines, sb.buf);
+
+ if (just_cloned) {
+ fill_clone_command(cp, pp->quiet, pp->prefix, ce->name,
+ sub->name, url, pp->reference, pp->depth);
+ pp->count++;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int update_clone_start_failure(struct child_process *cp,
+ struct strbuf *err,
+ void *pp_cb,
+ void *pp_task_cb)
+{
+ struct submodule_update_clone *pp = pp_cb;
+
+ strbuf_addf(err, "error when starting a child process");
+ pp->print_unmatched = 1;
+
+ return 1;
+}
+
+static int update_clone_task_finished(int result,
+ struct child_process *cp,
+ struct strbuf *err,
+ void *pp_cb,
+ void *pp_task_cb)
+{
+ struct submodule_update_clone *pp = pp_cb;
+
+ if (!result) {
+ return 0;
+ } else {
+ strbuf_addf(err, "error in one child process");
+ pp->print_unmatched = 1;
+ return 1;
+ }
+}
+
+static int update_clone(int argc, const char **argv, const char *prefix)
+{
+ struct string_list_item *item;
+ struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
+
+ struct option module_list_options[] = {
+ OPT_STRING(0, "prefix", &prefix,
+ N_("path"),
+ N_("path into the working tree")),
+ OPT_STRING(0, "recursive_prefix", &pp.recursive_prefix,
+ N_("path"),
+ N_("path into the working tree, across nested "
+ "submodule boundaries")),
+ OPT_STRING(0, "update", &pp.update,
+ N_("string"),
+ N_("update command for submodules")),
+ OPT_STRING(0, "reference", &pp.reference, "<repository>",
+ N_("Use the local reference repository "
+ "instead of a full clone")),
+ OPT_STRING(0, "depth", &pp.depth, "<depth>",
+ N_("Create a shallow clone truncated to the "
+ "specified number of revisions")),
+ OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
+ OPT_END()
+ };
+
+ const char *const git_submodule_helper_usage[] = {
+ N_("git submodule--helper list [--prefix=<path>] [<path>...]"),
+ NULL
+ };
+ pp.prefix = prefix;
+
+ argc = parse_options(argc, argv, prefix, module_list_options,
+ git_submodule_helper_usage, 0);
+
+ if (module_list_compute(argc, argv, prefix, &pp.pathspec, &pp.list) < 0) {
+ printf("#unmatched\n");
+ return 1;
+ }
+
+ gitmodules_config();
+ /* Overlay the parsed .gitmodules file with .git/config */
+ git_config(git_submodule_config, NULL);
+ run_processes_parallel(1, update_clone_get_next_task,
+ update_clone_start_failure,
+ update_clone_task_finished,
+ &pp);
+
+ if (pp.print_unmatched) {
+ printf("#unmatched\n");
+ return 1;
+ }
+
+ for_each_string_list_item(item, &pp.projectlines) {
+ utf8_fprintf(stdout, "%s", item->string);
+ }
+ return 0;
+}
+
struct cmd_struct {
const char *cmd;
int (*fn)(int, const char **, const char *);
@@ -264,6 +488,7 @@ static struct cmd_struct commands[] = {
{"list", module_list},
{"name", module_name},
{"clone", module_clone},
+ {"update-clone", update_clone}
};
int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
diff --git a/git-submodule.sh b/git-submodule.sh
index 8b0eb9a..ea883b9 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -655,17 +655,18 @@ cmd_update()
cmd_init "--" "$@" || return
fi
- cloned_modules=
- git submodule--helper list --prefix "$wt_prefix" "$@" | {
+ git submodule--helper update-clone ${GIT_QUIET:+--quiet} \
+ ${wt_prefix:+--prefix "$wt_prefix"} \
+ ${prefix:+--recursive_prefix "$prefix"} \
+ ${update:+--update "$update"} \
+ ${reference:+--reference "$reference"} \
+ ${depth:+--depth "$depth"} \
+ "$@" | {
err=
- while read mode sha1 stage sm_path
+ while read mode sha1 stage just_cloned sm_path
do
die_if_unmatched "$mode"
- if test "$stage" = U
- then
- echo >&2 "Skipping unmerged submodule $prefix$sm_path"
- continue
- fi
+
name=$(git submodule--helper name "$sm_path") || exit
url=$(git config submodule."$name".url)
branch=$(get_submodule_config "$name" branch master)
@@ -682,27 +683,10 @@ cmd_update()
displaypath=$(relative_path "$prefix$sm_path")
- if test "$update_module" = "none"
- then
- echo "Skipping submodule '$displaypath'"
- continue
- fi
-
- if test -z "$url"
- then
- # Only mention uninitialized submodules when its
- # path have been specified
- test "$#" != "0" &&
- say "$(eval_gettext "Submodule path '\$displaypath' not initialized
-Maybe you want to use 'update --init'?")"
- continue
- fi
-
- if ! test -d "$sm_path"/.git && ! test -f "$sm_path"/.git
+ if test $just_cloned -eq 1
then
- git submodule--helper clone ${GIT_QUIET:+--quiet} --prefix "$prefix" --path "$sm_path" --name "$name" --url "$url" "$reference" "$depth" || exit
- cloned_modules="$cloned_modules;$name"
subsha1=
+ update_module=checkout
else
subsha1=$(clear_local_git_env; cd "$sm_path" &&
git rev-parse --verify HEAD) ||
@@ -742,13 +726,6 @@ Maybe you want to use 'update --init'?")"
die "$(eval_gettext "Unable to fetch in submodule path '\$displaypath'")"
fi
- # Is this something we just cloned?
- case ";$cloned_modules;" in
- *";$name;"*)
- # then there is no local change to integrate
- update_module=checkout ;;
- esac
-
must_die_on_failure=
case "$update_module" in
checkout)
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 540771c..5991e3c 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -462,7 +462,7 @@ test_expect_success 'update --init' '
git config --remove-section submodule.example &&
test_must_fail git config submodule.example.url &&
- git submodule update init > update.out &&
+ git submodule update init 2> update.out &&
cat update.out &&
test_i18ngrep "not initialized" update.out &&
test_must_fail git rev-parse --resolve-git-dir init/.git &&
@@ -480,7 +480,7 @@ test_expect_success 'update --init from subdirectory' '
mkdir -p sub &&
(
cd sub &&
- git submodule update ../init >update.out &&
+ git submodule update ../init 2>update.out &&
cat update.out &&
test_i18ngrep "not initialized" update.out &&
test_must_fail git rev-parse --resolve-git-dir ../init/.git &&
--
2.6.2.280.g74301d6
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 2/3] submodule update: Expose parallelism to the user
2015-10-23 18:44 [PATCH 0/3] expose parallelism for submodule {update, clone} Stefan Beller
2015-10-23 18:44 ` [PATCH 1/3] git submodule update: have a dedicated helper for cloning Stefan Beller
@ 2015-10-23 18:44 ` Stefan Beller
2015-10-23 18:44 ` [PATCH 3/3] clone: Allow an explicit argument for parallel submodule clones Stefan Beller
2015-10-23 19:25 ` [PATCH 0/3] expose parallelism for submodule {update, clone} Junio C Hamano
3 siblings, 0 replies; 7+ messages in thread
From: Stefan Beller @ 2015-10-23 18:44 UTC (permalink / raw)
To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller
Expose possible parallelism either via the "--jobs" CLI parameter or
the "submodule.jobs" setting.
By having the variable initialized to -1, we make sure 0 can be passed
into the parallel processing machine, which will then pick as many parallel
workers as there are CPUs.
Signed-off-by: Stefan Beller <sbeller@google.com>
---
Documentation/git-submodule.txt | 6 +++++-
builtin/submodule--helper.c | 17 +++++++++++++----
git-submodule.sh | 9 +++++++++
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/Documentation/git-submodule.txt b/Documentation/git-submodule.txt
index f17687e..f5429fa 100644
--- a/Documentation/git-submodule.txt
+++ b/Documentation/git-submodule.txt
@@ -16,7 +16,7 @@ SYNOPSIS
'git submodule' [--quiet] deinit [-f|--force] [--] <path>...
'git submodule' [--quiet] update [--init] [--remote] [-N|--no-fetch]
[-f|--force] [--rebase|--merge] [--reference <repository>]
- [--depth <depth>] [--recursive] [--] [<path>...]
+ [--depth <depth>] [--recursive] [--jobs <n>] [--] [<path>...]
'git submodule' [--quiet] summary [--cached|--files] [(-n|--summary-limit) <n>]
[commit] [--] [<path>...]
'git submodule' [--quiet] foreach [--recursive] <command>
@@ -374,6 +374,10 @@ for linkgit:git-clone[1]'s `--reference` and `--shared` options carefully.
clone with a history truncated to the specified number of revisions.
See linkgit:git-clone[1]
+-j::
+--jobs::
+ This option is only valid for the update command.
+ Clone new submodules in parallel with as many jobs.
<path>...::
Paths to submodule(s). When specified this will restrict the command
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index e6bce76..4888e84 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -422,6 +422,7 @@ static int update_clone_task_finished(int result,
static int update_clone(int argc, const char **argv, const char *prefix)
{
+ int max_jobs = -1;
struct string_list_item *item;
struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
@@ -442,6 +443,8 @@ static int update_clone(int argc, const char **argv, const char *prefix)
OPT_STRING(0, "depth", &pp.depth, "<depth>",
N_("Create a shallow clone truncated to the "
"specified number of revisions")),
+ OPT_INTEGER('j', "jobs", &max_jobs,
+ N_("parallel jobs")),
OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
OPT_END()
};
@@ -463,10 +466,16 @@ static int update_clone(int argc, const char **argv, const char *prefix)
gitmodules_config();
/* Overlay the parsed .gitmodules file with .git/config */
git_config(git_submodule_config, NULL);
- run_processes_parallel(1, update_clone_get_next_task,
- update_clone_start_failure,
- update_clone_task_finished,
- &pp);
+
+ if (max_jobs == -1)
+ if (git_config_get_int("submodule.jobs", &max_jobs))
+ max_jobs = 1;
+
+ run_processes_parallel(max_jobs,
+ update_clone_get_next_task,
+ update_clone_start_failure,
+ update_clone_task_finished,
+ &pp);
if (pp.print_unmatched) {
printf("#unmatched\n");
diff --git a/git-submodule.sh b/git-submodule.sh
index ea883b9..c2dfb16 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -636,6 +636,14 @@ cmd_update()
--depth=*)
depth=$1
;;
+ -j|--jobs)
+ case "$2" in '') usage ;; esac
+ jobs="--jobs=$2"
+ shift
+ ;;
+ --jobs=*)
+ jobs=$1
+ ;;
--)
shift
break
@@ -661,6 +669,7 @@ cmd_update()
${update:+--update "$update"} \
${reference:+--reference "$reference"} \
${depth:+--depth "$depth"} \
+ ${jobs:+$jobs} \
"$@" | {
err=
while read mode sha1 stage just_cloned sm_path
--
2.6.2.280.g74301d6
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 3/3] clone: Allow an explicit argument for parallel submodule clones
2015-10-23 18:44 [PATCH 0/3] expose parallelism for submodule {update, clone} Stefan Beller
2015-10-23 18:44 ` [PATCH 1/3] git submodule update: have a dedicated helper for cloning Stefan Beller
2015-10-23 18:44 ` [PATCH 2/3] submodule update: Expose parallelism to the user Stefan Beller
@ 2015-10-23 18:44 ` Stefan Beller
2015-10-28 21:03 ` Sebastian Schuberth
2015-10-23 19:25 ` [PATCH 0/3] expose parallelism for submodule {update, clone} Junio C Hamano
3 siblings, 1 reply; 7+ messages in thread
From: Stefan Beller @ 2015-10-23 18:44 UTC (permalink / raw)
To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller
Just pass it along to "git submodule update", which may pick reasonable
defaults if you don't specify an explicit number.
TODO: Add a test for this.
Signed-off-by: Stefan Beller <sbeller@google.com>
---
Documentation/git-clone.txt | 5 ++++-
builtin/clone.c | 23 +++++++++++++++++------
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt
index f1f2a3f..affa52e 100644
--- a/Documentation/git-clone.txt
+++ b/Documentation/git-clone.txt
@@ -14,7 +14,7 @@ SYNOPSIS
[-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
[--dissociate] [--separate-git-dir <git dir>]
[--depth <depth>] [--[no-]single-branch]
- [--recursive | --recurse-submodules] [--] <repository>
+ [--recursive | --recurse-submodules] [--jobs <n>] [--] <repository>
[<directory>]
DESCRIPTION
@@ -216,6 +216,9 @@ objects from the source repository into a pack in the cloned repository.
The result is Git repository can be separated from working
tree.
+-j::
+--jobs::
+ The number of submodules fetched at the same time.
<repository>::
The (possibly remote) repository to clone from. See the
diff --git a/builtin/clone.c b/builtin/clone.c
index 5864ad1..59ec984 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -50,6 +50,7 @@ static int option_progress = -1;
static struct string_list option_config;
static struct string_list option_reference;
static int option_dissociate;
+static int max_children;
static struct option builtin_clone_options[] = {
OPT__VERBOSITY(&option_verbosity),
@@ -72,6 +73,8 @@ static struct option builtin_clone_options[] = {
N_("initialize submodules in the clone")),
OPT_BOOL(0, "recurse-submodules", &option_recursive,
N_("initialize submodules in the clone")),
+ OPT_INTEGER('j', "jobs", &max_children,
+ N_("number of submodules cloned in parallel")),
OPT_STRING(0, "template", &option_template, N_("template-directory"),
N_("directory from which templates will be used")),
OPT_STRING_LIST(0, "reference", &option_reference, N_("repo"),
@@ -95,10 +98,6 @@ static struct option builtin_clone_options[] = {
OPT_END()
};
-static const char *argv_submodule[] = {
- "submodule", "update", "--init", "--recursive", NULL
-};
-
static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
{
static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
@@ -674,8 +673,20 @@ static int checkout(void)
err |= run_hook_le(NULL, "post-checkout", sha1_to_hex(null_sha1),
sha1_to_hex(sha1), "1", NULL);
- if (!err && option_recursive)
- err = run_command_v_opt(argv_submodule, RUN_GIT_CMD);
+ if (!err && option_recursive) {
+ struct argv_array args = ARGV_ARRAY_INIT;
+ argv_array_pushl(&args, "submodule", "update", "--init", "--recursive", NULL);
+
+ if (max_children) {
+ struct strbuf sb = STRBUF_INIT;
+ strbuf_addf(&sb, "--jobs=%d", max_children);
+ argv_array_push(&args, sb.buf);
+ strbuf_release(&sb);
+ }
+
+ err = run_command_v_opt(args.argv, RUN_GIT_CMD);
+ argv_array_clear(&args);
+ }
return err;
}
--
2.6.2.280.g74301d6
^ permalink raw reply related [flat|nested] 7+ messages in thread