git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH] ls-files: adding support for submodules
@ 2016-09-09 21:53 Brandon Williams
  2016-09-09 22:35 ` Jeff King
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Brandon Williams @ 2016-09-09 21:53 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Allow ls-files to recognize submodules in order to retrieve a list of
files from a repository's submodules.  This is done by forking off a
process to recursively call ls-files on all submodules.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
Hey git developers!

I'm new to the community and this is the first patch for an open source project
that I have worked on.

I'm looking forward to working on the project!
Brandon Williams

 Documentation/git-ls-files.txt         |   7 ++-
 builtin/ls-files.c                     |  58 +++++++++++++++++++
 t/t3007-ls-files-recurse-submodules.sh | 103 +++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 t/t3007-ls-files-recurse-submodules.sh

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0d933ac..446209e 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -18,7 +18,8 @@ SYNOPSIS
 		[--exclude-per-directory=<file>]
 		[--exclude-standard]
 		[--error-unmatch] [--with-tree=<tree-ish>]
-		[--full-name] [--abbrev] [--] [<file>...]
+		[--full-name] [--recurse-submodules]
+		[--abbrev] [--] [<file>...]
 
 DESCRIPTION
 -----------
@@ -137,6 +138,10 @@ a space) at the start of each line:
 	option forces paths to be output relative to the project
 	top directory.
 
+--recurse-submodules::
+	Recursively calls ls-files on each submodule in the repository.
+	Currently there is only support for the --cached mode.
+
 --abbrev[=<n>]::
 	Instead of showing the full 40-byte hexadecimal object
 	lines, show only a partial prefix.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 00ea91a..c428a51 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "string-list.h"
 #include "pathspec.h"
+#include "run-command.h"
 
 static int abbrev;
 static int show_deleted;
@@ -28,6 +29,7 @@ static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
 static int show_eol;
+static int recurse_submodules;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -152,6 +154,45 @@ static void show_killed_files(struct dir_struct *dir)
 	}
 }
 
+/**
+ *  Recursively call ls-files on a submodule
+ */
+static void show_gitlink(const struct cache_entry *ce)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	struct strbuf buf = STRBUF_INIT;
+	struct strbuf name = STRBUF_INIT;
+	int submodule_name_len;
+	FILE *fp;
+
+	argv_array_push(&cp.args, "ls-files");
+	argv_array_push(&cp.args, "--recurse-submodules");
+	cp.git_cmd = 1;
+	cp.dir = ce->name;
+	cp.out = -1;
+	start_command(&cp);
+	fp = fdopen(cp.out, "r");
+
+	/*
+	 * The ls-files child process produces filenames relative to
+	 * the submodule. Prefix each line with the submodule path
+	 * to make it relative to the current repository.
+	 */
+	strbuf_addstr(&name, ce->name);
+	strbuf_addch(&name, '/');
+	submodule_name_len = name.len;
+	while (strbuf_getline(&buf, fp) != EOF) {
+		strbuf_addbuf(&name, &buf);
+		write_name(name.buf);
+		strbuf_setlen(&name, submodule_name_len);
+	}
+
+	finish_command(&cp);
+	strbuf_release(&buf);
+	strbuf_release(&name);
+	fclose(fp);
+}
+
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
 	int len = max_prefix_len;
@@ -163,6 +204,10 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 			    len, ps_matched,
 			    S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
 		return;
+	if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+		show_gitlink(ce);
+		return;
+	}
 
 	if (tag && *tag && show_valid_bit &&
 	    (ce->ce_flags & CE_VALID)) {
@@ -468,6 +513,8 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		{ OPTION_SET_INT, 0, "full-name", &prefix_len, NULL,
 			N_("make the output relative to the project top directory"),
 			PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL },
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("recurse through submodules")),
 		OPT_BOOL(0, "error-unmatch", &error_unmatch,
 			N_("if any <file> is not in the index, treat this as an error")),
 		OPT_STRING(0, "with-tree", &with_tree, N_("tree-ish"),
@@ -519,6 +566,17 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	if (require_work_tree && !is_inside_work_tree())
 		setup_work_tree();
 
+	if (recurse_submodules &&
+	    (show_stage || show_deleted || show_others || show_unmerged ||
+	     show_killed || show_modified || show_resolve_undo ||
+	     show_valid_bit || show_tag || show_eol))
+		die("ls-files --recurse-submodules can only be used in "
+		    "--cached mode");
+
+	if (recurse_submodules && argc)
+		die("ls-files --recurse-submodules does not support path "
+		    "arguments");
+
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD |
 		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
diff --git a/t/t3007-ls-files-recurse-submodules.sh b/t/t3007-ls-files-recurse-submodules.sh
new file mode 100644
index 0000000..78deded
--- /dev/null
+++ b/t/t3007-ls-files-recurse-submodules.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+test_description='Test ls-files recurse-submodules feature
+
+This test verifies the recurse-submodules feature correctly lists files from
+submodules.
+'
+
+. ./test-lib.sh
+
+test_expect_success 'setup directory structure and submodules' '
+	echo a >a &&
+	mkdir b &&
+	echo b >b/b &&
+	git add a b &&
+	git commit -m "add a and b" &&
+	mkdir submodule &&
+	(
+		cd submodule &&
+		git init &&
+		echo c >c &&
+		git add c &&
+		git commit -m "add c"
+	) &&
+	git submodule add ./submodule &&
+	git commit -m "added submodule"
+'
+
+cat >expect <<EOF
+.gitmodules
+a
+b/b
+submodule/c
+EOF
+
+test_expect_success 'ls-files correctly outputs files in submodule' '
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'ls-files does not output files not added to a repo' '
+	echo a >not_added &&
+	echo b >b/not_added &&
+	(
+		cd submodule &&
+		echo c >not_added
+	) &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+cat >expect <<EOF
+.gitmodules
+a
+b/b
+submodule/.gitmodules
+submodule/c
+submodule/subsub/d
+EOF
+
+test_expect_success 'ls-files recurses more than 1 level' '
+	(
+		cd submodule &&
+		mkdir subsub &&
+		(
+			cd subsub &&
+			git init &&
+			echo d >d &&
+			git add d &&
+			git commit -m "add d"
+		) &&
+		git submodule add ./subsub &&
+		git commit -m "added subsub"
+	) &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+cat >expect_error <<EOF
+fatal: ls-files --recurse-submodules does not support path arguments
+EOF
+
+test_expect_success 'error when using path arguments' '
+	test_must_fail git ls-files --recurse-submodules b 2>actual &&
+	test_cmp expect_error actual
+'
+
+cat >expect_error <<EOF
+fatal: ls-files --recurse-submodules can only be used in --cached mode
+EOF
+
+test_expect_success 'error when using different modes' '
+	for opt in {v,t}; do
+		test_must_fail git ls-files --recurse-submodules -$opt 2>actual &&
+		test_cmp expect_error actual
+	done &&
+	for opt in {deleted,modified,others,ignored,stage,killed,unmerged,eol}; do
+		test_must_fail git ls-files --recurse-submodules --$opt 2>actual &&
+		test_cmp expect_error actual
+	done
+'
+
+test_done
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 27+ messages in thread
* Re: [PATCHv5] Another squash on run-command: add an asynchronous parallel child processor
@ 2015-09-24  2:17 Junio C Hamano
  2015-09-24 21:13 ` [PATCH 0/2] " Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-09-24  2:17 UTC (permalink / raw)
  To: Stefan Beller
  Cc: git, ramsay, jacob.keller, peff, jrnieder, johannes.schindelin,
	Jens.Lehmann, vlovich, sunshine

Stefan Beller <sbeller@google.com> writes:

> I agree the commit 7087c5f3 (SQUASH??? on top of run-command: add an
> asynchronous parallel child processor) makes sense; I arrived at the same
> patch after adding in the feedback.

I dunno.  As I said, while a small part of that commit is necessary
to be squashed into [6/14] (i.e. the "failed to start" bugfix and
redefinition of the return value of start_one()), the remainder of
the commit is primarily to illustrate possible future enhancements
that the basic structure of your code must support, so that we can
get the fundamentals right.  Each of the actual "possible future
enhancements" may or may not be a good idea and there is nothing
that back it up.  In such a vacuum, I'd prefer to leave them simple
and avoid starting performance tuning prematurely.

One thing that is sort-of backed up already by your "cap to 4"
experiment is that some sort of slow-start ramping up is far better
than letting thundering herd stampede, so I am OK if we kept that
SPAWN_CAP part of the commit.

But even then, we do not know if tying that cap to online_cpu() is a
good idea.  Neither of us have a good argument backed by data on it.

It is tempting to imagine, when you have N cores on an otherwise
idle box, the setting of SPAWN_CAP shouldn't make much difference to
how well the first child process makes its initial progress as long
as it does not exceed the number of idle cores N-1 you have at hand.

But that assumes that the task is CPU bound and you have infinite
memory bandwidth.  Once the task needs a lot of disk bandwidth to
make its initial progress, which certainly is the case for fetch,
the first child that is spawned together with (SPAWN_CAP-1) other
processes would be competing for the shared resource, and having
more online_cpus() would not help you.

If we are not doing analysis that takes into such factors (and it is
way too premature for us to be tuning), even "online_cpu() - 1" is
unnecessarily too complex than a hardcoded small number (say, "2",
or even "1").

The same thing can be said for the output_timeout selection.  "Do
not get stuck for too long until we have fully ramped up.  Do not
spin too frequently when there is no more room for a new child" was
something I came up out of thin air as an example of something we
might want to do, and I did write such a code in that commit, but
that was primarily done so that you can clearly see that a better
design would be to allow the caller, i.e. the scheduling loop,
specify output_timeout to buffer_stderr(), and to keep the latter a
"dumb" helper that can be controlled by a more smart caller (as
opposed to hiding such a logic in buffer_stderr() and have a "dumb"
driver call it).  The actual output_timeout computation logic is not
well thought out---it may even turn out to be that we are better off
if we lengthened the timeout before we have fully ramped up, to
encourage the first process to produce some output before we give
chance to other new processes to be spawned in the later round.

So for that change, while I think adding that parameter to
buffer_stderr() is something we would want to keep, I'd prefer to
keep the caller simpler by always passing a hardcoded 100 in the
initial version, before we start tuning.  And I do not think we want
to start tuning before building a solid foundation to tune.

In short, if I were amending that SQUASH??? commit, I'd probably be
making it do less, not more, than what it does, something along the
line of the attached.

 run-command.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/run-command.c b/run-command.c
index b6d8b39..829b6fe 100644
--- a/run-command.c
+++ b/run-command.c
@@ -1108,20 +1108,19 @@ static void pp_collect_finished(struct parallel_processes *pp)
 }
 
 
-#define SPAWN_CAP (pp.max_processes + 1) /* spawn as many as possible */
-
 int run_processes_parallel(int n, void *data,
 			   get_next_task_fn get_next_task,
 			   start_failure_fn start_failure,
 			   return_value_fn return_value)
 {
 	struct parallel_processes pp;
-	pp_init(&pp, n, data, get_next_task, start_failure, return_value);
 
+	pp_init(&pp, n, data, get_next_task, start_failure, return_value);
 	while (1) {
-		int no_more_task, cnt, output_timeout;
+		int no_more_task, cnt, output_timeout = 100;
+		int spawn_cap = 2;
 
-		for (cnt = SPAWN_CAP, no_more_task = 0;
+		for (cnt = spawn_cap, no_more_task = 0;
 		     cnt && pp.nr_processes < pp.max_processes;
 		     cnt--) {
 			if (!pp_start_one(&pp)) {
@@ -1132,12 +1131,6 @@ int run_processes_parallel(int n, void *data,
 
 		if (no_more_task && !pp.nr_processes)
 			break;
-		if (!cnt)
-			output_timeout = 50;
-		else if (pp.nr_processes < pp.max_processes)
-			output_timeout = 100;
-		else
-			output_timeout = 1000;
 		pp_buffer_stderr(&pp, output_timeout);
 
 		pp_output(&pp);

^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2016-09-15 21:37 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-09-09 21:53 [RFC/PATCH] ls-files: adding support for submodules Brandon Williams
2016-09-09 22:35 ` Jeff King
2016-09-09 22:40   ` Junio C Hamano
2016-09-09 23:01 ` Junio C Hamano
2016-09-09 23:47   ` Stefan Beller
2016-09-11 22:10     ` Junio C Hamano
2016-09-12  0:51       ` Jeff King
2016-09-12  0:52         ` Jeff King
2016-09-12 16:01           ` Junio C Hamano
     [not found]             ` <CAKoko1oSEac_Nr1SkRB=dM_r3Jnew1Et2ZKj716iU3JLyHe2GQ@mail.gmail.com>
2016-09-12 17:39               ` Brandon Williams
2016-09-12  1:47       ` Junio C Hamano
2016-09-13  0:33 ` [PATCH v2] " Brandon Williams
2016-09-13  3:35   ` Brandon Williams
2016-09-13 16:31   ` Junio C Hamano
2016-09-15 20:51     ` Junio C Hamano
2016-09-15 20:51       ` [PATCH 1/2] SQUASH??? Junio C Hamano
2016-09-15 20:51       ` [PATCH 2/2] SQUASH??? Undecided Junio C Hamano
2016-09-15 21:12         ` Stefan Beller
2016-09-15 21:37           ` Brandon Williams
2016-09-15 20:57     ` [PATCH v2] ls-files: adding support for submodules Junio C Hamano
2016-09-15 20:58     ` Junio C Hamano
2016-09-15 20:58     ` Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2015-09-24  2:17 [PATCHv5] Another squash on run-command: add an asynchronous parallel child processor Junio C Hamano
2015-09-24 21:13 ` [PATCH 0/2] " Stefan Beller
2015-09-24 21:13   ` [PATCH 1/2] SQUASH??? Stefan Beller
2015-09-25  0:49     ` Junio C Hamano
2015-09-25  1:09       ` Junio C Hamano
2015-09-25 17:52       ` Stefan Beller
2015-09-25 17:56         ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).