git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Wu <peter@lekensteyn.nl>
To: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH] archive: support filtering paths with glob
Date: Sat, 13 Sep 2014 12:52:49 +0200	[thread overview]
Message-ID: <1433062.XhRCATRB3s@al> (raw)
In-Reply-To: <1409837872-20837-1-git-send-email-pclouds@gmail.com>

On Thursday 04 September 2014 20:37:52 Nguyễn Thái Ngọc Duy wrote:
> This patch fixes two problems with using :(glob) (or even "*.c"
> without ":(glob)").
> 
> The first one is we forgot to turn on the 'recursive' flag in struct
> pathspec. Without that, tree_entry_interesting() will not mark
> potential directories "interesting" so that it can confirm whether
> those directories have anything matching the pathspec.
> 
> The marking directories interesting has a side effect that we need to
> walk inside a directory to realize that there's nothing interested in
> there. By that time, 'archive' code has already written the (empty)
> directory down. That means lots of empty directories in the result
> archive.
> 
> This problem is fixed by lazily writing directories down when we know
> they are actually needed. There is a theoretical bug in this
> implementation: we can't write empty trees/directories that match that
> pathspec.
> 
> Noticed-by: Peter Wu <peter@lekensteyn.nl>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

Ah, ignore my last mail, I just noticed this one. This patch works fine. By the
way, the glob pattern is treated as a 'nullglob'. If you specify a glob pattern
that matches nothing, an empty archive is created. Perhaps an error message
should be raised for that as it is unlikely that a user wants that?

Tested-by: Peter Wu <peter@lekensteyn.nl>

> ---
>  Similar approach could probably be used for teaching ls-tree about pathspec..
> 
>  archive.c           | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  t/t5000-tar-tree.sh | 10 +++++++
>  2 files changed, 91 insertions(+), 1 deletion(-)
> 
> diff --git a/archive.c b/archive.c
> index 3fc0fb2..9e62bf4 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -98,9 +98,19 @@ static void setup_archive_check(struct git_attr_check *check)
>  	check[1].attr = attr_export_subst;
>  }
>  
> +struct directory {
> +	struct directory *up;
> +	unsigned char sha1[20];
> +	int baselen, len;
> +	unsigned mode;
> +	int stage;
> +	char path[FLEX_ARRAY];
> +};
> +
>  struct archiver_context {
>  	struct archiver_args *args;
>  	write_archive_entry_fn_t write_entry;
> +	struct directory *bottom;
>  };
>  
>  static int write_archive_entry(const unsigned char *sha1, const char *base,
> @@ -146,6 +156,65 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
>  	return write_entry(args, sha1, path.buf, path.len, mode);
>  }
>  
> +static void queue_directory(const unsigned char *sha1,
> +		const char *base, int baselen, const char *filename,
> +		unsigned mode, int stage, struct archiver_context *c)
> +{
> +	struct directory *d;
> +	d = xmallocz(sizeof(*d) + baselen + 1 + strlen(filename));
> +	d->up	   = c->bottom;
> +	d->baselen = baselen;
> +	d->mode	   = mode;
> +	d->stage   = stage;
> +	c->bottom  = d;
> +	d->len = sprintf(d->path, "%.*s%s/", baselen, base, filename);
> +	hashcpy(d->sha1, sha1);
> +}
> +
> +static int write_directory(struct archiver_context *c)
> +{
> +	struct directory *d = c->bottom;
> +	int ret;
> +
> +	if (!d)
> +		return 0;
> +	c->bottom = d->up;
> +	d->path[d->len - 1] = '\0'; /* no trailing slash */
> +	ret =
> +		write_directory(c) ||
> +		write_archive_entry(d->sha1, d->path, d->baselen,
> +				    d->path + d->baselen, d->mode,
> +				    d->stage, c) != READ_TREE_RECURSIVE;
> +	free(d);
> +	return ret ? -1 : 0;
> +}
> +
> +static int queue_or_write_archive_entry(const unsigned char *sha1,
> +		const char *base, int baselen, const char *filename,
> +		unsigned mode, int stage, void *context)
> +{
> +	struct archiver_context *c = context;
> +
> +	while (c->bottom &&
> +	       !(baselen >= c->bottom->len &&
> +		 !strncmp(base, c->bottom->path, c->bottom->len))) {
> +		struct directory *next = c->bottom->up;
> +		free(c->bottom);
> +		c->bottom = next;
> +	}
> +
> +	if (S_ISDIR(mode)) {
> +		queue_directory(sha1, base, baselen, filename,
> +				mode, stage, c);
> +		return READ_TREE_RECURSIVE;
> +	}
> +
> +	if (write_directory(c))
> +		return -1;
> +	return write_archive_entry(sha1, base, baselen, filename, mode,
> +				   stage, context);
> +}
> +
>  int write_archive_entries(struct archiver_args *args,
>  		write_archive_entry_fn_t write_entry)
>  {
> @@ -167,6 +236,7 @@ int write_archive_entries(struct archiver_args *args,
>  			return err;
>  	}
>  
> +	memset(&context, 0, sizeof(context));
>  	context.args = args;
>  	context.write_entry = write_entry;
>  
> @@ -187,9 +257,17 @@ int write_archive_entries(struct archiver_args *args,
>  	}
>  
>  	err = read_tree_recursive(args->tree, "", 0, 0, &args->pathspec,
> -				  write_archive_entry, &context);
> +				  args->pathspec.has_wildcard ?
> +				  queue_or_write_archive_entry :
> +				  write_archive_entry,
> +				  &context);
>  	if (err == READ_TREE_RECURSIVE)
>  		err = 0;
> +	while (context.bottom) {
> +		struct directory *next = context.bottom->up;
> +		free(context.bottom);
> +		context.bottom = next;
> +	}
>  	return err;
>  }
>  
> @@ -221,6 +299,7 @@ static int path_exists(struct tree *tree, const char *path)
>  	int ret;
>  
>  	parse_pathspec(&pathspec, 0, 0, "", paths);
> +	pathspec.recursive = 1;
>  	ret = read_tree_recursive(tree, "", 0, 0, &pathspec, reject_entry, NULL);
>  	free_pathspec(&pathspec);
>  	return ret != 0;
> @@ -237,6 +316,7 @@ static void parse_pathspec_arg(const char **pathspec,
>  	parse_pathspec(&ar_args->pathspec, 0,
>  		       PATHSPEC_PREFER_FULL,
>  		       "", pathspec);
> +	ar_args->pathspec.recursive = 1;
>  	if (pathspec) {
>  		while (*pathspec) {
>  			if (**pathspec && !path_exists(ar_args->tree, *pathspec))
> diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
> index 7b8babd..ade7a7f 100755
> --- a/t/t5000-tar-tree.sh
> +++ b/t/t5000-tar-tree.sh
> @@ -305,4 +305,14 @@ test_expect_success GZIP 'remote tar.gz can be disabled' '
>  		>remote.tar.gz
>  '
>  
> +test_expect_success 'archive and :(glob)' '
> +	git archive -v HEAD -- ":(glob)**/sh" >/dev/null 2>actual &&
> +	cat >expect <<EOF &&
> +a/
> +a/bin/
> +a/bin/sh
> +EOF
> +	test_cmp expect actual
> +'
> +
>  test_done

  reply	other threads:[~2014-09-13 10:53 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-02 22:17 git archive and glob pathspecs Peter Wu
2014-09-03  6:21 ` Duy Nguyen
2014-09-13 10:36   ` Peter Wu
2014-09-04 13:37 ` [PATCH] archive: support filtering paths with glob Nguyễn Thái Ngọc Duy
2014-09-13 10:52   ` Peter Wu [this message]
2014-09-21  3:55     ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2014-09-22 19:15       ` Junio C Hamano
2014-09-22 23:04         ` Duy Nguyen
2014-09-23 16:57           ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1433062.XhRCATRB3s@al \
    --to=peter@lekensteyn.nl \
    --cc=git@vger.kernel.org \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).