All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
To: suzuki toshiya <mpsuzuki@hiroshima-u.ac.jp>
Cc: git@vger.kernel.org
Subject: Re: [PATCH] git-archive: accepts --owner --group aslike GNU tar.
Date: Thu, 28 Dec 2017 15:42:05 +0100	[thread overview]
Message-ID: <87incqrk1u.fsf@evledraar.gmail.com> (raw)
In-Reply-To: <20171228090527.25056-1-mpsuzuki@hiroshima-u.ac.jp>


On Thu, Dec 28 2017, suzuki toshiya jotted:

Thanks for working on this Just a quick review from skimming this,
haven't actually applied + run this:

> git-archive: accepts --owner --group aslike GNU tar.

Make this:

    git-archive: accept --owner and --group like GNU tar

I.e. imperative voice, s/aslike/like/, no full stop at the end. See
Documentation/SubmittingPatches.

> Current tar output by git-archive has always root:root.
> To generate tar output with non-root owner/group,
> the options like GNU tar are added.

Grammar/wording fixes (IMHO):

    The ownership of files craeted by git-archive is always
    root:root. Add --owner and --group options which work like the GNU
    tar equivalent to allow overriding these defaults.

> * archive.h: add members 'uid', 'gid', 'uname', 'gname'
>   to struct archiver_args.
>
> * archive.c: add functions to reflect the operands of
>   '--owner' and '--group' to archiver_args.
>
> * archive-tar.c: copy 'uid', 'gid', 'uname', 'gname'
>   from archiver_args to the entry headers in tar archive.
>
> * t/parse-tar-file.py: a script to dump uid, gid, uname,
>   gname fields from a tar archive.
>
> * t/t5005-archive-uid-gid.sh: a test script comparing
>   uid, gid, uname, gname between the options and
>   generated tar file.

I'd skip this whole bit, no need to itemize changes per-file, we see
that from the patch, especially stuff like "add xyz struct field".

> ---
>  archive-tar.c              |  8 +++---
>  archive.c                  | 66 ++++++++++++++++++++++++++++++++++++++++++
>  archive.h                  |  4 +++
>  t/parse-tar-file.py        | 56 ++++++++++++++++++++++++++++++++++++
>  t/t5005-archive-uid-gid.sh | 71 ++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 201 insertions(+), 4 deletions(-)
>  create mode 100755 t/parse-tar-file.py
>  create mode 100755 t/t5005-archive-uid-gid.sh
>
> diff --git a/archive-tar.c b/archive-tar.c
> index c6ed96ee7..8546a6229 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -204,10 +204,10 @@ static void prepare_header(struct archiver_args *args,
>  	xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? size : 0);
>  	xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) args->time);
>
> -	xsnprintf(header->uid, sizeof(header->uid), "%07o", 0);
> -	xsnprintf(header->gid, sizeof(header->gid), "%07o", 0);
> -	strlcpy(header->uname, "root", sizeof(header->uname));
> -	strlcpy(header->gname, "root", sizeof(header->gname));
> +	xsnprintf(header->uid, sizeof(header->uid), "%07o", args->uid);
> +	xsnprintf(header->gid, sizeof(header->gid), "%07o", args->gid);
> +	strlcpy(header->uname, args->uname ? args->uname : "root", sizeof(header->uname));
> +	strlcpy(header->gname, args->gname ? args->gname : "root", sizeof(header->gname));
>  	xsnprintf(header->devmajor, sizeof(header->devmajor), "%07o", 0);
>  	xsnprintf(header->devminor, sizeof(header->devminor), "%07o", 0);
>
> diff --git a/archive.c b/archive.c
> index 0b7b62af0..db69041f1 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -8,6 +8,7 @@
>  #include "parse-options.h"
>  #include "unpack-trees.h"
>  #include "dir.h"
> +#include "git-compat-util.h"
>
>  static char const * const archive_usage[] = {
>  	N_("git archive [<options>] <tree-ish> [<path>...]"),
> @@ -417,6 +418,57 @@ static void parse_treeish_arg(const char **argv,
>  	{ OPTION_SET_INT, (s), NULL, (v), NULL, "", \
>  	  PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_HIDDEN, NULL, (p) }
>
> +static void set_args_uname_uid(struct archiver_args *args,
> +		const char* tar_owner, int set_gid_too)
> +{
> +	if (!args || !tar_owner)
> +		return;
> +
> +	const char* col_pos = strchr(tar_owner, ':');
> +	struct passwd* pw = NULL;
> +
> +	if (col_pos) {
> +		args->uname = xstrndup(tar_owner, col_pos - tar_owner);
> +		args->uid = atoi(col_pos + 1);
> +		return;
> +	}
> +
> +	args->uname = xstrndup(tar_owner, strlen(tar_owner));
> +	pw = getpwnam(tar_owner);
> +	if (!pw)
> +		return;

Both here and in set_args_gname_gid you just silently ignore failures
when you can't find the group, is this really the desired behavior?

Also, it seems there's no way to specify a numeric id / group id, e.g. a
reasonable use-case is hardcoding a group that you don't have on the
computer you make the tarball on. Doesn't GNU tar support that as well?

> +	args->uid = pw->pw_uid;
> +	if (set_gid_too)
> +		args->gid = pw->pw_gid;
> +
> +	return;
> +}
> +
> +static void set_args_gname_gid(struct archiver_args *args,
> +		const char* tar_group)
> +{
> +	if (!args || !tar_group)
> +		return;
> +
> +	const char* col_pos = strchr(tar_group, ':');
> +	struct group* gr = NULL;
> +
> +	if (col_pos) {
> +		args->gname = xstrndup(tar_group, col_pos - tar_group);
> +		args->gid = atoi(col_pos + 1);
> +		return;
> +	}
> +
> +	args->gname = xstrndup(tar_group, strlen(tar_group));
> +	gr = getgrnam(tar_group);
> +	if (!gr)
> +		return;
> +
> +	args->gid = gr->gr_gid;
> +	return;
> +}
> +
>  static int parse_archive_args(int argc, const char **argv,
>  		const struct archiver **ar, struct archiver_args *args,
>  		const char *name_hint, int is_remote)
> @@ -431,6 +483,8 @@ static int parse_archive_args(int argc, const char **argv,
>  	int i;
>  	int list = 0;
>  	int worktree_attributes = 0;
> +	char *tar_owner = NULL;
> +	char *tar_group = NULL;
>  	struct option opts[] = {
>  		OPT_GROUP(""),
>  		OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
> @@ -459,6 +513,8 @@ static int parse_archive_args(int argc, const char **argv,
>  			N_("retrieve the archive from remote repository <repo>")),
>  		OPT_STRING(0, "exec", &exec, N_("command"),
>  			N_("path to the remote git-upload-archive command")),
> +		OPT_STRING(0, "owner", &tar_owner, N_("owner"), N_("<name[:uid]> in tar")),
> +		OPT_STRING(0, "group", &tar_group, N_("group"), N_("<name[:gid]> in tar")),
>  		OPT_END()
>  	};
>
> @@ -507,6 +563,16 @@ static int parse_archive_args(int argc, const char **argv,
>  	args->baselen = strlen(base);
>  	args->worktree_attributes = worktree_attributes;
>
> +	args->uname = NULL;
> +	args->gname = NULL;
> +	args->uid = 0;
> +	args->gid = 0;
> +	set_args_uname_uid(args,
> +		tar_owner,
> +		1 /* init args->gid by pw, if resolved */);
> +	set_args_gname_gid(args,
> +		tar_group);
> +

Some overzelous line wrapping here. This could all just be on one line.

>  	return argc;
>  }
>
> diff --git a/archive.h b/archive.h
> index 62d1d82c1..fad4a5d3e 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -15,6 +15,10 @@ struct archiver_args {
>  	unsigned int worktree_attributes : 1;
>  	unsigned int convert : 1;
>  	int compression_level;
> +	int uid;
> +	int gid;
> +	const char *uname;
> +	const char *gname;
>  };
>
>  #define ARCHIVER_WANT_COMPRESSION_LEVELS 1
> diff --git a/t/parse-tar-file.py b/t/parse-tar-file.py

If you add a helper script please put it in a folder in t/, see "ls
t/*/*.pl" for examples.

> new file mode 100755
> index 000000000..c240a8676
> --- /dev/null
> +++ b/t/parse-tar-file.py
> @@ -0,0 +1,56 @@
> +#!/usr/bin/env python
> +
> +import sys
> +import getopt
> +import tarfile
> +
> +optlist, args = getopt.getopt( sys.argv[1:], "", [
> +				"print=", "show=",
> +				"uniq",
> +				"fail-if-multi",
> +		] )
> +
> +infos_to_print = []
> +uniq = False
> +fail_if_multi = False
> +
> +for opt in optlist:
> +  if opt[0] == "--print":
> +    infos_to_print.append(opt[1])
> +  elif opt[0] == "--show":
> +    infos_to_print.append(opt[1])
> +  elif opt[0] == "--uniq":
> +    uniq = True
> +  elif opt[0] == "--fail-if-multi":
> +    uniq = True
> +    fail_if_multi = True
> +

Also the Git project uses 4-space indent for Python elsewhere AFACT.

> +if len(infos_to_print) == 0:
> +  infos_to_print = ["uid", "gid", "uname", "gname", "name"]
> +
> +tar = tarfile.open( args[0] )
> +out_lines = []
> +for tarinfo in tar:
> +  infos = []
> +  for info_tag in infos_to_print:
> +    if info_tag == "uid":
> +      infos.append( str(tarinfo.uid) )
> +    elif info_tag == "gid":
> +      infos.append( str(tarinfo.gid) )
> +    elif info_tag == "uname" or info_tag == "owner":
> +      infos.append( tarinfo.uname )
> +    elif info_tag == "gname" or info_tag == "group":
> +      infos.append( tarinfo.gname )
> +    elif info_tag == "name" or info_tag == "pathname":
> +      infos.append( tarinfo.name )
> +  out_lines.append( "\t".join(infos) )
> +tar.close()
> +
> +if uniq:
> +  out_lines = list(set(out_lines))
> +  if fail_if_multi and (len(out_lines) > 1):
> +    sys.stderr.write("*** not unique value, " + str(len(out_lines)) + " values found\n")
> +    sys.exit(len(out_lines))
> +
> +for line in out_lines:
> +  print line
> diff --git a/t/t5005-archive-uid-gid.sh b/t/t5005-archive-uid-gid.sh
> new file mode 100755
> index 000000000..2b2a694d8
> --- /dev/null
> +++ b/t/t5005-archive-uid-gid.sh
> @@ -0,0 +1,71 @@
> +#!/bin/sh
> +
> +test_description='test --owner --group options for git-archive'
> +. ./test-lib.sh
> +
> +check_uid_gid_uname_gname_in_tar() {
> +	# $1 tar pathname
> +	# $2 uid (digit in string)
> +	# $3 gid (digit in string)
> +	# $4 uname (string)
> +	# $5 gname (string)
> +	uid=`./parse-tar-file.py --print=uid --fail-if-multi $1`
> +	if test $? != 0 -o x"${uid}" != "x"$2
> +	then
> +	  echo "(some) uid differs from the specified value"
> +	  return $?
> +        fi

Very odd indenting going on here. Should be tabs.

> +
> +	gid=`./parse-tar-file.py --print=gid --fail-if-multi $1`
> +	if test $? != 0 -o x"${gid}" != "x"$3
> +	then
> +	  echo "(some) gid differs from the specified value"
> +	  return $?
> +	  exit $?
> +        fi
> +
> +	uname=`./parse-tar-file.py --print=uname --fail-if-multi $1`
> +	if test $? != 0 -o x"${uname}" != "x"$4
> +	then
> +	  echo "(some) uname differs from the specified value"
> +	  return $?
> +        fi
> +
> +	gname=`./parse-tar-file.py --print=gname --fail-if-multi $1`
> +	if test $? != 0 -o x"${gname}" != "x"$5
> +	then
> +	  echo "(some) gname differs from the specified value"
> +	  return $?
> +        fi
> +
> +	return 0
> +}
> +
> +git init . 1>/dev/null 2>/dev/null
> +touch uid-gid-test.001
> +mkdir uid-gid-test.002
> +mkdir uid-gid-test.002/uid-gid-test.003
> +git add uid-gid-test.001
> +git add uid-gid-test.002
> +git add uid-gid-test.002/uid-gid-test.003
> +git commit -m "uid-gid-test" 2>/dev/null 1>/dev/null

All of this setup stuff should be in a test, see "git grep
test_expect.*setup" for examples. Also see the helpers we have in
t/README, i.e. "test_commit" etc.

> +test_expect_success 'test a case with explicitly specified name/id, owner=nobody:1234 group=nogroup:5678' '
> +	git archive --format=tar --owner nobody:1234 --group nogroup:5678 HEAD > uid-gid-test1.tar &&
> +	check_uid_gid_uname_gname_in_tar uid-gid-test1.tar 1234 5678 nobody nogroup &&
> +	test_result=$? &&
> +	return ${test_result}
> +'

There's no need to assign $? to a variable and then return it, just make
check_uid_gid_uname_gname_in_tar be the last thing you do in the test.

> +
> +test_expect_success 'test a case with only name is specified, owner=(current my name) group=(current my group)' '
> +	my_uid=`id -u` &&
> +	my_gid=`id -g` &&
> +	my_uname=`id -u -n` &&
> +	my_gname=`id -g -n` &&
> +	git archive --format=tar --owner ${my_uname} --group ${my_gname} HEAD > uid-gid-test2.tar &&
> +	check_uid_gid_uname_gname_in_tar uid-gid-test2.tar ${my_uid} ${my_gid} ${my_uname} ${my_gname} &&
> +	test_result=$? &&
> +	return ${test_result}
> +'
> +
> +test_done

I'd probably notice other stuff if I did more than skim this, but I'll
stop for now...

  reply	other threads:[~2017-12-28 14:42 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-28  9:05 [PATCH] git-archive: accepts --owner --group aslike GNU tar suzuki toshiya
2017-12-28 14:42 ` Ævar Arnfjörð Bjarmason [this message]
2017-12-28 19:23 ` Junio C Hamano
     [not found] ` <548d442f05e24e22a1cf4e0074f23f16@OS2PR01MB1147.jpnprd01.prod.outlook.com>
2017-12-29 14:04   ` suzuki toshiya

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87incqrk1u.fsf@evledraar.gmail.com \
    --to=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=mpsuzuki@hiroshima-u.ac.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.