All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com>
To: Ben Hutchings <ben@decadent.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH] tools/perf: pmu-events: Fix reproducibility
Date: Sun, 25 Aug 2019 11:24:12 -0300	[thread overview]
Message-ID: <20190825142412.GC26569@kernel.org> (raw)
In-Reply-To: <20190825131329.naqzd5kwg7mw5d3f@decadent.org.uk>

Em Sun, Aug 25, 2019 at 02:13:29PM +0100, Ben Hutchings escreveu:
> jevents.c uses nftw() to enumerate files and outputs the corresponding
> C structs in the order they are found.  This makes it sensitive to
> directory ordering, so that the perf executable is not reproducible.
> 
> To avoid this, store all the files and directories found and then sort
> them by their (relative) path.  (This maintains the parent-first
> ordering that nftw() promises.)  Then apply the existing callbacks to
> them in the sorted order.
> 
> Don't both storing the stat buffers as we don't need them.
> 
> References: https://tests.reproducible-builds.org/debian/dbdtxt/bullseye/i386/linux_4.19.37-6.diffoscope.txt.gz

Thanks for working on making the building of perf reproducible! Some
comments below.

> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
> ---
> --- a/tools/perf/pmu-events/jevents.c
> +++ b/tools/perf/pmu-events/jevents.c
> @@ -50,6 +50,12 @@
>  #include "json.h"
>  #include "jevents.h"
>  
> +struct found_file {
> +	const char	*fpath;
> +	int		typeflag;
> +	struct FTW	ftwbuf;
> +};
> +
>  int verbose;
>  char *prog;
>  
> @@ -865,6 +871,44 @@ static int get_maxfds(void)
>   * nftw() doesn't let us pass an argument to the processing function,
>   * so use a global variables.
>   */
> +static struct found_file *found_files;
> +static size_t n_found_files;
> +static size_t max_found_files;

Would be nice to avoid adding more static variables, what about having
it in a struct, declare it on the calling function and then pass it as
context?

> +
> +static int add_one_file(const char *fpath, const struct stat *sb,
> +			int typeflag, struct FTW *ftwbuf)
> +{
> +	struct found_file *file;
> +
> +	if (ftwbuf->level == 0 || ftwbuf->level > 3)
> +		return 0;
> +
> +	/* Grow array if necessary */
> +	if (n_found_files >= max_found_files) {
> +		if (max_found_files == 0)
> +			max_found_files = 16;
> +		else
> +			max_found_files *= 2;
> +		found_files = realloc(found_files,
> +				      max_found_files * sizeof(*found_files));

What if realloc() fails?

> +	}
> +
> +	file = &found_files[n_found_files++];
> +	file->fpath = strdup(fpath);
> +	file->typeflag = typeflag;
> +	file->ftwbuf = *ftwbuf;
> +
> +	return 0;
> +}
> +
> +static int compare_files(const void *left, const void *right)
> +{
> +	const struct found_file *left_file = left;
> +	const struct found_file *right_file = right;
> +
> +	return strcmp(left_file->fpath, right_file->fpath);
> +}
> +
>  static FILE *eventsfp;
>  static char *mapfile;
>  
> @@ -919,19 +963,19 @@ static int is_json_file(const char *name
>  	return 0;
>  }
>  
> -static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
> +static int preprocess_arch_std_files(const char *fpath,
>  				int typeflag, struct FTW *ftwbuf)
>  {
>  	int level = ftwbuf->level;
>  	int is_file = typeflag == FTW_F;
>  
>  	if (level == 1 && is_file && is_json_file(fpath))
> -		return json_events(fpath, save_arch_std_events, (void *)sb);
> +		return json_events(fpath, save_arch_std_events, NULL);
>  
>  	return 0;
>  }
>  
> -static int process_one_file(const char *fpath, const struct stat *sb,
> +static int process_one_file(const char *fpath,
>  			    int typeflag, struct FTW *ftwbuf)
>  {
>  	char *tblname, *bname;
> @@ -956,9 +1000,9 @@ static int process_one_file(const char *
>  	} else
>  		bname = (char *) fpath + ftwbuf->base;
>  
> -	pr_debug("%s %d %7jd %-20s %s\n",
> +	pr_debug("%s %d %-20s %s\n",
>  		 is_file ? "f" : is_dir ? "d" : "x",
> -		 level, sb->st_size, bname, fpath);
> +		 level, bname, fpath);
>  
>  	/* base dir or too deep */
>  	if (level == 0 || level > 3)
> @@ -1070,6 +1114,7 @@ int main(int argc, char *argv[])
>  	const char *output_file;
>  	const char *start_dirname;
>  	struct stat stbuf;
> +	size_t i;
>  
>  	prog = basename(argv[0]);
>  	if (argc < 4) {
> @@ -1113,8 +1158,26 @@ int main(int argc, char *argv[])
>  	 */
>  
>  	maxfds = get_maxfds();
> +	rc = nftw(ldirname, add_one_file, maxfds, 0);
> +	if (rc < 0) {
> +		/* Make build fail */
> +		free_arch_std_events();
> +		return 1;
> +	} else if (rc) {
> +		goto empty_map;
> +	}
> +
> +	/* Sort file names to ensure reproduciblity */
> +	qsort(found_files, n_found_files, sizeof(*found_files), compare_files);
> +
>  	mapfile = NULL;
> -	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
> +	for (i = 0; i < n_found_files; i++) {
> +		rc = preprocess_arch_std_files(found_files[i].fpath,
> +					       found_files[i].typeflag,
> +					       &found_files[i].ftwbuf);
> +		if (rc)
> +			break;
> +	}
>  	if (rc && verbose) {
>  		pr_info("%s: Error preprocessing arch standard files %s\n",
>  			prog, ldirname);
> @@ -1127,7 +1190,13 @@ int main(int argc, char *argv[])
>  		goto empty_map;
>  	}
>  
> -	rc = nftw(ldirname, process_one_file, maxfds, 0);
> +	for (i = 0; i < n_found_files; i++) {
> +		rc = process_one_file(found_files[i].fpath,
> +				      found_files[i].typeflag,
> +				      &found_files[i].ftwbuf);
> +		if (rc)
> +			break;
> +	}
>  	if (rc && verbose) {
>  		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
>  		goto empty_map;



-- 

- Arnaldo

      reply	other threads:[~2019-08-25 14:24 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-25 13:13 [RFC PATCH] tools/perf: pmu-events: Fix reproducibility Ben Hutchings
2019-08-25 14:24 ` Arnaldo Carvalho de Melo [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190825142412.GC26569@kernel.org \
    --to=arnaldo.melo@gmail.com \
    --cc=ben@decadent.org.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.