public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] tools/perf: pmu-events: Fix reproducibility
@ 2019-08-25 13:13 Ben Hutchings
  2019-08-25 14:24 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 2+ messages in thread
From: Ben Hutchings @ 2019-08-25 13:13 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 4655 bytes --]

jevents.c uses nftw() to enumerate files and outputs the corresponding
C structs in the order they are found.  This makes it sensitive to
directory ordering, so that the perf executable is not reproducible.

To avoid this, store all the files and directories found and then sort
them by their (relative) path.  (This maintains the parent-first
ordering that nftw() promises.)  Then apply the existing callbacks to
them in the sorted order.

Don't both storing the stat buffers as we don't need them.

References: https://tests.reproducible-builds.org/debian/dbdtxt/bullseye/i386/linux_4.19.37-6.diffoscope.txt.gz
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -50,6 +50,12 @@
 #include "json.h"
 #include "jevents.h"
 
+struct found_file {
+	const char	*fpath;
+	int		typeflag;
+	struct FTW	ftwbuf;
+};
+
 int verbose;
 char *prog;
 
@@ -865,6 +871,44 @@ static int get_maxfds(void)
  * nftw() doesn't let us pass an argument to the processing function,
  * so use a global variables.
  */
+static struct found_file *found_files;
+static size_t n_found_files;
+static size_t max_found_files;
+
+static int add_one_file(const char *fpath, const struct stat *sb,
+			int typeflag, struct FTW *ftwbuf)
+{
+	struct found_file *file;
+
+	if (ftwbuf->level == 0 || ftwbuf->level > 3)
+		return 0;
+
+	/* Grow array if necessary */
+	if (n_found_files >= max_found_files) {
+		if (max_found_files == 0)
+			max_found_files = 16;
+		else
+			max_found_files *= 2;
+		found_files = realloc(found_files,
+				      max_found_files * sizeof(*found_files));
+	}
+
+	file = &found_files[n_found_files++];
+	file->fpath = strdup(fpath);
+	file->typeflag = typeflag;
+	file->ftwbuf = *ftwbuf;
+
+	return 0;
+}
+
+static int compare_files(const void *left, const void *right)
+{
+	const struct found_file *left_file = left;
+	const struct found_file *right_file = right;
+
+	return strcmp(left_file->fpath, right_file->fpath);
+}
+
 static FILE *eventsfp;
 static char *mapfile;
 
@@ -919,19 +963,19 @@ static int is_json_file(const char *name
 	return 0;
 }
 
-static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
+static int preprocess_arch_std_files(const char *fpath,
 				int typeflag, struct FTW *ftwbuf)
 {
 	int level = ftwbuf->level;
 	int is_file = typeflag == FTW_F;
 
 	if (level == 1 && is_file && is_json_file(fpath))
-		return json_events(fpath, save_arch_std_events, (void *)sb);
+		return json_events(fpath, save_arch_std_events, NULL);
 
 	return 0;
 }
 
-static int process_one_file(const char *fpath, const struct stat *sb,
+static int process_one_file(const char *fpath,
 			    int typeflag, struct FTW *ftwbuf)
 {
 	char *tblname, *bname;
@@ -956,9 +1000,9 @@ static int process_one_file(const char *
 	} else
 		bname = (char *) fpath + ftwbuf->base;
 
-	pr_debug("%s %d %7jd %-20s %s\n",
+	pr_debug("%s %d %-20s %s\n",
 		 is_file ? "f" : is_dir ? "d" : "x",
-		 level, sb->st_size, bname, fpath);
+		 level, bname, fpath);
 
 	/* base dir or too deep */
 	if (level == 0 || level > 3)
@@ -1070,6 +1114,7 @@ int main(int argc, char *argv[])
 	const char *output_file;
 	const char *start_dirname;
 	struct stat stbuf;
+	size_t i;
 
 	prog = basename(argv[0]);
 	if (argc < 4) {
@@ -1113,8 +1158,26 @@ int main(int argc, char *argv[])
 	 */
 
 	maxfds = get_maxfds();
+	rc = nftw(ldirname, add_one_file, maxfds, 0);
+	if (rc < 0) {
+		/* Make build fail */
+		free_arch_std_events();
+		return 1;
+	} else if (rc) {
+		goto empty_map;
+	}
+
+	/* Sort file names to ensure reproduciblity */
+	qsort(found_files, n_found_files, sizeof(*found_files), compare_files);
+
 	mapfile = NULL;
-	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
+	for (i = 0; i < n_found_files; i++) {
+		rc = preprocess_arch_std_files(found_files[i].fpath,
+					       found_files[i].typeflag,
+					       &found_files[i].ftwbuf);
+		if (rc)
+			break;
+	}
 	if (rc && verbose) {
 		pr_info("%s: Error preprocessing arch standard files %s\n",
 			prog, ldirname);
@@ -1127,7 +1190,13 @@ int main(int argc, char *argv[])
 		goto empty_map;
 	}
 
-	rc = nftw(ldirname, process_one_file, maxfds, 0);
+	for (i = 0; i < n_found_files; i++) {
+		rc = process_one_file(found_files[i].fpath,
+				      found_files[i].typeflag,
+				      &found_files[i].ftwbuf);
+		if (rc)
+			break;
+	}
 	if (rc && verbose) {
 		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
 		goto empty_map;

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC PATCH] tools/perf: pmu-events: Fix reproducibility
  2019-08-25 13:13 [RFC PATCH] tools/perf: pmu-events: Fix reproducibility Ben Hutchings
@ 2019-08-25 14:24 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-08-25 14:24 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Peter Zijlstra, Ingo Molnar, linux-kernel

Em Sun, Aug 25, 2019 at 02:13:29PM +0100, Ben Hutchings escreveu:
> jevents.c uses nftw() to enumerate files and outputs the corresponding
> C structs in the order they are found.  This makes it sensitive to
> directory ordering, so that the perf executable is not reproducible.
> 
> To avoid this, store all the files and directories found and then sort
> them by their (relative) path.  (This maintains the parent-first
> ordering that nftw() promises.)  Then apply the existing callbacks to
> them in the sorted order.
> 
> Don't both storing the stat buffers as we don't need them.
> 
> References: https://tests.reproducible-builds.org/debian/dbdtxt/bullseye/i386/linux_4.19.37-6.diffoscope.txt.gz

Thanks for working on making the building of perf reproducible! Some
comments below.

> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
> ---
> --- a/tools/perf/pmu-events/jevents.c
> +++ b/tools/perf/pmu-events/jevents.c
> @@ -50,6 +50,12 @@
>  #include "json.h"
>  #include "jevents.h"
>  
> +struct found_file {
> +	const char	*fpath;
> +	int		typeflag;
> +	struct FTW	ftwbuf;
> +};
> +
>  int verbose;
>  char *prog;
>  
> @@ -865,6 +871,44 @@ static int get_maxfds(void)
>   * nftw() doesn't let us pass an argument to the processing function,
>   * so use a global variables.
>   */
> +static struct found_file *found_files;
> +static size_t n_found_files;
> +static size_t max_found_files;

Would be nice to avoid adding more static variables, what about having
it in a struct, declare it on the calling function and then pass it as
context?

> +
> +static int add_one_file(const char *fpath, const struct stat *sb,
> +			int typeflag, struct FTW *ftwbuf)
> +{
> +	struct found_file *file;
> +
> +	if (ftwbuf->level == 0 || ftwbuf->level > 3)
> +		return 0;
> +
> +	/* Grow array if necessary */
> +	if (n_found_files >= max_found_files) {
> +		if (max_found_files == 0)
> +			max_found_files = 16;
> +		else
> +			max_found_files *= 2;
> +		found_files = realloc(found_files,
> +				      max_found_files * sizeof(*found_files));

What if realloc() fails?

> +	}
> +
> +	file = &found_files[n_found_files++];
> +	file->fpath = strdup(fpath);
> +	file->typeflag = typeflag;
> +	file->ftwbuf = *ftwbuf;
> +
> +	return 0;
> +}
> +
> +static int compare_files(const void *left, const void *right)
> +{
> +	const struct found_file *left_file = left;
> +	const struct found_file *right_file = right;
> +
> +	return strcmp(left_file->fpath, right_file->fpath);
> +}
> +
>  static FILE *eventsfp;
>  static char *mapfile;
>  
> @@ -919,19 +963,19 @@ static int is_json_file(const char *name
>  	return 0;
>  }
>  
> -static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
> +static int preprocess_arch_std_files(const char *fpath,
>  				int typeflag, struct FTW *ftwbuf)
>  {
>  	int level = ftwbuf->level;
>  	int is_file = typeflag == FTW_F;
>  
>  	if (level == 1 && is_file && is_json_file(fpath))
> -		return json_events(fpath, save_arch_std_events, (void *)sb);
> +		return json_events(fpath, save_arch_std_events, NULL);
>  
>  	return 0;
>  }
>  
> -static int process_one_file(const char *fpath, const struct stat *sb,
> +static int process_one_file(const char *fpath,
>  			    int typeflag, struct FTW *ftwbuf)
>  {
>  	char *tblname, *bname;
> @@ -956,9 +1000,9 @@ static int process_one_file(const char *
>  	} else
>  		bname = (char *) fpath + ftwbuf->base;
>  
> -	pr_debug("%s %d %7jd %-20s %s\n",
> +	pr_debug("%s %d %-20s %s\n",
>  		 is_file ? "f" : is_dir ? "d" : "x",
> -		 level, sb->st_size, bname, fpath);
> +		 level, bname, fpath);
>  
>  	/* base dir or too deep */
>  	if (level == 0 || level > 3)
> @@ -1070,6 +1114,7 @@ int main(int argc, char *argv[])
>  	const char *output_file;
>  	const char *start_dirname;
>  	struct stat stbuf;
> +	size_t i;
>  
>  	prog = basename(argv[0]);
>  	if (argc < 4) {
> @@ -1113,8 +1158,26 @@ int main(int argc, char *argv[])
>  	 */
>  
>  	maxfds = get_maxfds();
> +	rc = nftw(ldirname, add_one_file, maxfds, 0);
> +	if (rc < 0) {
> +		/* Make build fail */
> +		free_arch_std_events();
> +		return 1;
> +	} else if (rc) {
> +		goto empty_map;
> +	}
> +
> +	/* Sort file names to ensure reproduciblity */
> +	qsort(found_files, n_found_files, sizeof(*found_files), compare_files);
> +
>  	mapfile = NULL;
> -	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
> +	for (i = 0; i < n_found_files; i++) {
> +		rc = preprocess_arch_std_files(found_files[i].fpath,
> +					       found_files[i].typeflag,
> +					       &found_files[i].ftwbuf);
> +		if (rc)
> +			break;
> +	}
>  	if (rc && verbose) {
>  		pr_info("%s: Error preprocessing arch standard files %s\n",
>  			prog, ldirname);
> @@ -1127,7 +1190,13 @@ int main(int argc, char *argv[])
>  		goto empty_map;
>  	}
>  
> -	rc = nftw(ldirname, process_one_file, maxfds, 0);
> +	for (i = 0; i < n_found_files; i++) {
> +		rc = process_one_file(found_files[i].fpath,
> +				      found_files[i].typeflag,
> +				      &found_files[i].ftwbuf);
> +		if (rc)
> +			break;
> +	}
>  	if (rc && verbose) {
>  		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
>  		goto empty_map;



-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-08-25 14:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-08-25 13:13 [RFC PATCH] tools/perf: pmu-events: Fix reproducibility Ben Hutchings
2019-08-25 14:24 ` Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox