Re: [RFC PATCH] tools/perf: pmu-events: Fix reproducibility

From: Arnaldo Carvalho de Melo
Date: Sun Aug 25 2019 - 10:24:18 EST


Em Sun, Aug 25, 2019 at 02:13:29PM +0100, Ben Hutchings escreveu:
> jevents.c uses nftw() to enumerate files and outputs the corresponding
> C structs in the order they are found. This makes it sensitive to
> directory ordering, so that the perf executable is not reproducible.
>
> To avoid this, store all the files and directories found and then sort
> them by their (relative) path. (This maintains the parent-first
> ordering that nftw() promises.) Then apply the existing callbacks to
> them in the sorted order.
>
> Don't both storing the stat buffers as we don't need them.
>
> References: https://tests.reproducible-builds.org/debian/dbdtxt/bullseye/i386/linux_4.19.37-6.diffoscope.txt.gz

Thanks for working on making the building of perf reproducible! Some
comments below.

> Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
> ---
> --- a/tools/perf/pmu-events/jevents.c
> +++ b/tools/perf/pmu-events/jevents.c
> @@ -50,6 +50,12 @@
> #include "json.h"
> #include "jevents.h"
>
> +struct found_file {
> + const char *fpath;
> + int typeflag;
> + struct FTW ftwbuf;
> +};
> +
> int verbose;
> char *prog;
>
> @@ -865,6 +871,44 @@ static int get_maxfds(void)
> * nftw() doesn't let us pass an argument to the processing function,
> * so use a global variables.
> */
> +static struct found_file *found_files;
> +static size_t n_found_files;
> +static size_t max_found_files;

Would be nice to avoid adding more static variables, what about having
it in a struct, declare it on the calling function and then pass it as
context?

> +
> +static int add_one_file(const char *fpath, const struct stat *sb,
> + int typeflag, struct FTW *ftwbuf)
> +{
> + struct found_file *file;
> +
> + if (ftwbuf->level == 0 || ftwbuf->level > 3)
> + return 0;
> +
> + /* Grow array if necessary */
> + if (n_found_files >= max_found_files) {
> + if (max_found_files == 0)
> + max_found_files = 16;
> + else
> + max_found_files *= 2;
> + found_files = realloc(found_files,
> + max_found_files * sizeof(*found_files));

What if realloc() fails?

> + }
> +
> + file = &found_files[n_found_files++];
> + file->fpath = strdup(fpath);
> + file->typeflag = typeflag;
> + file->ftwbuf = *ftwbuf;
> +
> + return 0;
> +}
> +
> +static int compare_files(const void *left, const void *right)
> +{
> + const struct found_file *left_file = left;
> + const struct found_file *right_file = right;
> +
> + return strcmp(left_file->fpath, right_file->fpath);
> +}
> +
> static FILE *eventsfp;
> static char *mapfile;
>
> @@ -919,19 +963,19 @@ static int is_json_file(const char *name
> return 0;
> }
>
> -static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
> +static int preprocess_arch_std_files(const char *fpath,
> int typeflag, struct FTW *ftwbuf)
> {
> int level = ftwbuf->level;
> int is_file = typeflag == FTW_F;
>
> if (level == 1 && is_file && is_json_file(fpath))
> - return json_events(fpath, save_arch_std_events, (void *)sb);
> + return json_events(fpath, save_arch_std_events, NULL);
>
> return 0;
> }
>
> -static int process_one_file(const char *fpath, const struct stat *sb,
> +static int process_one_file(const char *fpath,
> int typeflag, struct FTW *ftwbuf)
> {
> char *tblname, *bname;
> @@ -956,9 +1000,9 @@ static int process_one_file(const char *
> } else
> bname = (char *) fpath + ftwbuf->base;
>
> - pr_debug("%s %d %7jd %-20s %s\n",
> + pr_debug("%s %d %-20s %s\n",
> is_file ? "f" : is_dir ? "d" : "x",
> - level, sb->st_size, bname, fpath);
> + level, bname, fpath);
>
> /* base dir or too deep */
> if (level == 0 || level > 3)
> @@ -1070,6 +1114,7 @@ int main(int argc, char *argv[])
> const char *output_file;
> const char *start_dirname;
> struct stat stbuf;
> + size_t i;
>
> prog = basename(argv[0]);
> if (argc < 4) {
> @@ -1113,8 +1158,26 @@ int main(int argc, char *argv[])
> */
>
> maxfds = get_maxfds();
> + rc = nftw(ldirname, add_one_file, maxfds, 0);
> + if (rc < 0) {
> + /* Make build fail */
> + free_arch_std_events();
> + return 1;
> + } else if (rc) {
> + goto empty_map;
> + }
> +
> + /* Sort file names to ensure reproduciblity */
> + qsort(found_files, n_found_files, sizeof(*found_files), compare_files);
> +
> mapfile = NULL;
> - rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
> + for (i = 0; i < n_found_files; i++) {
> + rc = preprocess_arch_std_files(found_files[i].fpath,
> + found_files[i].typeflag,
> + &found_files[i].ftwbuf);
> + if (rc)
> + break;
> + }
> if (rc && verbose) {
> pr_info("%s: Error preprocessing arch standard files %s\n",
> prog, ldirname);
> @@ -1127,7 +1190,13 @@ int main(int argc, char *argv[])
> goto empty_map;
> }
>
> - rc = nftw(ldirname, process_one_file, maxfds, 0);
> + for (i = 0; i < n_found_files; i++) {
> + rc = process_one_file(found_files[i].fpath,
> + found_files[i].typeflag,
> + &found_files[i].ftwbuf);
> + if (rc)
> + break;
> + }
> if (rc && verbose) {
> pr_info("%s: Error walking file tree %s\n", prog, ldirname);
> goto empty_map;



--

- Arnaldo