Re: [PATCH 03/12] perf, tools: Support storing additional data in strlist

From: Masami Hiramatsu
Date: Tue Nov 28 2017 - 08:31:39 EST


On Mon, 27 Nov 2017 16:23:12 -0800
Andi Kleen <andi@xxxxxxxxxxxxxx> wrote:

> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> Add a configurable node size to strlist, which allows users
> to store additional data in a str_node. Also add a new interface
> to add a new strlist node, and return the node, so additional
> data can be added.
>
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> tools/perf/util/rblist.c | 16 ++++++++++++++--
> tools/perf/util/rblist.h | 2 ++
> tools/perf/util/strlist.c | 15 ++++++++++++++-
> tools/perf/util/strlist.h | 8 ++++++++
> 4 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
> index 0dfe27d99458..fa221e6b0932 100644
> --- a/tools/perf/util/rblist.c
> +++ b/tools/perf/util/rblist.c
> @@ -11,11 +11,13 @@
>
> #include "rblist.h"
>
> -int rblist__add_node(struct rblist *rblist, const void *new_entry)
> +int rblist__add_node_ptr(struct rblist *rblist, const void *new_entry,
> + struct rb_node **nodep)
> {
> struct rb_node **p = &rblist->entries.rb_node;
> struct rb_node *parent = NULL, *new_node;
>
> + *nodep = NULL;
> while (*p != NULL) {
> int rc;
>
> @@ -26,13 +28,16 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
> p = &(*p)->rb_left;
> else if (rc < 0)
> p = &(*p)->rb_right;
> - else
> + else {
> + *nodep = parent;
> return -EEXIST;
> + }
> }
>
> new_node = rblist->node_new(rblist, new_entry);
> if (new_node == NULL)
> return -ENOMEM;
> + *nodep = new_node;
>
> rb_link_node(new_node, parent, p);
> rb_insert_color(new_node, &rblist->entries);
> @@ -41,6 +46,13 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
> return 0;
> }
>
> +int rblist__add_node(struct rblist *rblist, const void *new_entry)
> +{
> + struct rb_node *nd;
> +
> + return rblist__add_node_ptr(rblist, new_entry, &nd);
> +}
> +
> void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
> {
> rb_erase(rb_node, &rblist->entries);
> diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
> index 4c8638a22571..2941e4295f63 100644
> --- a/tools/perf/util/rblist.h
> +++ b/tools/perf/util/rblist.h
> @@ -31,6 +31,8 @@ struct rblist {
> void rblist__init(struct rblist *rblist);
> void rblist__delete(struct rblist *rblist);
> int rblist__add_node(struct rblist *rblist, const void *new_entry);
> +int rblist__add_node_ptr(struct rblist *rblist, const void *new_entry,
> + struct rb_node **nodep);
> void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
> struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
> struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
> diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
> index 9de5434bb49e..68ef21c3797c 100644
> --- a/tools/perf/util/strlist.c
> +++ b/tools/perf/util/strlist.c
> @@ -18,7 +18,7 @@ struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
> const char *s = entry;
> struct rb_node *rc = NULL;
> struct strlist *strlist = container_of(rblist, struct strlist, rblist);
> - struct str_node *snode = malloc(sizeof(*snode));
> + struct str_node *snode = malloc(strlist->node_size);
>
> if (snode != NULL) {
> if (strlist->dupstr) {
> @@ -66,6 +66,14 @@ int strlist__add(struct strlist *slist, const char *new_entry)
> return rblist__add_node(&slist->rblist, new_entry);
> }
>
> +struct str_node *strlist__add_node(struct strlist *slist, const char *new_entry)
> +{
> + struct rb_node *nd;
> +
> + rblist__add_node_ptr(&slist->rblist, new_entry, &nd);
> + return container_of(nd, struct str_node, rb_node);
> +}

It should check the result of rblist__add_node_ptr() and return NULL if an error.

> +
> int strlist__load(struct strlist *slist, const char *filename)
> {
> char entry[1024];
> @@ -165,11 +173,15 @@ struct strlist *strlist__new(const char *list, const struct strlist_config *conf
> bool dupstr = true;
> bool file_only = false;
> const char *dirname = NULL;
> + size_t node_size = sizeof(struct str_node);
>
> if (config) {
> dupstr = !config->dont_dupstr;
> dirname = config->dirname;
> file_only = config->file_only;
> + node_size = config->node_size;
> + if (!node_size)
> + node_size = sizeof(struct str_node);

This is dangerous, node_size can be smaller than sizeof(struct str_node).
Instead, why don't we use "data_size" and set additional data size?

Also, we can add "u8 data[0]" at the end of struct str_node, so that
user can easily access the appended data.

> }
>
> rblist__init(&slist->rblist);
> @@ -179,6 +191,7 @@ struct strlist *strlist__new(const char *list, const struct strlist_config *conf
>
> slist->dupstr = dupstr;
> slist->file_only = file_only;
> + slist->node_size = node_size;
>
> if (list && strlist__parse_list(slist, list, dirname) != 0)
> goto out_error;
> diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
> index d58f1e08b170..fd407e11e124 100644
> --- a/tools/perf/util/strlist.h
> +++ b/tools/perf/util/strlist.h
> @@ -16,25 +16,33 @@ struct strlist {
> struct rblist rblist;
> bool dupstr;
> bool file_only;
> + size_t node_size;
> };
>
> /*
> * @file_only: When dirname is present, only consider entries as filenames,
> * that should not be added to the list if dirname/entry is not
> * found
> + * @node_size: Allocate extra space after str_node which can be used for other
> + * data. This is the complete size including str_node
> */
> struct strlist_config {
> bool dont_dupstr;
> bool file_only;
> const char *dirname;
> + size_t node_size;
> };
>
> +#define STRLIST_CONFIG_DEFAULT \
> + { false, false, NULL, sizeof(struct str_node) }
> +
> struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
> void strlist__delete(struct strlist *slist);
>
> void strlist__remove(struct strlist *slist, struct str_node *sn);
> int strlist__load(struct strlist *slist, const char *filename);
> int strlist__add(struct strlist *slist, const char *str);
> +struct str_node *strlist__add_node(struct strlist *slist, const char *str);

Hmm, I see this is much more efficient, but from the programming point of view,
we can also use the combination of strlist__add() and strlist__find() for
that purpose.

BTW, what happen if we already have same string on strlist?

Thank you,


>
> struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
> struct str_node *strlist__find(struct strlist *slist, const char *entry);
> --
> 2.13.6
>


--
Masami Hiramatsu <mhiramat@xxxxxxxxxx>