Re: [RFC PATCH 3/5] pgo: Wire up the new more generic code for modules
From: jarmo . tiitto
Date: Mon Jun 14 2021 - 15:08:16 EST
Kees Cook wrote maanantaina 14. kesäkuuta 2021 18.55.23 EEST:
> On Sat, Jun 12, 2021 at 06:24:24AM +0300, Jarmo Tiitto wrote:
> > prf_open() now uses the inode->i_private to get
> > the prf_object for the file. This can be either
> > vmlinux.profraw or any module.profraw file.
> >
> > The prf_vmlinux object is now added into prf_list and
> > allocate_node() scans the list and reserves vnodes
> > from corresponding prf_object(s).
> >
> > Signed-off-by: Jarmo Tiitto <jarmo.tiitto@xxxxxxxxx>
> > ---
> > note: There is no module notifier code yet,
> > so only vmlinux.profraw profile data
> > is available with this commit.
> >
> > Another thing is that pgo/reset will only
> > reset vmlinux.profraw.
> > Profile data reset for modules may be added later:
> > maybe writing module's name into pgo/reset would reset only
> > the specified module's profile data?
> > Then writing "all" or zero would atomically reset everything.
>
> Yeah, I think matching the internal naming is right. "vmlinux",
> module::name, and "all"?
>
> > I'm bit unsure about the new allocate_node() code since
> > it is the first place I had to put rcu_read_lock()
> > and the code is likely to change from this.
>
> Comments below...
>
> > ---
> >
> > kernel/pgo/fs.c | 30 ++++++++++++++++++++-----
> > kernel/pgo/instrument.c | 49 +++++++++++++++++++++++++++--------------
> > kernel/pgo/pgo.h | 2 ++
> > 3 files changed, 60 insertions(+), 21 deletions(-)
> >
> > diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> > index 7e269d69bcd7..84b36e61758b 100644
> > --- a/kernel/pgo/fs.c
> > +++ b/kernel/pgo/fs.c
> > @@ -32,8 +32,10 @@ static struct dentry *directory;
> >
> > struct prf_private_data {
> >
> > void *buffer;
> > size_t size;
> >
> > + struct prf_object *core;
> >
> > };
> >
> > +/* vmlinux's prf core */
> >
> > static struct prf_object prf_vmlinux;
> >
> > /*
> >
> > @@ -281,7 +283,6 @@ static int prf_serialize(struct prf_object *po, struct
> > prf_private_data *p, size>
> > prf_serialize_values(po, &buffer);
> >
> > return 0;
> >
> > -
> >
> > }
> >
> > /* open() implementation for PGO. Creates a copy of the profiling data
set.
> > */>
> > @@ -292,13 +293,21 @@ static int prf_open(struct inode *inode, struct file
> > *file)>
> > size_t buf_size;
> > int err = -EINVAL;
> >
> > + if (WARN_ON(!inode->i_private)) {
> > + /* bug: inode was not initialized by us */
> > + return err;
> > + }
> > +
> >
> > data = kzalloc(sizeof(*data), GFP_KERNEL);
> > if (!data)
> >
> > return -ENOMEM;
> >
> > + /* Get prf_object of this inode */
> > + data->core = inode->i_private;
> > +
> >
> > /* Get initial buffer size. */
> > flags = prf_lock();
> >
> > - data->size = prf_buffer_size(&prf_vmlinux);
> > + data->size = prf_buffer_size(data->core);
> >
> > prf_unlock(flags);
> >
> > do {
> >
> > @@ -318,12 +327,13 @@ static int prf_open(struct inode *inode, struct file
> > *file)>
> > * data length in data->size.
> > */
> >
> > flags = prf_lock();
> >
> > - err = prf_serialize(&prf_vmlinux, data, buf_size);
> > + err = prf_serialize(data->core, data, buf_size);
> >
> > prf_unlock(flags);
> > /* In unlikely case, try again. */
> >
> > } while (err == -EAGAIN);
> >
> > if (err < 0) {
> >
> > +
> >
> > if (data)
> >
> > vfree(data->buffer);
> >
> > kfree(data);
> >
> > @@ -412,6 +422,8 @@ static const struct file_operations prf_reset_fops = {
> >
> > /* Create debugfs entries. */
> > static int __init pgo_init(void)
> > {
> >
> > + unsigned long flags;
> > +
> >
> > /* Init profiler vmlinux core entry */
> > memset(&prf_vmlinux, 0, sizeof(prf_vmlinux));
> > prf_vmlinux.data = __llvm_prf_data_start;
> >
> > @@ -430,19 +442,27 @@ static int __init pgo_init(void)
> >
> > prf_vmlinux.vnds_num = prf_get_count(__llvm_prf_vnds_start,
> >
> > __llvm_prf_vnds_end,
sizeof(__llvm_prf_vnds_start[0]));
> >
> > + /* enable profiling */
> > + flags = prf_list_lock();
> > + list_add_tail_rcu(&prf_vmlinux.link, &prf_list);
> > + prf_list_unlock(flags);
> >
> > directory = debugfs_create_dir("pgo", NULL);
> > if (!directory)
> >
> > goto err_remove;
> >
> > - if (!debugfs_create_file("vmlinux.profraw", 0600, directory, NULL,
> > - &prf_fops))
> > + prf_vmlinux.file = debugfs_create_file("vmlinux.profraw",
> > + 0600, directory, &prf_vmlinux, &prf_fops);
> > + if (!prf_vmlinux.file)
> >
> > goto err_remove;
> >
> > if (!debugfs_create_file("reset", 0200, directory, NULL,
> >
> > &prf_reset_fops))
> >
> > goto err_remove;
> >
> > + /* show notice why the system slower: */
> > + pr_notice("Clang PGO instrumentation is active.");
> > +
>
> Please pull this change into a separate patch and make it pr_info()
> ("notice" is, I think, not right here).
>
All right then.
> > return 0;
> >
> > err_remove:
> > diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> > index 24fdeb79b674..e214c9d7a113 100644
> > --- a/kernel/pgo/instrument.c
> > +++ b/kernel/pgo/instrument.c
> > @@ -24,6 +24,7 @@
> >
> > #include <linux/export.h>
> > #include <linux/spinlock.h>
> > #include <linux/types.h>
> >
> > +#include <linux/rculist.h>
> >
> > #include "pgo.h"
> >
> > /*
> >
> > @@ -56,22 +57,38 @@ void prf_unlock(unsigned long flags)
> >
> > static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> >
> >
u32 index, u64 value)
> >
> > {
> >
> > - const int max_vnds = prf_get_count(__llvm_prf_vnds_start,
> > - __llvm_prf_vnds_end, sizeof(struct
llvm_prf_value_node));
> > -
> > - /*
> > - * Check that p is within vmlinux __llvm_prf_data section.
> > - * If not, don't allocate since we can't handle modules yet.
> > - */
> > - if (!memory_contains(__llvm_prf_data_start,
> > - __llvm_prf_data_end, p, sizeof(*p)))
> > - return NULL;
> > -
> > - if (WARN_ON_ONCE(current_node >= max_vnds))
> > - return NULL; /* Out of nodes */
> > -
> > - /* reserve vnode for vmlinux */
> > - return &__llvm_prf_vnds_start[current_node++];
> > + struct llvm_prf_value_node *vnode = NULL;
> > + struct prf_object *po;
> > + struct llvm_prf_data *data_end;
> > + int max_vnds;
> > +
> > + rcu_read_lock();
>
> AIUI, list readers are using rcu_read_lock(), and writers are using
> prf_list_lock()?
>
Yes, I intended the list readers to use rcu_read_lock() and writers to take
the prf_list_lock().
Sadly, after I sent this patch set I found during further testing that there are
a few problems that I need to work on:
There is a lockup that only occurs on bare metal after 15+ minutes of running, so I
haven't been able to catch it in a VM.
I suspect this is caused by the RCU locking I added, such that it results in
recursive calls into __llvm_profile_instrument_target().
I will try to build with CONFIG_PROVE_LOCKING, but I have had problems with
the kernel not getting past cgroup_init_early()... even without my patches
applied (stock -rc kernel). :-(
> > +
> > + list_for_each_entry_rcu(po, &prf_list, link) {
> > + /* get section limits */
> > + max_vnds = prf_vnds_count(po);
> > + data_end = po->data + prf_data_count(po);
> > +
> > + /*
> > + * Check that p is within:
> > + * [po->data, po->data + prf_data_count(po)] section.
> > + * If yes, allocate vnode from this prf_object.
> > + */
> > + if (memory_contains(po->data, data_end, p,
sizeof(*p))) {
> > +
> > +
> > + if (WARN_ON_ONCE(po->current_node >=
max_vnds))
> > + return NULL; /* Out of
nodes */
> > +
> > + /* reserve the vnode */
> > + vnode = &po->vnds[po->current_node++];
> > + goto out;
> > + }
> > + }
> > +
> > +out:
> > + rcu_read_unlock();
> > + return vnode;
> >
> > }
> >
> > /*
> >
> > diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> > index 44d79e2861e1..59d0aa966fbe 100644
> > --- a/kernel/pgo/pgo.h
> > +++ b/kernel/pgo/pgo.h
> > @@ -19,6 +19,8 @@
> >
> > #ifndef _PGO_H
> > #define _PGO_H
> >
> > +#include <linux/rculist.h>
> > +
> >
> > /*
> >
> > * Note: These internal LLVM definitions must match the compiler version.
> > * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source
> > code.
> >
> > --
> > 2.32.0
>
> --
> Kees Cook