[PATCH v2 1/1] pgo: Fix sleep in atomic section in prf_open()

From: Jarmo Tiitto
Date: Thu Jun 03 2021 - 11:54:55 EST


In prf_open() the required buffer size can be so large that
vzalloc() may sleep; since it was called with pgo_lock held,
this triggered the following bug:

======
BUG: sleeping function called from invalid context at include/linux/sched/mm.h:201
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 337, name: cat
CPU: 1 PID: 337 Comm: cat Not tainted 5.13.0-rc2-24-hack+ #154
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
Call Trace:
dump_stack+0xc7/0x134
___might_sleep+0x177/0x190
__might_sleep+0x5a/0x90
kmem_cache_alloc_node_trace+0x6b/0x3a0
? __get_vm_area_node+0xcd/0x1b0
? dput+0x283/0x300
__get_vm_area_node+0xcd/0x1b0
__vmalloc_node_range+0x7b/0x420
? prf_open+0x1da/0x580
? prf_open+0x32/0x580
? __llvm_profile_instrument_memop+0x36/0x50
vzalloc+0x54/0x60
? prf_open+0x1da/0x580
prf_open+0x1da/0x580
full_proxy_open+0x211/0x370
....
======

Since we can't call vzalloc() while holding pgo_lock,
split the code into steps:
* First take pgo_lock, get the required buffer size via
  prf_buffer_size(), and release the lock.
* Round the size up to the page size and allocate the buffer
  with no lock held.
* Finally re-acquire pgo_lock and call prf_serialize().
  prf_serialize() now checks whether the buffer is still large
  enough and returns -EAGAIN if it is not, as sketched below.
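
A condensed sketch of the new prf_open() flow (simplified for
illustration only; the authoritative change is the diff below):

	flags = prf_lock();
	buf_size = prf_buffer_size();
	prf_unlock(flags);

	/* May sleep: no spinlock held, IRQs enabled. */
	buf_size = PAGE_ALIGN(buf_size);
	data->buffer = vzalloc(buf_size);
	if (!data->buffer) {
		err = -ENOMEM;
		goto out_free;
	}

	flags = prf_lock();
	/* Fails with -EAGAIN if the data outgrew buf_size meanwhile. */
	err = prf_serialize(data, buf_size);
	prf_unlock(flags);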

New in this v2 patch:
The -EAGAIN case turned out to be so rare that running the
following in a loop:

$ cat /sys/kernel/debug/pgo/vmlinux.profraw > vmlinux.profdata;

didn't trigger it, and I don't know whether it can occur at all.
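
If user space ever does hit -EAGAIN, it can simply retry opening the
file. For illustration only (hypothetical snippet, not part of this
patch), such a retry loop could look like:

	#include <errno.h>
	#include <fcntl.h>
	#include <unistd.h>

	static int open_profraw(const char *path)
	{
		int fd;

		do {
			fd = open(path, O_RDONLY);
		} while (fd < 0 && errno == EAGAIN);

		return fd;
	}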

Signed-off-by: Jarmo Tiitto <jarmo.tiitto@xxxxxxxxx>
---
kernel/pgo/fs.c | 52 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
index ef985159dad3..9afd6f001a1b 100644
--- a/kernel/pgo/fs.c
+++ b/kernel/pgo/fs.c
@@ -24,13 +24,14 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include "pgo.h"

static struct dentry *directory;

struct prf_private_data {
void *buffer;
- unsigned long size;
+ size_t size;
};

/*
@@ -213,6 +214,7 @@ static inline unsigned long prf_get_padding(unsigned long size)
return 7 & (sizeof(u64) - size % sizeof(u64));
}

+/* Note: caller *must* hold pgo_lock */
static unsigned long prf_buffer_size(void)
{
return sizeof(struct llvm_prf_header) +
@@ -225,18 +227,21 @@ static unsigned long prf_buffer_size(void)

/*
* Serialize the profiling data into a format LLVM's tools can understand.
+ * Note: p->buffer must point to a vzalloc()'d area
+ * of at least prf_buffer_size() bytes.
* Note: caller *must* hold pgo_lock.
*/
-static int prf_serialize(struct prf_private_data *p)
+static int prf_serialize(struct prf_private_data *p, size_t buf_size)
{
int err = 0;
void *buffer;

+ /* get buffer size, again. */
p->size = prf_buffer_size();
- p->buffer = vzalloc(p->size);

- if (!p->buffer) {
- err = -ENOMEM;
+	/* Check whether the data outgrew the allocated buffer. */
+ if (p->size > buf_size) {
+ err = -EAGAIN;
goto out;
}

@@ -259,27 +264,46 @@ static int prf_open(struct inode *inode, struct file *file)
{
struct prf_private_data *data;
unsigned long flags;
- int err;
+ size_t buf_size;
+ int err = 0;

data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) {
err = -ENOMEM;
- goto out;
+ goto out_free;
}

+ /* get buffer size */
flags = prf_lock();
+ buf_size = prf_buffer_size();
+ prf_unlock(flags);

- err = prf_serialize(data);
- if (unlikely(err)) {
- kfree(data);
- goto out_unlock;
+ /* allocate, round up to page size. */
+ buf_size = PAGE_ALIGN(buf_size);
+ data->buffer = vzalloc(buf_size);
+
+ if (!data->buffer) {
+ err = -ENOMEM;
+ goto out_free;
}

+	/*
+	 * Serialize the data and store the actual length in data->size.
+	 */
+ flags = prf_lock();
+ err = prf_serialize(data, buf_size);
+ prf_unlock(flags);
+
+ if (err)
+ goto out_free;
+
file->private_data = data;
+ return 0;

-out_unlock:
- prf_unlock(flags);
-out:
+out_free:
+ if (data)
+ vfree(data->buffer);
+ kfree(data);
return err;
}


base-commit: 5d0cda65918279ada060417c5fecb7e86ccb3def
--
2.31.1