Re: [PATCH] fs/select: add vmalloc fallback for select(2)

From: Eric Dumazet
Date: Thu Sep 22 2016 - 12:24:57 EST


On Thu, 2016-09-22 at 17:28 +0200, Vlastimil Babka wrote:
> The select(2) syscall performs a kmalloc(size, GFP_KERNEL) where size grows
> with the number of fds passed. We had a customer report page allocation
> failures of order-4 for this allocation. This is a costly order, so it might
> easily fail, as the VM expects such allocations to have a lower-order fallback.
>
> A trivial fallback is vmalloc(), as the memory doesn't have to be
> physically contiguous. Also the allocation is temporary for the duration of the
> syscall, so it's unlikely to stress vmalloc too much.
>
> Note that the poll(2) syscall seems to use a linked list of order-0 pages, so
> it doesn't need this kind of fallback.
>
> Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
> ---
> fs/select.c | 15 +++++++++++----
> 1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/select.c b/fs/select.c
> index 8ed9da50896a..8fe5bddbe99b 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -29,6 +29,7 @@
> #include <linux/sched/rt.h>
> #include <linux/freezer.h>
> #include <net/busy_poll.h>
> +#include <linux/vmalloc.h>
>
> #include <asm/uaccess.h>
>
> @@ -558,6 +559,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> struct fdtable *fdt;
> /* Allocate small arguments on the stack to save memory and be faster */
> long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
> + unsigned long alloc_size;
>
> ret = -EINVAL;
> if (n < 0)
> @@ -580,10 +582,15 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> bits = stack_fds;
> if (size > sizeof(stack_fds) / 6) {
> /* Not enough space in on-stack array; must use kmalloc */
> + alloc_size = 6 * size;
> ret = -ENOMEM;
> - bits = kmalloc(6 * size, GFP_KERNEL);
> - if (!bits)
> - goto out_nofds;
> + bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN);
> + if (!bits && alloc_size > PAGE_SIZE) {
> + bits = vmalloc(alloc_size);
> +
> + if (!bits)
> + goto out_nofds;

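The !bits test must also happen when alloc_size <= PAGE_SIZE: as written,
a failed kmalloc() of a small size skips this whole block and execution
continues with bits == NULL.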

> + }

		if (!bits && alloc_size > PAGE_SIZE)
			bits = vmalloc(alloc_size);

		if (!bits)
			goto out_nofds;



> }
> fds.in = bits;
> fds.out = bits + size;
> @@ -618,7 +625,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>
> out:
> if (bits != stack_fds)
> - kfree(bits);
> + kvfree(bits);
> out_nofds:
> return ret;
> }