Re: [PATCH v2 2/4] mm/vmalloc: add support for __GFP_NOFAIL

From: Uladzislau Rezki
Date: Tue Nov 23 2021 - 14:02:01 EST


On Mon, Nov 22, 2021 at 04:32:31PM +0100, Michal Hocko wrote:
> From: Michal Hocko <mhocko@xxxxxxxx>
>
> Dave Chinner has mentioned that some of the xfs code would benefit from
> kvmalloc support for __GFP_NOFAIL because they have allocations that
> cannot fail and they do not fit into a single page.
>
> A large part of the vmalloc implementation already complies with the
> given gfp flags, so there is no work to be done for those. The area
> and page table allocations are an exception to that. Implement a retry
> loop for those.
>
> Add a short sleep before retrying. 1 jiffy is a completely random
> timeout. Ideally the retry would wait for an explicit event - e.g.
> a change to the vmalloc space if the failure was caused by
> space fragmentation or depletion. But there are multiple different
> reasons to retry and this could become much more complex. Keep the retry
> simple for now and just sleep to avoid hogging CPUs.
>
> Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
> ---
> mm/vmalloc.c | 22 +++++++++++++++++-----
> 1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 17ca7001de1f..b6aed4f94a85 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2844,6 +2844,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> * more permissive.
> */
> if (!order) {
> + gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL;
> +
> while (nr_allocated < nr_pages) {
> unsigned int nr, nr_pages_request;
>
> @@ -2861,12 +2863,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> * but mempolcy want to alloc memory by interleaving.
> */
> if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
> - nr = alloc_pages_bulk_array_mempolicy(gfp,
> + nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
> nr_pages_request,
> pages + nr_allocated);
>
> else
> - nr = alloc_pages_bulk_array_node(gfp, nid,
> + nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
> nr_pages_request,
> pages + nr_allocated);
>
> @@ -2921,6 +2923,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
> {
> const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
> const gfp_t orig_gfp_mask = gfp_mask;
> + bool nofail = gfp_mask & __GFP_NOFAIL;
> unsigned long addr = (unsigned long)area->addr;
> unsigned long size = get_vm_area_size(area);
> unsigned long array_size;
> @@ -2978,8 +2981,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
> else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
> flags = memalloc_noio_save();
>
> - ret = vmap_pages_range(addr, addr + size, prot, area->pages,
> + do {
> + ret = vmap_pages_range(addr, addr + size, prot, area->pages,
> page_shift);
> + if (nofail && (ret < 0))
> + schedule_timeout_uninterruptible(1);
> + } while (nofail && (ret < 0));
>
> if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
> memalloc_nofs_restore(flags);
> @@ -3074,9 +3081,14 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
> VM_UNINITIALIZED | vm_flags, start, end, node,
> gfp_mask, caller);
> if (!area) {
> + bool nofail = gfp_mask & __GFP_NOFAIL;
> warn_alloc(gfp_mask, NULL,
> - "vmalloc error: size %lu, vm_struct allocation failed",
> - real_size);
> + "vmalloc error: size %lu, vm_struct allocation failed%s",
> + real_size, (nofail) ? ". Retrying." : "");
> + if (nofail) {
> + schedule_timeout_uninterruptible(1);
> + goto again;
> + }
> goto fail;
> }
>
> --
> 2.30.2
>
I raised two concerns about this change in our previous discussion.
Well, that is sad...

--
Vlad Rezki