diff --git a/include/linux/cma.h b/include/linux/cma.h index 9db877506ea8..3d58ce1a8730 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -46,6 +46,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn); +extern struct folio *cma_alloc_folio(struct cma *cma, int order); extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..630ab4f5f78d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -900,9 +900,9 @@ static inline bool hugepage_movable_supported(struct hstate *h) static inline gfp_t htlb_alloc_mask(struct hstate *h) { if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; + return GFP_HIGHUSER_MOVABLE | __GFP_COMP; else - return GFP_HIGHUSER; + return GFP_HIGHUSER | __GFP_COMP; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) diff --git a/mm/cma.c b/mm/cma.c index 3e9724716bad..39b6b99c6af1 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -403,18 +403,8 @@ static void cma_debug_show_areas(struct cma *cma) spin_unlock_irq(&cma->lock); } -/** - * cma_alloc() - allocate pages from contiguous area - * @cma: Contiguous memory region for which the allocation is performed. - * @count: Requested number of pages. - * @align: Requested alignment of pages (in PAGE_SIZE order). - * @no_warn: Avoid printing message about failed allocation - * - * This function allocates part of contiguous memory on specific - * contiguous memory area. - */ -struct page *cma_alloc(struct cma *cma, unsigned long count, - unsigned int align, bool no_warn) +static struct page *__cma_alloc(struct cma *cma, unsigned long count, + unsigned int align, gfp_t gfp) { unsigned long mask, offset; unsigned long pfn = -1; @@ -463,8 +453,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); mutex_lock(&cma_mutex); - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, - GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0)); + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp); mutex_unlock(&cma_mutex); if (ret == 0) { page = pfn_to_page(pfn); @@ -494,7 +483,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, page_kasan_tag_reset(nth_page(page, i)); } - if (ret && !no_warn) { + if (ret && !(gfp & __GFP_NOWARN)) { pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n", __func__, cma->name, count, ret); cma_debug_show_areas(cma); @@ -513,6 +502,31 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, return page; } +/** + * cma_alloc() - allocate pages from contiguous area + * @cma: Contiguous memory region for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). + * @no_warn: Avoid printing message about failed allocation + * + * This function allocates part of contiguous memory on specific + * contiguous memory area. + */ +struct page *cma_alloc(struct cma *cma, unsigned long count, + unsigned int align, bool no_warn) +{ + return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0)); +} + +struct folio *cma_alloc_folio(struct cma *cma, int order) +{ + struct page *page; + + page = __cma_alloc(cma, 1 << order, order, GFP_KERNEL | __GFP_COMP); + + return page ? page_folio(page) : NULL; +} + bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count) { diff --git a/mm/compaction.c b/mm/compaction.c index eb95e9b435d0..00fb571727d3 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -86,33 +86,6 @@ static struct page *mark_allocated_noprof(struct page *page, unsigned int order, } #define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__)) -static void split_map_pages(struct list_head *freepages) -{ - unsigned int i, order; - struct page *page, *next; - LIST_HEAD(tmp_list); - - for (order = 0; order < NR_PAGE_ORDERS; order++) { - list_for_each_entry_safe(page, next, &freepages[order], lru) { - unsigned int nr_pages; - - list_del(&page->lru); - - nr_pages = 1 << order; - - mark_allocated(page, order, __GFP_MOVABLE); - if (order) - split_page(page, order); - - for (i = 0; i < nr_pages; i++) { - list_add(&page->lru, &tmp_list); - page++; - } - } - list_splice_init(&tmp_list, &freepages[0]); - } -} - static unsigned long release_free_list(struct list_head *freepages) { int order; @@ -754,10 +727,9 @@ isolate_freepages_range(struct compact_control *cc, { unsigned long isolated, pfn, block_start_pfn, block_end_pfn; int order; - struct list_head tmp_freepages[NR_PAGE_ORDERS]; for (order = 0; order < NR_PAGE_ORDERS; order++) - INIT_LIST_HEAD(&tmp_freepages[order]); + INIT_LIST_HEAD(&cc->freepages[order]); pfn = start_pfn; block_start_pfn = pageblock_start_pfn(pfn); @@ -788,7 +760,7 @@ isolate_freepages_range(struct compact_control *cc, break; isolated = isolate_freepages_block(cc, &isolate_start_pfn, - block_end_pfn, tmp_freepages, 0, true); + block_end_pfn, cc->freepages, 0, true); /* * In strict mode, isolate_freepages_block() returns 0 if @@ -807,13 +779,10 @@ isolate_freepages_range(struct compact_control *cc, if (pfn < end_pfn) { /* Loop terminated early, cleanup. */ - release_free_list(tmp_freepages); + release_free_list(cc->freepages); return 0; } - /* __isolate_free_page() does not map the pages */ - split_map_pages(tmp_freepages); - /* We don't use freelists for anything. */ return pfn; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index aaf508be0a2b..2061d094cd19 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1512,43 +1512,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -/* used to demote non-gigantic_huge pages as well */ -static void __destroy_compound_gigantic_folio(struct folio *folio, - unsigned int order, bool demote) -{ - int i; - int nr_pages = 1 << order; - struct page *p; - - atomic_set(&folio->_entire_mapcount, 0); - atomic_set(&folio->_large_mapcount, 0); - atomic_set(&folio->_pincount, 0); - - for (i = 1; i < nr_pages; i++) { - p = folio_page(folio, i); - p->flags &= ~PAGE_FLAGS_CHECK_AT_FREE; - p->mapping = NULL; - clear_compound_head(p); - if (!demote) - set_page_refcounted(p); - } - - __folio_clear_head(folio); -} - -static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio, - unsigned int order) -{ - __destroy_compound_gigantic_folio(folio, order, true); -} - #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static void destroy_compound_gigantic_folio(struct folio *folio, - unsigned int order) -{ - __destroy_compound_gigantic_folio(folio, order, false); -} - static void free_gigantic_folio(struct folio *folio, unsigned int order) { /* @@ -1569,38 +1533,52 @@ static void free_gigantic_folio(struct folio *folio, unsigned int order) static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { - struct page *page; - unsigned long nr_pages = pages_per_huge_page(h); + struct folio *folio; + int order = huge_page_order(h); + bool retry = false; + if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - +retry: + folio = NULL; #ifdef CONFIG_CMA { int node; - if (hugetlb_cma[nid]) { - page = cma_alloc(hugetlb_cma[nid], nr_pages, - huge_page_order(h), true); - if (page) - return page_folio(page); - } + if (hugetlb_cma[nid]) + folio = cma_alloc_folio(hugetlb_cma[nid], order); - if (!(gfp_mask & __GFP_THISNODE)) { + if (!folio && !(gfp_mask & __GFP_THISNODE)) { for_each_node_mask(node, *nodemask) { if (node == nid || !hugetlb_cma[node]) continue; - page = cma_alloc(hugetlb_cma[node], nr_pages, - huge_page_order(h), true); - if (page) - return page_folio(page); + folio = cma_alloc_folio(hugetlb_cma[node], order); + if (folio) + break; } } } #endif + if (!folio) { + struct page *page = alloc_contig_pages(1 << order, gfp_mask, nid, nodemask); - page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); - return page ? page_folio(page) : NULL; + if (!page) + return NULL; + + folio = page_folio(page); + } + + if (folio_ref_freeze(folio, 1)) + return folio; + + pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); + free_gigantic_folio(folio, order); + if (!retry) { + retry = true; + goto retry; + } + return NULL; } #else /* !CONFIG_CONTIG_ALLOC */ @@ -1619,8 +1597,6 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, } static inline void free_gigantic_folio(struct folio *folio, unsigned int order) { } -static inline void destroy_compound_gigantic_folio(struct folio *folio, - unsigned int order) { } #endif /* @@ -1747,19 +1723,17 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, folio_clear_hugetlb_hwpoison(folio); folio_ref_unfreeze(folio, 1); + INIT_LIST_HEAD(&folio->_deferred_list); /* * Non-gigantic pages demoted from CMA allocated gigantic pages * need to be given back to CMA in free_gigantic_folio. */ if (hstate_is_gigantic(h) || - hugetlb_cma_folio(folio, huge_page_order(h))) { - destroy_compound_gigantic_folio(folio, huge_page_order(h)); + hugetlb_cma_folio(folio, huge_page_order(h))) free_gigantic_folio(folio, huge_page_order(h)); - } else { - INIT_LIST_HEAD(&folio->_deferred_list); + else folio_put(folio); - } } /* @@ -2032,95 +2006,6 @@ static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int ni spin_unlock_irq(&hugetlb_lock); } -static bool __prep_compound_gigantic_folio(struct folio *folio, - unsigned int order, bool demote) -{ - int i, j; - int nr_pages = 1 << order; - struct page *p; - - __folio_clear_reserved(folio); - for (i = 0; i < nr_pages; i++) { - p = folio_page(folio, i); - - /* - * For gigantic hugepages allocated through bootmem at - * boot, it's safer to be consistent with the not-gigantic - * hugepages and clear the PG_reserved bit from all tail pages - * too. Otherwise drivers using get_user_pages() to access tail - * pages may get the reference counting wrong if they see - * PG_reserved set on a tail page (despite the head page not - * having PG_reserved set). Enforcing this consistency between - * head and tail pages allows drivers to optimize away a check - * on the head page when they need know if put_page() is needed - * after get_user_pages(). - */ - if (i != 0) /* head page cleared above */ - __ClearPageReserved(p); - /* - * Subtle and very unlikely - * - * Gigantic 'page allocators' such as memblock or cma will - * return a set of pages with each page ref counted. We need - * to turn this set of pages into a compound page with tail - * page ref counts set to zero. Code such as speculative page - * cache adding could take a ref on a 'to be' tail page. - * We need to respect any increased ref count, and only set - * the ref count to zero if count is currently 1. If count - * is not 1, we return an error. An error return indicates - * the set of pages can not be converted to a gigantic page. - * The caller who allocated the pages should then discard the - * pages using the appropriate free interface. - * - * In the case of demote, the ref count will be zero. - */ - if (!demote) { - if (!page_ref_freeze(p, 1)) { - pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); - goto out_error; - } - } else { - VM_BUG_ON_PAGE(page_count(p), p); - } - if (i != 0) - set_compound_head(p, &folio->page); - } - __folio_set_head(folio); - /* we rely on prep_new_hugetlb_folio to set the hugetlb flag */ - folio_set_order(folio, order); - atomic_set(&folio->_entire_mapcount, -1); - atomic_set(&folio->_large_mapcount, -1); - atomic_set(&folio->_pincount, 0); - return true; - -out_error: - /* undo page modifications made above */ - for (j = 0; j < i; j++) { - p = folio_page(folio, j); - if (j != 0) - clear_compound_head(p); - set_page_refcounted(p); - } - /* need to clear PG_reserved on remaining tail pages */ - for (; j < nr_pages; j++) { - p = folio_page(folio, j); - __ClearPageReserved(p); - } - return false; -} - -static bool prep_compound_gigantic_folio(struct folio *folio, - unsigned int order) -{ - return __prep_compound_gigantic_folio(folio, order, false); -} - -static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, - unsigned int order) -{ - return __prep_compound_gigantic_folio(folio, order, true); -} - /* * Find and lock address space (mapping) in write mode. * @@ -2159,7 +2044,7 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, */ if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry)) alloc_try_hard = false; - gfp_mask |= __GFP_COMP|__GFP_NOWARN; + gfp_mask |= __GFP_NOWARN; if (alloc_try_hard) gfp_mask |= __GFP_RETRY_MAYFAIL; if (nid == NUMA_NO_NODE) @@ -2206,48 +2091,14 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, return folio; } -static struct folio *__alloc_fresh_hugetlb_folio(struct hstate *h, - gfp_t gfp_mask, int nid, nodemask_t *nmask, - nodemask_t *node_alloc_noretry) -{ - struct folio *folio; - bool retry = false; - -retry: - if (hstate_is_gigantic(h)) - folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask); - else - folio = alloc_buddy_hugetlb_folio(h, gfp_mask, - nid, nmask, node_alloc_noretry); - if (!folio) - return NULL; - - if (hstate_is_gigantic(h)) { - if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) { - /* - * Rare failure to convert pages to compound page. - * Free pages and try again - ONCE! - */ - free_gigantic_folio(folio, huge_page_order(h)); - if (!retry) { - retry = true; - goto retry; - } - return NULL; - } - } - - return folio; -} - static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) { struct folio *folio; - folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, - node_alloc_noretry); + folio = hstate_is_gigantic(h) ? alloc_gigantic_folio(h, gfp_mask, nid, nmask) : + alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry); if (folio) init_new_hugetlb_folio(h, folio); return folio; @@ -2265,7 +2116,8 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, { struct folio *folio; - folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); + folio = hstate_is_gigantic(h) ? alloc_gigantic_folio(h, gfp_mask, nid, nmask) : + alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); if (!folio) return NULL; @@ -3333,6 +3185,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio, for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); + __ClearPageReserved(folio_page(folio, pfn - head_pfn)); __init_single_page(page, pfn, zone, nid); prep_compound_tail((struct page *)folio, pfn - head_pfn); ret = page_ref_freeze(page, 1); @@ -3950,11 +3803,9 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) } } - /* - * Use destroy_compound_hugetlb_folio_for_demote for all huge page - * sizes as it will not ref count folios. - */ - destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h)); + split_page_memcg(&folio->page, huge_page_order(h), huge_page_order(target_hstate)); + split_page_owner(&folio->page, huge_page_order(h), huge_page_order(target_hstate)); + pgalloc_tag_split(&folio->page, 1 << huge_page_order(h)); /* * Taking target hstate mutex synchronizes with set_max_huge_pages. @@ -3969,11 +3820,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) i += pages_per_huge_page(target_hstate)) { subpage = folio_page(folio, i); inner_folio = page_folio(subpage); - if (hstate_is_gigantic(target_hstate)) - prep_compound_gigantic_folio_for_demote(inner_folio, - target_hstate->order); - else - prep_compound_page(subpage, target_hstate->order); + prep_compound_page(subpage, target_hstate->order); folio_change_private(inner_folio, NULL); prep_new_hugetlb_folio(target_hstate, inner_folio, nid); free_huge_folio(inner_folio); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..4ecf2c9428f3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1192,16 +1192,36 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock_irqrestore(&zone->lock, flags); } +/* Split a multi-block free page into its individual pageblocks */ +static void split_large_buddy(struct zone *zone, struct page *page, + unsigned long pfn, int order, fpi_t fpi_flags) +{ + unsigned long end_pfn = pfn + (1 << order); + + VM_WARN_ON_ONCE(pfn & ((1 << order) - 1)); + /* Caller removed page from freelist, buddy info cleared! */ + VM_WARN_ON_ONCE(PageBuddy(page)); + + if (order > pageblock_order) + order = pageblock_order; + + while (pfn != end_pfn) { + int mt = get_pfnblock_migratetype(page, pfn); + + __free_one_page(page, pfn, zone, order, mt, fpi_flags); + pfn += 1 << order; + page = pfn_to_page(pfn); + } +} + static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn, unsigned int order, fpi_t fpi_flags) { unsigned long flags; - int migratetype; spin_lock_irqsave(&zone->lock, flags); - migratetype = get_pfnblock_migratetype(page, pfn); - __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); + split_large_buddy(zone, page, pfn, order, fpi_flags); spin_unlock_irqrestore(&zone->lock, flags); } @@ -1693,27 +1713,6 @@ static unsigned long find_large_buddy(unsigned long start_pfn) return start_pfn; } -/* Split a multi-block free page into its individual pageblocks */ -static void split_large_buddy(struct zone *zone, struct page *page, - unsigned long pfn, int order) -{ - unsigned long end_pfn = pfn + (1 << order); - - VM_WARN_ON_ONCE(order <= pageblock_order); - VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1)); - - /* Caller removed page from freelist, buddy info cleared! */ - VM_WARN_ON_ONCE(PageBuddy(page)); - - while (pfn != end_pfn) { - int mt = get_pfnblock_migratetype(page, pfn); - - __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE); - pfn += pageblock_nr_pages; - page = pfn_to_page(pfn); - } -} - /** * move_freepages_block_isolate - move free pages in block for page isolation * @zone: the zone @@ -1754,7 +1753,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page, del_page_from_free_list(buddy, zone, order, get_pfnblock_migratetype(buddy, pfn)); set_pageblock_migratetype(page, migratetype); - split_large_buddy(zone, buddy, pfn, order); + split_large_buddy(zone, buddy, pfn, order, FPI_NONE); return true; } @@ -1765,7 +1764,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page, del_page_from_free_list(page, zone, order, get_pfnblock_migratetype(page, pfn)); set_pageblock_migratetype(page, migratetype); - split_large_buddy(zone, page, pfn, order); + split_large_buddy(zone, page, pfn, order, FPI_NONE); return true; } move: @@ -6439,6 +6438,40 @@ int __alloc_contig_migrate_range(struct compact_control *cc, return (ret < 0) ? ret : 0; } +static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags) +{ + post_alloc_hook(page, order, __GFP_MOVABLE); + return page; +} +#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__)) + +static void split_free_pages(struct list_head *freepages) +{ + unsigned int i, order; + struct page *page, *next; + LIST_HEAD(tmp_list); + + for (order = 0; order < NR_PAGE_ORDERS; order++) { + list_for_each_entry_safe(page, next, &freepages[order], lru) { + unsigned int nr_pages; + + list_del(&page->lru); + + nr_pages = 1 << order; + + mark_allocated(page, order, __GFP_MOVABLE); + if (order) + split_page(page, order); + + for (i = 0; i < nr_pages; i++) { + list_add(&page->lru, &tmp_list); + page++; + } + } + list_splice_init(&tmp_list, &freepages[0]); + } +} + /** * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate @@ -6551,12 +6584,25 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end, goto done; } - /* Free head and tail (if any) */ - if (start != outer_start) - free_contig_range(outer_start, start - outer_start); - if (end != outer_end) - free_contig_range(end, outer_end - end); + if (!(gfp_mask & __GFP_COMP)) { + split_free_pages(cc.freepages); + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) { + struct page *head = pfn_to_page(start); + int order = ilog2(end - start); + + check_new_pages(head, order); + prep_new_page(head, order, gfp_mask, 0); + } else { + ret = -EINVAL; + WARN(true, "PFN range: requested [%lu, %lu), leaked [%lu, %lu)\n", + start, end, outer_start, outer_end); + } done: undo_isolate_page_range(start, end, migratetype); return ret; @@ -6665,6 +6711,18 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, void free_contig_range(unsigned long pfn, unsigned long nr_pages) { unsigned long count = 0; + struct folio *folio = pfn_folio(pfn); + + if (folio_test_large(folio)) { + int expected = folio_nr_pages(folio); + + if (nr_pages == expected) + folio_put(folio); + else + WARN(true, "PFN %lu: nr_pages %lu != expected %d\n", + pfn, nr_pages, expected); + return; + } for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn);