+
+/*
+ * Protects some early interrupt threads, and also for a short period of time
+ * from smp_init() to page_alloc_init_late() when deferred pages are
+ * initialized.
+ */
+static __initdata DEFINE_SPINLOCK(deferred_zone_grow_lock);
Comment is a little confusing. Locks don't protect "threads" - they
protect data. Can we be specific about which data is being protected?
Why is a new lock needed here? Those data structures already have
designated locks, don't they?
If the lock protects "early interrupt threads" then it's surprising to
see it taken with spin_lock() and not spin_lock_irqsave()?
+DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * If this zone has deferred pages, try to grow it by initializing enough
+ * deferred pages to satisfy the allocation specified by order, rounded up to
+ * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
+ * of SECTION_SIZE bytes by initializing struct pages in increments of
+ * PAGES_PER_SECTION * sizeof(struct page) bytes.
+ */
Please also document the return value.
+static noinline bool __init
Why was noinline needed?
+deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+ int zid = zone_idx(zone);
+ int nid = zone->node;
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+ unsigned long nr_pages = 0;
+ unsigned long first_init_pfn, first_deferred_pfn, spfn, epfn, t;
+ phys_addr_t spa, epa;
+ u64 i;
+
+ /* Only the last zone may have deferred pages */
+ if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
+ return false;
+
+ first_deferred_pfn = READ_ONCE(pgdat->first_deferred_pfn);
It would be nice to have a little comment explaining why READ_ONCE was
needed.
Would it still be needed if this code was moved into the locked region?
+ first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
+
+ if (first_init_pfn >= pgdat_end_pfn(pgdat))
+ return false;
+
+ spin_lock(&deferred_zone_grow_lock);
+ /*
+ * Bail if we raced with another thread that disabled on demand
+ * initialization.
+ */
+ if (!static_branch_unlikely(&deferred_pages)) {
+ spin_unlock(&deferred_zone_grow_lock);
+ return false;
+ }
+
+ for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+ spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+ epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+
+ while (spfn < epfn && nr_pages < nr_pages_needed) {
+ t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
+ first_deferred_pfn = min(t, epfn);
+ nr_pages += deferred_init_pages(nid, zid, spfn,
+ first_deferred_pfn);
+ spfn = first_deferred_pfn;
+ }
+
+ if (nr_pages >= nr_pages_needed)
+ break;
+ }
+
+ for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+ spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+ epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+ deferred_free_pages(nid, zid, spfn, epfn);
+
+ if (first_deferred_pfn == epfn)
+ break;
+ }
+ WRITE_ONCE(pgdat->first_deferred_pfn, first_deferred_pfn);
+ spin_unlock(&deferred_zone_grow_lock);
+
+ return nr_pages >= nr_pages_needed;
+}
+
+/*
+ * deferred_grow_zone() is __init, but it is called from
+ * get_page_from_freelist() during early boot until deferred_pages permanently
+ * disables this call. This is why, we have refdata wrapper to avoid warning,
+ * and ensure that the function body gets unloaded.
s/why,/why/
s/ensure/to ensure/
+ */
+static bool __ref
+_deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+ return deferred_grow_zone(zone, order);
+}
+
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
void __init page_alloc_init_late(void)
@@ -1613,6 +1665,14 @@ void __init page_alloc_init_late(void)
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
int nid;
+ /*
+ * We are about to initialize the rest of deferred pages, permanently
+ * disable on-demand struct page initialization.
+ */
+ spin_lock(&deferred_zone_grow_lock);
+ static_branch_disable(&deferred_pages);
+ spin_unlock(&deferred_zone_grow_lock);
Ah, so the new lock is to protect the static branch machinery only?