[PATCH] mm, hotplug: get rid of zone/node shrinking

From: Michal Hocko
Date: Tue Apr 04 2017 - 15:09:00 EST


this is basically a revert of 815121d2b5cd ("memory_hotplug: clear zone
when removing the memory"). While the node/zone shrinking sounds
attractive at first sight because it allows to
online_movable(range)
[...]
offline_movable(range)
[...]
online_kernel(range)

but this requires that the range is in the beginning or the end of a
zone or operate on the whole zone basis. This code is even broken as
noticed by Reza Arbab. He has triggered an oops when doing the full node
offline
Unable to handle kernel paging request for data at address 0xc000800000f19e78
__nr_to_section
__pfn_to_section
find_biggest_section_pfn
shrink_pgdat_span
__remove_zone
__remove_section
__remove_pages
arch_remove_memory
remove_memory
which is caused by an overflow in find_biggest_section_pfn. This code
simply cannot work on the first section [0, PAGES_PER_SECTION] because
pfn = end_pfn - 1;
for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
pfn would underflow in the unsigned arithmetic. This doesn't happen
usually because the lowest pfns are usually reserved and out of the
hotplug reach.

The changelog of the above commit doesn't mention any such usecase and
sounds more like an nice-to-have and inverse to __add_zone which we are
trying to get rid of in this series. So let's simplify the code and
remove the complication. We can reintroduce it back along with a valid
usecase description.

Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
mm/memory_hotplug.c | 207 ----------------------------------------------------
1 file changed, 207 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a358d7a67651..d48a4456b20d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -379,212 +379,9 @@ EXPORT_SYMBOL_GPL(__add_pages);

#ifdef CONFIG_MEMORY_HOTREMOVE
/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
-static int find_smallest_section_pfn(int nid, struct zone *zone,
- unsigned long start_pfn,
- unsigned long end_pfn)
-{
- struct mem_section *ms;
-
- for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
- ms = __pfn_to_section(start_pfn);
-
- if (unlikely(!valid_section(ms)))
- continue;
-
- if (unlikely(pfn_to_nid(start_pfn) != nid))
- continue;
-
- if (zone && zone != page_zone(pfn_to_page(start_pfn)))
- continue;
-
- return start_pfn;
- }
-
- return 0;
-}
-
-/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
-static int find_biggest_section_pfn(int nid, struct zone *zone,
- unsigned long start_pfn,
- unsigned long end_pfn)
-{
- struct mem_section *ms;
- unsigned long pfn;
-
- /* pfn is the end pfn of a memory section. */
- pfn = end_pfn - 1;
- for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
- ms = __pfn_to_section(pfn);
-
- if (unlikely(!valid_section(ms)))
- continue;
-
- if (unlikely(pfn_to_nid(pfn) != nid))
- continue;
-
- if (zone && zone != page_zone(pfn_to_page(pfn)))
- continue;
-
- return pfn;
- }
-
- return 0;
-}
-
-static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long zone_start_pfn = zone->zone_start_pfn;
- unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
- unsigned long zone_end_pfn = z;
- unsigned long pfn;
- struct mem_section *ms;
- int nid = zone_to_nid(zone);
-
- zone_span_writelock(zone);
- if (zone_start_pfn == start_pfn) {
- /*
- * If the section is smallest section in the zone, it need
- * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
- * In this case, we find second smallest valid mem_section
- * for shrinking zone.
- */
- pfn = find_smallest_section_pfn(nid, zone, end_pfn,
- zone_end_pfn);
- if (pfn) {
- zone->zone_start_pfn = pfn;
- zone->spanned_pages = zone_end_pfn - pfn;
- }
- } else if (zone_end_pfn == end_pfn) {
- /*
- * If the section is biggest section in the zone, it need
- * shrink zone->spanned_pages.
- * In this case, we find second biggest valid mem_section for
- * shrinking zone.
- */
- pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
- start_pfn);
- if (pfn)
- zone->spanned_pages = pfn - zone_start_pfn + 1;
- }
-
- /*
- * The section is not biggest or smallest mem_section in the zone, it
- * only creates a hole in the zone. So in this case, we need not
- * change the zone. But perhaps, the zone has only hole data. Thus
- * it check the zone has only hole or not.
- */
- pfn = zone_start_pfn;
- for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
- ms = __pfn_to_section(pfn);
-
- if (unlikely(!valid_section(ms)))
- continue;
-
- if (page_zone(pfn_to_page(pfn)) != zone)
- continue;
-
- /* If the section is current section, it continues the loop */
- if (start_pfn == pfn)
- continue;
-
- /* If we find valid section, we have nothing to do */
- zone_span_writeunlock(zone);
- return;
- }
-
- /* The zone has no valid section */
- zone->zone_start_pfn = 0;
- zone->spanned_pages = 0;
- zone_span_writeunlock(zone);
-}
-
-static void shrink_pgdat_span(struct pglist_data *pgdat,
- unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
- unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
- unsigned long pgdat_end_pfn = p;
- unsigned long pfn;
- struct mem_section *ms;
- int nid = pgdat->node_id;
-
- if (pgdat_start_pfn == start_pfn) {
- /*
- * If the section is smallest section in the pgdat, it need
- * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
- * In this case, we find second smallest valid mem_section
- * for shrinking zone.
- */
- pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
- pgdat_end_pfn);
- if (pfn) {
- pgdat->node_start_pfn = pfn;
- pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
- }
- } else if (pgdat_end_pfn == end_pfn) {
- /*
- * If the section is biggest section in the pgdat, it need
- * shrink pgdat->node_spanned_pages.
- * In this case, we find second biggest valid mem_section for
- * shrinking zone.
- */
- pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
- start_pfn);
- if (pfn)
- pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
- }
-
- /*
- * If the section is not biggest or smallest mem_section in the pgdat,
- * it only creates a hole in the pgdat. So in this case, we need not
- * change the pgdat.
- * But perhaps, the pgdat has only hole data. Thus it check the pgdat
- * has only hole or not.
- */
- pfn = pgdat_start_pfn;
- for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
- ms = __pfn_to_section(pfn);
-
- if (unlikely(!valid_section(ms)))
- continue;
-
- if (pfn_to_nid(pfn) != nid)
- continue;
-
- /* If the section is current section, it continues the loop */
- if (start_pfn == pfn)
- continue;
-
- /* If we find valid section, we have nothing to do */
- return;
- }
-
- /* The pgdat has no valid section */
- pgdat->node_start_pfn = 0;
- pgdat->node_spanned_pages = 0;
-}
-
-static void __remove_zone(struct zone *zone, unsigned long start_pfn)
-{
- struct pglist_data *pgdat = zone->zone_pgdat;
- int nr_pages = PAGES_PER_SECTION;
- int zone_type;
- unsigned long flags;
-
- zone_type = zone - pgdat->node_zones;
-
- pgdat_resize_lock(zone->zone_pgdat, &flags);
- shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
- shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
- pgdat_resize_unlock(zone->zone_pgdat, &flags);
-}
-
static int __remove_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset)
{
- unsigned long start_pfn;
- int scn_nr;
int ret = -EINVAL;

if (!valid_section(ms))
@@ -594,10 +391,6 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
if (ret)
return ret;

- scn_nr = __section_nr(ms);
- start_pfn = section_nr_to_pfn(scn_nr);
- __remove_zone(zone, start_pfn);
-
sparse_remove_one_section(zone, ms, map_offset);
return 0;
}
--
2.11.0


--
Michal Hocko
SUSE Labs