Re: [PATCH] do not free non slab allocated per_cpu_pageset
From: Andrew Morton
Date: Fri Sep 22 2006 - 14:40:28 EST
On Fri, 22 Sep 2006 11:20:22 -0700 (PDT)
David Rientjes <rientjes@xxxxxxxxxxxxxxxxx> wrote:
> On Thu, 21 Sep 2006, David Rientjes wrote:
>
> > The _only_ time zone_pcp is slab allocated is through process_zones(). So
> > if we have an error on kmalloc_node for that zone_pcp, all previous
> > allocations are freed and process_zones() fails for that cpu.
> >
> > We are guaranteed that the process_zones() for cpu 0 succeeds, otherwise
> > the pageset notifier isn't registered. On CPU_UP_PREPARE for cpu 4 in
> > this case, process_zones() fails because we couldn't kmalloc the
> > per_cpu_pageset and we return NOTIFY_BAD. This prints the failed message
> > in the report and then CPU_UP_CANCELED is sent back to the notifier which
> > attempts to kfree the zone that was never kmalloc'd.
> >
> > The fix will work except for the case that zone_pcp is never set to NULL
> > as it should be.
> >
>
> As reported by Keith, the following 2.6.18 patch stops the panic
> associated with attempting to free a non slab-allocated per_cpu_pageset.
>
> Signed-off-by: David Rientjes <rientjes@xxxxxxxxxxxxxxxxx>
> ---
> mm/page_alloc.c | 4 +++-
> 1 files changed, 3 insertions(+), 1 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 54a4f53..e16173f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1845,8 +1845,10 @@ static inline void free_zone_pagesets(in
> for_each_zone(zone) {
> struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
>
> + /* Free per_cpu_pageset if it is slab allocated */
> + if (pset != &boot_pageset[cpu])
> + kfree(pset);
> zone_pcp(zone, cpu) = NULL;
> - kfree(pset);
> }
> }
>
I think I preferred my earlier fix, recently reworked as:
--- a/mm/page_alloc.c~process_zones-fix-error-handling
+++ a/mm/page_alloc.c
@@ -1805,17 +1805,30 @@ static void setup_pagelist_highmark(stru
*/
static struct per_cpu_pageset boot_pageset[NR_CPUS];
+static void free_zone_pagesets(int cpu)
+{
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ kfree(zone_pcp(zone, cpu));
+ zone_pcp(zone, cpu) = NULL;
+ }
+}
+
/*
* Dynamically allocate memory for the
* per cpu pageset array in struct zone.
*/
static int __cpuinit process_zones(int cpu)
{
- struct zone *zone, *dzone;
+ struct zone *zone;
- for_each_zone(zone) {
+ for_each_zone(zone)
+ zone_pcp(zone, cpu) = NULL;
- zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
+ for_each_zone(zone) {
+ zone_pcp(zone, cpu) =
+ kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, cpu_to_node(cpu));
if (!zone_pcp(zone, cpu))
goto bad;
@@ -1824,32 +1837,16 @@ static int __cpuinit process_zones(int c
if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
- (zone->present_pages / percpu_pagelist_fraction));
+ (zone->present_pages / percpu_pagelist_fraction));
}
return 0;
bad:
- for_each_zone(dzone) {
- if (dzone == zone)
- break;
- kfree(zone_pcp(dzone, cpu));
- zone_pcp(dzone, cpu) = NULL;
- }
+ free_zone_pagesets(cpu);
+ printk(KERN_EMERG "%s: kmalloc() failed\n", __FUNCTION__);
return -ENOMEM;
}
-static inline void free_zone_pagesets(int cpu)
-{
- struct zone *zone;
-
- for_each_zone(zone) {
- struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
-
- zone_pcp(zone, cpu) = NULL;
- kfree(pset);
- }
-}
-
static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
_
ie:
/*
 * Release the per-cpu pagesets belonging to @cpu and reset each
 * zone's pointer to NULL so a later retry starts from a known state.
 * kfree(NULL) is a no-op, so zones whose pageset was never allocated
 * (or already pre-NULLed by process_zones()) are handled safely.
 */
static void free_zone_pagesets(int cpu)
{
	struct zone *zone;

	for_each_zone(zone) {
		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);

		zone_pcp(zone, cpu) = NULL;
		kfree(pset);
	}
}
/*
* Dynamically allocate memory for the
* per cpu pageset array in struct zone.
*/
static int __cpuinit process_zones(int cpu)
{
struct zone *zone;
/*
 * First flip every zone's per-cpu pointer for this cpu out of its
 * bootstrap state and into NULL in a single pass, so that if a
 * later allocation fails the cleanup path can kfree() every slot
 * uniformly -- kfree(NULL) is a no-op, and no slot can still be
 * pointing at the static boot_pageset (which must never be freed).
 */
for_each_zone(zone)
zone_pcp(zone, cpu) = NULL;
for_each_zone(zone) {
/* Allocate on the node local to @cpu for NUMA locality */
zone_pcp(zone, cpu) =
kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, cpu_to_node(cpu));
if (!zone_pcp(zone, cpu))
goto bad;
setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
/* Honour the sysctl override of the per-cpu high watermark */
if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
(zone->present_pages / percpu_pagelist_fraction));
}
return 0;
bad:
/* Frees the zones allocated so far; the rest are already NULL */
free_zone_pagesets(cpu);
printk(KERN_EMERG "%s: kmalloc() failed\n", __FUNCTION__);
return -ENOMEM;
}
It simplifies the code both from a readability perspective and
conceptually: it flips all the per-cpu pointers from their bootstrap state
into their kfreeable state in a single pass, rather than leaving them
holding an unknown mixture of the two.
I haven't tested it though..
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/