[GIT PULL] percpu fixes for v4.7-rc3

From: Tejun Heo
Date: Mon Jun 13 2016 - 16:58:45 EST


Hello, Linus.

When GFP_ATOMIC support was added to the percpu allocator,
synchronization for the fast path, which doesn't require external
allocations, was split out into pcpu_lock; unfortunately, that change
incorrectly decoupled the async paths, so a percpu chunk could get
destroyed while still being operated on. This pull request contains
two patches to fix the bug.
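For reference, below is a minimal userspace sketch (pthreads, not
kernel code; all names are illustrative, not the allocator's) of the
list-based scheme these patches move to: chunks waiting for map
extension sit on a global list, the worker pops them one at a time
under the lock, and the destruction path unlinks a chunk under that
same lock before freeing it, so the worker can never pick up a chunk
that is about to be destroyed. The actual patches additionally
serialize map extension against chunk destruction with
pcpu_alloc_mutex, which this sketch omits.

/*
 * Minimal userspace sketch of the list-based scheme, NOT kernel code.
 * All names (extend_list, balance_worker, ...) are illustrative only.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk {
	struct chunk *next;	/* stand-in for ->map_extend_list linkage */
	int map_alloc;		/* stand-in for the allocation map size */
};

static struct chunk *extend_list;	/* chunks waiting for map extension */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* queue a chunk for async map extension */
static void queue_for_extension(struct chunk *c)
{
	pthread_mutex_lock(&lock);
	c->next = extend_list;
	extend_list = c;
	pthread_mutex_unlock(&lock);
}

/* worker: pop one chunk at a time under the lock, then "extend" it */
static void *balance_worker(void *arg)
{
	(void)arg;
	for (;;) {
		struct chunk *c;

		pthread_mutex_lock(&lock);
		c = extend_list;
		if (c)
			extend_list = c->next;
		pthread_mutex_unlock(&lock);

		if (!c)
			break;
		c->map_alloc *= 2;
	}
	return NULL;
}

/* destruction path: unlink from the pending list before freeing */
static void destroy_chunk(struct chunk *victim)
{
	struct chunk **pp;

	pthread_mutex_lock(&lock);
	for (pp = &extend_list; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;	/* like list_del_init() */
			break;
		}
	}
	pthread_mutex_unlock(&lock);

	free(victim);	/* the worker can no longer find it */
}

int main(void)
{
	struct chunk *a = calloc(1, sizeof(*a));
	struct chunk *b = calloc(1, sizeof(*b));
	pthread_t t;

	a->map_alloc = b->map_alloc = 16;
	queue_for_extension(a);
	queue_for_extension(b);

	destroy_chunk(b);	/* safe: b is unlinked before being freed */

	pthread_create(&t, NULL, balance_worker, NULL);
	pthread_join(t, NULL);

	printf("a->map_alloc = %d\n", a->map_alloc);	/* prints 32 */
	free(a);
	return 0;
}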

Thanks.

The following changes since commit 28165ec7a99be98123aa89540bf2cfc24df19498:

Merge tag 'armsoc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc (2016-05-24 15:50:58 -0700)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git for-4.7-fixes

for you to fetch changes up to 6710e594f71ccaad8101bc64321152af7cd9ea28:

percpu: fix synchronization between synchronous map extension and chunk destruction (2016-05-25 11:48:25 -0400)

----------------------------------------------------------------
Tejun Heo (2):
percpu: fix synchronization between chunk->map_extend_work and chunk destruction
percpu: fix synchronization between synchronous map extension and chunk destruction

mm/percpu.c | 73 +++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 0c59684..9903830 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -112,7 +112,7 @@ struct pcpu_chunk {
int map_used; /* # of map entries used before the sentry */
int map_alloc; /* # of map entries allocated */
int *map; /* allocation map */
- struct work_struct map_extend_work;/* async ->map[] extension */
+ struct list_head map_extend_list;/* on pcpu_map_extend_chunks */

void *data; /* chunk data */
int first_free; /* no free below this */
@@ -162,10 +162,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;

static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
-static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
+static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */

static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */

+/* chunks which need their map areas extended, protected by pcpu_lock */
+static LIST_HEAD(pcpu_map_extend_chunks);
+
/*
* The number of empty populated pages, protected by pcpu_lock. The
* reserved chunk doesn't contribute to the count.
@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
{
int margin, new_alloc;

+ lockdep_assert_held(&pcpu_lock);
+
if (is_atomic) {
margin = 3;

if (chunk->map_alloc <
- chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
- pcpu_async_enabled)
- schedule_work(&chunk->map_extend_work);
+ chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
+ if (list_empty(&chunk->map_extend_list)) {
+ list_add_tail(&chunk->map_extend_list,
+ &pcpu_map_extend_chunks);
+ pcpu_schedule_balance_work();
+ }
+ }
} else {
margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
}
@@ -435,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
unsigned long flags;

+ lockdep_assert_held(&pcpu_alloc_mutex);
+
new = pcpu_mem_zalloc(new_size);
if (!new)
return -ENOMEM;
@@ -467,20 +478,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
return 0;
}

-static void pcpu_map_extend_workfn(struct work_struct *work)
-{
- struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
- map_extend_work);
- int new_alloc;
-
- spin_lock_irq(&pcpu_lock);
- new_alloc = pcpu_need_to_extend(chunk, false);
- spin_unlock_irq(&pcpu_lock);
-
- if (new_alloc)
- pcpu_extend_area_map(chunk, new_alloc);
-}
-
/**
* pcpu_fit_in_area - try to fit the requested allocation in a candidate area
* @chunk: chunk the candidate area belongs to
@@ -740,7 +737,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map_used = 1;

INIT_LIST_HEAD(&chunk->list);
- INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&chunk->map_extend_list);
chunk->free_size = pcpu_unit_size;
chunk->contig_hint = pcpu_unit_size;

@@ -895,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}

+ if (!is_atomic)
+ mutex_lock(&pcpu_alloc_mutex);
+
spin_lock_irqsave(&pcpu_lock, flags);

/* serve reserved allocations from the reserved chunk if available */
@@ -967,12 +967,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
if (is_atomic)
goto fail;

- mutex_lock(&pcpu_alloc_mutex);
-
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
chunk = pcpu_create_chunk();
if (!chunk) {
- mutex_unlock(&pcpu_alloc_mutex);
err = "failed to allocate new chunk";
goto fail;
}
@@ -983,7 +980,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
spin_lock_irqsave(&pcpu_lock, flags);
}

- mutex_unlock(&pcpu_alloc_mutex);
goto restart;

area_found:
@@ -993,8 +989,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
if (!is_atomic) {
int page_start, page_end, rs, re;

- mutex_lock(&pcpu_alloc_mutex);
-
page_start = PFN_DOWN(off);
page_end = PFN_UP(off + size);

@@ -1005,7 +999,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,

spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
- mutex_unlock(&pcpu_alloc_mutex);
pcpu_free_area(chunk, off, &occ_pages);
err = "failed to populate";
goto fail_unlock;
@@ -1045,6 +1038,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
/* see the flag handling in pcpu_blance_workfn() */
pcpu_atomic_alloc_failed = true;
pcpu_schedule_balance_work();
+ } else {
+ mutex_unlock(&pcpu_alloc_mutex);
}
return NULL;
}
@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
continue;

+ list_del_init(&chunk->map_extend_list);
list_move(&chunk->list, &to_free);
}

@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
pcpu_destroy_chunk(chunk);
}

+ /* service chunks which requested async area map extension */
+ do {
+ int new_alloc = 0;
+
+ spin_lock_irq(&pcpu_lock);
+
+ chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
+ struct pcpu_chunk, map_extend_list);
+ if (chunk) {
+ list_del_init(&chunk->map_extend_list);
+ new_alloc = pcpu_need_to_extend(chunk, false);
+ }
+
+ spin_unlock_irq(&pcpu_lock);
+
+ if (new_alloc)
+ pcpu_extend_area_map(chunk, new_alloc);
+ } while (chunk);
+
/*
* Ensure there are certain number of free populated pages for
* atomic allocs. Fill up from the most packed so that atomic
@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
*/
schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&schunk->list);
- INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&schunk->map_extend_list);
schunk->base_addr = base_addr;
schunk->map = smap;
schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (dyn_size) {
dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&dchunk->list);
- INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&dchunk->map_extend_list);
dchunk->base_addr = base_addr;
dchunk->map = dmap;
dchunk->map_alloc = ARRAY_SIZE(dmap);