[PATCH 2/2] mm/percpu.c: don't bother to re-walk the pcpu_slot list if nobody free space since we last drop pcpu_lock.

From: Jianyu Zhan
Date: Thu Mar 27 2014 - 07:06:23 EST


Presently, after we fail the first try to walk the pcpu_slot list
to find a chunk for allocating, we just drop the pcpu_lock spinlock,
and go allocating a new chunk. Then we re-gain the pcpu_lock and
anchoring our hope on that during this period, some guys might have
freed space for us(we still hold the pcpu_alloc_mutex during this
period, so only freeing or reclaiming could happen), we do a fully
rewalk of the pcpu_slot list.

However if nobody free space, this fully rewalk may seem too silly,
and we would eventually fall back to the new chunk.

And since we hold pcpu_alloc_mutex, only freeing or reclaiming path
could touch the pcpu_slot(which just need holding a pcpu_lock), we
could maintain a pcpu_slot_stat bitmap to record that during the period
we don't have the pcpu_lock, if anybody free space to any slot we
interest in. If so, we just just go inside these slots for a try;
if not, we just do allocation using the newly-allocated fully-free
new chunk.

Signed-off-by: Jianyu Zhan <nasa4836@xxxxxxxxx>
---
mm/percpu.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 69 insertions(+), 11 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index cfda29c..4e81367 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -178,6 +178,13 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */
static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */

static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
+/* A bitmap to record the stat of pcpu_slot, protected by pcpu_lock.
+ * If the correspoding bit == 0, that slot doesn't get changed during
+ * pcpu_lock dropped period; if bit == 1, otherwise.
+ *
+ * We have to defer its initialization until we konw the exact value of
+ * pcpu_nr_slots. */
+static unsigned long *pcpu_slot_stat_bitmap;

/* reclaim work to release fully free chunks, scheduled from free path */
static void pcpu_reclaim(struct work_struct *work);
@@ -313,10 +320,13 @@ static void pcpu_mem_free(void *ptr, size_t size)
vfree(ptr);
}

+#define PCPU_ALLOC 1
+#define PCPU_FREE 0
/**
* pcpu_chunk_relocate - put chunk in the appropriate chunk slot
* @chunk: chunk of interest
* @oslot: the previous slot it was on
+ * @reason: why we get here, from allocating or freeing path?
*
* This function is called after an allocation or free changed @chunk.
* New slot according to the changed state is determined and @chunk is
@@ -326,15 +336,23 @@ static void pcpu_mem_free(void *ptr, size_t size)
* CONTEXT:
* pcpu_lock.
*/
-static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
+static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot,
+ int reason)
{
int nslot = pcpu_chunk_slot(chunk);

- if (chunk != pcpu_reserved_chunk && oslot != nslot) {
- if (oslot < nslot)
+ if (chunk != pcpu_reserved_chunk) {
+ if (oslot < nslot) {
list_move(&chunk->list, &pcpu_slot[nslot]);
- else
+ /* oslot < nslot means we get more space
+ * in this chunk, so mark it */
+ __set_bit(nslot, pcpu_slot_stat_bitmap);
+ } else if (oslot > nslot)
list_move_tail(&chunk->list, &pcpu_slot[nslot]);
+ else if (reason == PCPU_FREE)
+ /* oslot == nslot, but we are freeing space
+ * in this chunk, worth trying, mark it */
+ __set_bit(nslot, pcpu_slot_stat_bitmap);
}
}

@@ -546,12 +564,12 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
chunk->free_size -= chunk->map[i];
chunk->map[i] = -chunk->map[i];

- pcpu_chunk_relocate(chunk, oslot);
+ pcpu_chunk_relocate(chunk, oslot, PCPU_ALLOC);
return off;
}

chunk->contig_hint = max_contig; /* fully scanned */
- pcpu_chunk_relocate(chunk, oslot);
+ pcpu_chunk_relocate(chunk, oslot, PCPU_ALLOC);

/* tell the upper layer that this chunk has no matching area */
return -1;
@@ -600,7 +618,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
}

chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
- pcpu_chunk_relocate(chunk, oslot);
+ pcpu_chunk_relocate(chunk, oslot, PCPU_FREE);
}

static struct pcpu_chunk *pcpu_alloc_chunk(void)
@@ -714,6 +732,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
int slot, off, new_alloc;
unsigned long flags;
void __percpu *ptr;
+ bool retry = false;
+ int base_slot = pcpu_size_to_slot(size);

if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
WARN(true, "illegal size (%zu) or align (%zu) for "
@@ -752,7 +772,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)

restart:
/* search through normal chunks */
- for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
+ for (slot = base_slot; slot < pcpu_nr_slots; slot++) {
+ /* even we fall back to retry, just look inside those
+ * changed slots */
+ if (retry && !test_bit(slot, pcpu_slot_stat_bitmap))
+ continue;
+
list_for_each_entry(chunk, &pcpu_slot[slot], list) {
if (size > chunk->contig_hint)
continue;
@@ -770,6 +795,7 @@ restart:
* pcpu_lock has been dropped, need to
* restart cpu_slot list walking.
*/
+ retry = false;
goto restart;
}

@@ -780,6 +806,7 @@ restart:
}

/* hmmm... no space left, create a new chunk */
+ bitmap_zero(pcpu_slot_stat_bitmap, pcpu_nr_slots);
spin_unlock_irqrestore(&pcpu_lock, flags);

chunk = pcpu_create_chunk();
@@ -789,7 +816,28 @@ restart:
}

spin_lock_irqsave(&pcpu_lock, flags);
- pcpu_chunk_relocate(chunk, -1);
+ /* put the new chunk it slot list, we deem it a freeing action.*/
+ pcpu_chunk_relocate(chunk, -1, PCPU_FREE);
+
+ /*
+ * If during the period since we last drop the lock,
+ * no others free space to the pcpu_slot, then
+ * don't bother walking pcpu_slot list again,
+ * just alloc from the newly-alloc'ed chunk.
+ */
+ bitmap_zero(pcpu_slot_stat_bitmap, base_slot);
+
+ /* pcpu_chunk_relocate() will put the new chunk
+ * in slot[pcpu_nr_slots - 1], thus don't test it.*/
+ if (bitmap_empty(pcpu_slot_stat_bitmap,
+ pcpu_nr_slots - 1 - base_slot)) {
+ off = pcpu_alloc_area(chunk, size, align);
+ if (likely(off >= 0))
+ goto area_found;
+ }
+
+ /* somebody might free enough space, worth trying again. */
+ retry = true;
goto restart;

area_found:
@@ -1315,11 +1363,20 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
* empty chunks.
*/
pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
+ /* We allocate the space for pcpu_slot and pcpu_slot_stat_bitmap
+ * in one go. */
pcpu_slot = memblock_virt_alloc(
- pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
+ pcpu_nr_slots * sizeof(pcpu_slot[0]) +
+ BITS_TO_LONGS(pcpu_nr_slots) * sizeof(unsigned long), 0);
for (i = 0; i < pcpu_nr_slots; i++)
INIT_LIST_HEAD(&pcpu_slot[i]);

+ pcpu_slot_stat_bitmap = (unsigned long *)PTR_ALIGN(
+ (char *)pcpu_slot + pcpu_nr_slots * sizeof(pcpu_slot[0]),
+ sizeof(unsigned long));
+ bitmap_zero(pcpu_slot_stat_bitmap, pcpu_nr_slots);
+
+
/*
* Initialize static chunk. If reserved_size is zero, the
* static chunk covers static area + dynamic allocation area
@@ -1366,7 +1423,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,

/* link the first chunk in */
pcpu_first_chunk = dchunk ?: schunk;
- pcpu_chunk_relocate(pcpu_first_chunk, -1);
+ /* put the new chunk it slot list, we deem it a freeing action.*/
+ pcpu_chunk_relocate(pcpu_first_chunk, -1, PCPU_FREE);

/* we're done */
pcpu_base_addr = base_addr;
--
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/