Re: [PATCH RFC 0/2] percpu_ida: Take into account CPU topology when stealing tags

From: Alexander Gordeev
Date: Thu May 01 2014 - 17:23:59 EST


On Tue, Apr 22, 2014 at 01:56:29PM +0200, Peter Zijlstra wrote:
> I've not had time to revisit/finish them, but you should definitely
> clean up the percpu_ida stuff and reduce existing contention before
> going overboard.

Hi Peter,

I tried to combine your series into a single patch against 3.15-rc3.

I hope it looks like what you intended, and that my comments in the
follow-up message make sense.
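
As a quick map of the result, the combined patch: (1) adds double_lock()
so that steal_tags() takes the local and the remote percpu lock in address
order; (2) replaces alloc_global_tags() with alloc_global_tag(), which
refills from the global freelist, falls back to stealing, and pops a tag
under the percpu lock; (3) rewrites percpu_ida_alloc() as a fast
alloc_local_tag() path plus a ___wait_event()-based slow path; and
(4) makes percpu_ida_for_each_free() disable interrupts per lock
acquisition instead of across the whole walk.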

Thanks!

diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 93d145e..14f5151 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -51,6 +51,15 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr,
 	*dst_nr += nr;
 }

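+/*
+ * Take two percpu freelist locks in a stable global order (ascending
+ * address) so that two steal_tags() callers grabbing the same pair of
+ * locks in opposite roles cannot ABBA-deadlock.
+ */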
+static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	spin_lock(l1);
+	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
 /*
  * Try to steal tags from a remote cpu's percpu freelist.
  *
@@ -84,7 +93,7 @@ static inline void steal_tags(struct percpu_ida *pool,
 		if (remote == tags)
 			continue;

-		spin_lock(&remote->lock);
+		double_lock(&tags->lock, &remote->lock);

 		if (remote->nr_free) {
 			memcpy(tags->freelist,
@@ -96,36 +105,81 @@ static inline void steal_tags(struct percpu_ida *pool,
 		}

 		spin_unlock(&remote->lock);
+		spin_unlock(&tags->lock);

 		if (tags->nr_free)
 			break;
 	}
 }

-/*
- * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
- * our percpu freelist:
- */
-static inline void alloc_global_tags(struct percpu_ida *pool,
-				     struct percpu_ida_cpu *tags)
-{
-	move_tags(tags->freelist, &tags->nr_free,
-		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, pool->percpu_batch_size));
-}
-
-static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
+static inline int alloc_local_tag(struct percpu_ida *pool)
 {
+	struct percpu_ida_cpu *tags;
+	unsigned long flags;
 	int tag = -ENOSPC;

+	local_irq_save(flags);
+	tags = this_cpu_ptr(pool->tag_cpu);
 	spin_lock(&tags->lock);
 	if (tags->nr_free)
 		tag = tags->freelist[--tags->nr_free];
-	spin_unlock(&tags->lock);
+	spin_unlock_irqrestore(&tags->lock, flags);

 	return tag;
 }

+static inline int alloc_global_tag(struct percpu_ida *pool)
+{
+	struct percpu_ida_cpu *tags;
+	unsigned long flags;
+	int tag = -ENOSPC;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	tags = this_cpu_ptr(pool->tag_cpu);
+
+	if (!tags->nr_free) {
+		/*
+		 * Move tags from the global-, onto our percpu-freelist.
+		 */
+		move_tags(tags->freelist, &tags->nr_free,
+			  pool->freelist, &pool->nr_free,
+			  min(pool->nr_free, pool->percpu_batch_size));
+	}
+
+	spin_unlock(&pool->lock);
+
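+	/*
+	 * Still empty after refilling from the global freelist: as a
+	 * last resort, try to steal tags from another CPU's percpu
+	 * freelist.
+	 */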
+	if (!tags->nr_free)
+		steal_tags(pool, tags);
+
+	if (tags->nr_free) {
+		spin_lock(&tags->lock);
+		if (tags->nr_free) {
+			tag = tags->freelist[--tags->nr_free];
+			if (tags->nr_free) {
+				cpumask_set_cpu(smp_processor_id(),
+						&pool->cpus_have_tags);
+			}
+		}
+		spin_unlock(&tags->lock);
+	}
+
+	local_irq_restore(flags);
+
+	return tag;
+}
+
 /**
  * percpu_ida_alloc - allocate a tag
  * @pool: pool to allocate from
@@ -146,66 +200,26 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
  */
 int percpu_ida_alloc(struct percpu_ida *pool, int state)
 {
-	DEFINE_WAIT(wait);
-	struct percpu_ida_cpu *tags;
-	unsigned long flags;
-	int tag;
-
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
-
-	/* Fastpath */
-	tag = alloc_local_tag(tags);
-	if (likely(tag >= 0)) {
-		local_irq_restore(flags);
-		return tag;
-	}
-
-	while (1) {
-		spin_lock(&pool->lock);
-
-		/*
-		 * prepare_to_wait() must come before steal_tags(), in case
-		 * percpu_ida_free() on another cpu flips a bit in
-		 * cpus_have_tags
-		 *
-		 * global lock held and irqs disabled, don't need percpu lock
-		 */
-		if (state != TASK_RUNNING)
-			prepare_to_wait(&pool->wait, &wait, state);
-
-		if (!tags->nr_free)
-			alloc_global_tags(pool, tags);
-		if (!tags->nr_free)
-			steal_tags(pool, tags);
-
-		if (tags->nr_free) {
-			tag = tags->freelist[--tags->nr_free];
-			if (tags->nr_free)
-				cpumask_set_cpu(smp_processor_id(),
-						&pool->cpus_have_tags);
-		}
-
-		spin_unlock(&pool->lock);
-		local_irq_restore(flags);
-
-		if (tag >= 0 || state == TASK_RUNNING)
-			break;
-
-		if (signal_pending_state(state, current)) {
-			tag = -ERESTARTSYS;
-			break;
-		}
-
-		schedule();
-
-		local_irq_save(flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
-	}
-	if (state != TASK_RUNNING)
-		finish_wait(&pool->wait, &wait);
-
-	return tag;
+	int ret;
+
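+	/* Fast path: pop a tag off this CPU's percpu freelist. */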
+	ret = alloc_local_tag(pool);
+	if (likely(ret >= 0))
+		return ret;
+
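+	/*
+	 * Slow path: refill from the global freelist and/or steal from
+	 * other CPUs, sleeping in @state (when the caller allows it)
+	 * until a tag turns up.  ___wait_event() returns -ERESTARTSYS
+	 * if a signal arrives first.
+	 */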
+	if (state != TASK_RUNNING) {
+		int tag;
+
+		ret = ___wait_event(pool->wait,
+				    (tag = alloc_global_tag(pool)) >= 0,
+				    state, 0, 0, schedule());
+
+		if (tag >= 0)
+			ret = tag;
+	} else {
+		ret = alloc_global_tag(pool);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_alloc);

@@ -239,12 +253,19 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 		wake_up(&pool->wait);
 	}

+	/*
+	 * No point in waking when 1 < n < max_size tags are free, because
+	 * steal_tags() will assume max_size per set bit; having more tags
+	 * free makes it neither more nor less likely to visit this cpu.
+	 */
+
 	if (nr_free == pool->percpu_max_size) {
 		spin_lock(&pool->lock);

 		/*
-		 * Global lock held and irqs disabled, don't need percpu
-		 * lock
+		 * Global lock held and irqs disabled, don't need percpu lock
+		 * because everybody accessing remote @tags will hold
+		 * pool->lock -- steal_tags().
 		 */
 		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
@@ -344,33 +365,31 @@ EXPORT_SYMBOL_GPL(__percpu_ida_init);
 int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
 	void *data)
 {
-	unsigned long flags;
 	struct percpu_ida_cpu *remote;
-	unsigned cpu, i, err = 0;
+	unsigned long flags;
+	int cpu, i, err = 0;

-	local_irq_save(flags);
 	for_each_possible_cpu(cpu) {
 		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock(&remote->lock);
+		spin_lock_irqsave(&remote->lock, flags);
 		for (i = 0; i < remote->nr_free; i++) {
 			err = fn(remote->freelist[i], data);
 			if (err)
 				break;
 		}
-		spin_unlock(&remote->lock);
+		spin_unlock_irqrestore(&remote->lock, flags);
 		if (err)
-			goto out;
+			return err;
 	}

-	spin_lock(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	for (i = 0; i < pool->nr_free; i++) {
 		err = fn(pool->freelist[i], data);
 		if (err)
 			break;
 	}
-	spin_unlock(&pool->lock);
-out:
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
 	return err;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
--
1.7.7.6
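
For completeness, a minimal usage sketch of the API this touches -- not
part of the patch; "example_pool", the tag count and the calling functions
are made up for illustration, while the percpu_ida_* calls are the real
<linux/percpu_ida.h> entry points, whose calling convention the rewritten
percpu_ida_alloc() keeps:

/*
 * Hypothetical driver-side usage; names are illustrative only.
 */
#include <linux/percpu_ida.h>
#include <linux/sched.h>

static struct percpu_ida example_pool;

static int example_init(void)
{
	/* Tags 0..127; percpu max/batch sizes take their defaults. */
	return percpu_ida_init(&example_pool, 128);
}

static int example_submit(void)
{
	int tag;

	/*
	 * Pops from this CPU's freelist when possible; otherwise sleeps
	 * interruptibly until a tag is freed.  Returns -ERESTARTSYS on a
	 * signal; with TASK_RUNNING it would not sleep and could return
	 * -ENOSPC instead.
	 */
	tag = percpu_ida_alloc(&example_pool, TASK_INTERRUPTIBLE);
	if (tag < 0)
		return tag;

	/* ... tag indexes some preallocated per-request state ... */

	percpu_ida_free(&example_pool, tag);
	return 0;
}
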


--
Regards,
Alexander Gordeev
agordeev@xxxxxxxxxx