Re: [PATCH v3 1/4] lib/dlock-list: Distributed and lock-protected lists

From: Tejun Heo
Date: Mon Jul 18 2016 - 19:38:13 EST


Hello, Waiman.

On Fri, Jul 15, 2016 at 01:39:40PM -0400, Waiman Long wrote:
> Suggested-by: Tejun Heo <tj@xxxxxxxxxx>

Not sure I should be on suggested-by given that this wasn't my idea at
all.

> +/*
> + * include/linux/dlock-list.h
> + *
> + * A distributed (per-cpu) set of lists each of which is protected by its
> + * own spinlock, but acts like a single consolidated list to the callers.
> + *
> + * The dlock_list_head_percpu structure contains the spinlock, the other
> + * dlock_list_node structures only contains a pointer to the spinlock in
> + * dlock_list_head_percpu.
> + */

The more I think about it, the more bothered I'm about the dlock_list
name. For the most part, this isn't different from other percpu data
structures in the kernel. Sure, it might benefit from doing Nth cpu,
but so are other percpu data structures and it's not just "distributed
lock" list either. The list itself is percpu, not just locking. Can
we please go back to percpu_list? Christoph, what do you think?

> +struct dlock_list_node {
> + struct list_head list;
> + spinlock_t *lockptr;
> +};

Wouldn't it be better to point to dlock_list_percpu?

> +#define DLOCK_LIST_HEAD_PERCPU_INIT(name) \
> + { \
> + .list.prev = &name.list, \
> + .list.next = &name.list, \

Use LIST_HEAD_INIT()? Also, why do we even need the initializers if
the data structure can only be dynamically allocated. In fact, do
the definitions even need to be exposed in the header?

> + .list.lock = __SPIN_LOCK_UNLOCKED(name), \
> + }
> +
> +/*
> + * dlock list iteration state
> + */
> +struct dlock_list_iter {
> + int cpu;
^
I'm not sure lining up with space here is common in kernel.

> + spinlock_t *lock;
> + struct list_head *head; /* List head of current per-cpu list */
> + struct dlock_list_node *curr;
> + struct dlock_list_node *next;
> +};
> +
> +#define DLOCK_LIST_ITER_INIT() \
^
Don't we usually omit () in these cases?
> + { \
> + .cpu = -1, \
> + }
> +
> +#define DEFINE_DLOCK_LIST_ITER(s) \
> + struct dlock_list_iter s = DLOCK_LIST_ITER_INIT()
> +
> +static inline void init_dlock_list_iter(struct dlock_list_iter *iter)
> +{
> + *iter = (struct dlock_list_iter)DLOCK_LIST_ITER_INIT();
> +}
> +
> +#define DLOCK_LIST_NODE_INIT(name) \
> + { \
> + .list.prev = &name.list, \
> + .list.next = &name.list, \
^
LIST_HEAD_INIT()?
> + }
> +
> +static inline void init_dlock_list_node(struct dlock_list_node *node)
> +{
> + INIT_LIST_HEAD(&node->list);
> + node->lockptr = NULL;

Why not use DLOCK_LIST_NODE_INIT()?

> +}
> +
> +/*
> + * Check if all the dlock lists are empty
> + *
> + * This can be a pretty expensive function call. If this function is required
> + * in a performance critical path, we may have to maintain a global count
> + * of the list entries in the global dlock_list_head structure instead.
> + */

/** function comment please.

> +static inline bool dlock_list_empty(struct dlock_list_head *dlist)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + if (!list_empty(&per_cpu_ptr(dlist->head, cpu)->list))
> + return false;
> + return true;
> +}
> +
> +/*
> + * Allocation and freeing of dlock list
> + */
> +extern int alloc_dlock_list_head(struct dlock_list_head *dlist);
^
ditto with alignment

> +extern void free_dlock_list_head(struct dlock_list_head *dlist);
> +
> +/*
> + * The dlock list iteration functions which return true if iteration has
> + * to be continued.
> + */
> +extern bool dlock_list_next(struct dlock_list_head *dlist,
> + struct dlock_list_iter *iter);
> +extern bool dlock_list_next_safe(struct dlock_list_head *dlist,
> + struct dlock_list_iter *iter);

Why not return dlock_list_node * for the current node? That'd more
conventional and allows dlock_list_iter to be opaque.

> diff --git a/lib/dlock-list.c b/lib/dlock-list.c
> new file mode 100644
> index 0000000..af4a9f3
> --- /dev/null
> +++ b/lib/dlock-list.c
...
> +int alloc_dlock_list_head(struct dlock_list_head *dlist)
> +{
> + struct dlock_list_head dlist_tmp;
> + int cpu;
> +
> + dlist_tmp.head = alloc_percpu(struct dlock_list_head_percpu);
> + if (!dlist_tmp.head)
> + return -ENOMEM;
> +
> + for_each_possible_cpu(cpu) {
> + struct dlock_list_head_percpu *head;
> +
> + head = per_cpu_ptr(dlist_tmp.head, cpu);
> + INIT_LIST_HEAD(&head->list);
> + head->lock = __SPIN_LOCK_UNLOCKED(&head->lock);
> + lockdep_set_class(&head->lock, &dlock_list_key);
> + }
> +
> + dlist->head = dlist_tmp.head;

Just use dlist->head directly or use local __perpcu head pointer?

> + return 0;
> +}
> +EXPORT_SYMBOL(alloc_dlock_list_head);

Does this actually need to be exported? If so, it might be a better
idea to start with EXPORT_SYMBOL_GPL().

> +void dlock_list_add(struct dlock_list_node *node,
> + struct dlock_list_head *dlist)
> +{
> + struct dlock_list_head_percpu *head;
^

This probably requires __percpu annotation. Have you run it through
sparse and checked for address space warnings?

> +
> + /*
> + * Disable preemption to make sure that CPU won't gets changed.
> + */
> + head = get_cpu_ptr(dlist->head);
> + spin_lock(&head->lock);
> + node->lockptr = &head->lock;
> + list_add(&node->list, &head->list);
> + spin_unlock(&head->lock);
> + put_cpu_ptr(dlist->head);
> +}
> +EXPORT_SYMBOL(dlock_list_add);
> +
> +/**
> + * dlock_list_del - Delete a node from a dlock list
> + * @node : The node to be deleted
> + *
> + * We need to check the lock pointer again after taking the lock to guard
> + * against concurrent deletion of the same node. If the lock pointer changes
> + * (becomes NULL or to a different one), we assume that the deletion was done
> + * elsewhere. A warning will be printed if this happens as it is likely to be
> + * a bug.
> + */
> +void dlock_list_del(struct dlock_list_node *node)
> +{
> + spinlock_t *lock = READ_ONCE(node->lockptr);
> +
> + if (unlikely(!lock)) {
> + WARN_ONCE(1,
> + "dlock_list_del: node 0x%lx has no associated lock\n",
> + (unsigned long)node);

Maybe "if (WARN_ONCE(!lock...)"? WARN_ONCE implies unlikely.

> + return;
> + }
> +
> + spin_lock(lock);
> + if (likely(lock == node->lockptr)) {
> + list_del_init(&node->list);
> + node->lockptr = NULL;
> + } else {
> + /*
> + * This path should never be executed.
> + */
> + WARN_ON_ONCE(1);
> + }

This still kinda bothers me because this pretty much requires the
users to have strong synchronization around the operations and makes
it unusable in situations where opportunistic behaviors are
acceptable. It negates the usefulness quite a bit.

> + spin_unlock(lock);
> +}
> +EXPORT_SYMBOL(dlock_list_del);
> +
> +/*
> + * Helper function to find the first entry of the next per-cpu list
> + * It works somewhat like for_each_possible_cpu(cpu).
> + *
> + * Return: true if the entry is found, false if all the lists exhausted
> + *
> + */
> +static inline bool dlock_list_next_cpu(struct dlock_list_head *dlist,
^
Just let the compiler figure it out.

> + struct dlock_list_iter *iter)
> +{
> + if (iter->lock)
> + spin_unlock(iter->lock);
> +next_cpu:
> + /*
> + * for_each_possible_cpu(cpu)
> + */
> + iter->cpu = cpumask_next(iter->cpu, cpu_possible_mask);
> + if (iter->cpu >= nr_cpu_ids)
> + return false; /* All the per-cpu lists iterated */
> +
> + iter->head = &per_cpu_ptr(dlist->head, iter->cpu)->list;
> + if (list_empty(iter->head))
> + goto next_cpu;
> +
> + iter->lock = &per_cpu_ptr(dlist->head, iter->cpu)->lock;
> + spin_lock(iter->lock);
> + /*
> + * There is a slight chance that the list may become empty just
> + * before the lock is acquired. So an additional check is
> + * needed to make sure that iter->curr points to a valid entry.
> + */
> + if (list_empty(iter->head)) {
> + spin_unlock(iter->lock);
> + goto next_cpu;
> + }
> + iter->curr = list_entry(iter->head->next,
> + struct dlock_list_node, list);
> + return true;
> +}
...
> +/**
> + * dlock_list_next_safe - Removal-safe iterator of dlock list
> + * @dlist: Pointer to the dlock_list_head structure
> + * @iter : Pointer to the dlock list iterator structure
> + * Return: true if the next entry is found, false if all the entries iterated
> + *
> + * The iterator has to be properly initialized before calling this function.
> + * This iteration function is safe with respect to list entry removal.
> + * However, it cannot correctly iterate newly added entries right after the
> + * current one.
> + */

This still looks wrong to me. If you want to provide the two variants
of iterations, can't you just implement one next function and build
the two types of iterations on top of it?

Thanks.

--
tejun