Re: [PATCH v5 7/7] locking/lockdep: Add a fast path for chain_hlocks allocation

From: Peter Zijlstra
Date: Tue Feb 04 2020 - 08:44:53 EST


On Tue, Feb 04, 2020 at 02:18:13PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 03, 2020 at 11:41:47AM -0500, Waiman Long wrote:
>
> > @@ -2809,6 +2813,18 @@ static int alloc_chain_hlocks(int req)
> > return curr;
> > }
> >
> > + /*
> > + * Fast path: splitting out a sub-block at the end of the
> > + * primordial chain block.
> > + */
> > + if (likely((size > MAX_LOCK_DEPTH) &&
> > + (size - req > MAX_CHAIN_BUCKETS))) {
> > + size -= req;
> > + nr_free_chain_hlocks -= req;
> > + init_chain_block_size(curr, size);
> > + return curr + size;
> > + }
> > +
> > if (size > max_size) {
> > max_prev = prev;
> > max_curr = curr;
>
> A less horrible hack might be to keep the freelist sorted on size (large
> -> small)
>
> That moves the linear-search from alloc_chain_hlocks() into
> add_chain_block(). But the thing is that it would amortize to O(1)
> because this initial chunk is pretty much 'always' the largest.
>
> Only once we've exhausted the initial block will we hit that search, but
> then the hope is that we mostly live off of the buckets, not the
> variable freelist.

Completely untested something like so

---

--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2717,7 +2717,7 @@ static inline void init_chain_block(int
static inline void add_chain_block(int offset, int size)
{
int bucket = size_to_bucket(size);
- int next = chain_block_buckets[bucket];
+ int prev, curr, next = chain_block_buckets[bucket];

if (unlikely(size < 2)) {
/*
@@ -2731,26 +2731,38 @@ static inline void add_chain_block(int o
return;
}

- init_chain_block(offset, next, bucket, size);
- chain_block_buckets[bucket] = offset;
- nr_free_chain_hlocks += size;
-
- if (bucket == 0)
+ if (!bucket) {
nr_large_chain_blocks++;
+ /*
+ * Variable sized, sort large to small.
+ */
+ for_each_chain_block(0, prev, curr, next) {
+ if (size > chain_block_size(curr))
+ break;
+ }
+ init_chain_block(offset, curr, 0, size);
+ if (prev < 0)
+ chain_block_buckets[prev + MAX_CHAIN_BUCKETS] = offset;
+ else
+ init_chain_block(prev, offset, MAX_CHAIN_BUCKETS, 0);
+ } else {
+ /*
+ * Fixed size, add to head.
+ */
+ init_chain_block(offset, next, bucket, size);
+ chain_block_buckets[bucket] = offset;
+ }
+ nr_free_chain_hlocks += size;
}

+
/*
* When offset < 0, the bucket number is encoded in it.
*/
-static inline void del_chain_block(int offset, int size, int next)
+static inline void del_chain_block(int bucket, int size, int next)
{
- int bucket = size_to_bucket(size);
-
nr_free_chain_hlocks -= size;
- if (offset < 0)
- chain_block_buckets[offset + MAX_CHAIN_BUCKETS] = next;
- else
- init_chain_block(offset, next, bucket, size);
+ chain_block_buckets[bucket] = next;

if (bucket == 0)
nr_large_chain_blocks--;
@@ -2774,9 +2786,7 @@ static void init_chain_block_buckets(voi
*/
static int alloc_chain_hlocks(int req)
{
- int max_prev, max_curr, max_next, max_size = INT_MIN;
- int prev, curr, next, size;
- int bucket;
+ int bucket, curr, size;

/*
* We rely on the MSB to act as an escape bit to denote freelist
@@ -2798,40 +2808,24 @@ static int alloc_chain_hlocks(int req)

if (bucket != 0) {
curr = chain_block_buckets[bucket];
- if (curr < 0)
- goto search;
-
- del_chain_block(bucket - MAX_CHAIN_BUCKETS, req,
- chain_block_next(curr));
- return curr;
+ if (curr >= 0) {
+ del_chain_block(bucket, req, chain_block_next(curr));
+ return curr;
+ }
}

-search:
/*
- * linear search of the variable sized freelist; look for an exact
- * match or the largest block.
+ * The variable sized freelist is sorted by size; the first entry is
+ * the largest. Use it if it fits.
*/
- for_each_chain_block(0, prev, curr, next) {
+ curr = chain_block_buckets[0];
+ if (curr >= 0) {
size = chain_block_size(curr);
- next = chain_block_next(curr);
-
- if (size == req) {
- del_chain_block(prev, req, next);
+ if (likely(size > req)) {
+ del_chain_block(0, size, chain_block_next(curr));
+ add_chain_block(curr + req, size - req);
return curr;
}
-
- if (size > max_size) {
- max_prev = prev;
- max_curr = curr;
- max_next = next;
- max_size = size;
- }
- }
-
- if (likely(max_size > req)) {
- del_chain_block(max_prev, max_size, max_next);
- add_chain_block(max_curr + req, max_size - req);
- return max_curr;
}

/*
@@ -2843,8 +2837,7 @@ static int alloc_chain_hlocks(int req)
if (curr < 0)
continue;

- del_chain_block(bucket - MAX_CHAIN_BUCKETS, size,
- chain_block_next(curr));
+ del_chain_block(bucket, size, chain_block_next(curr));
add_chain_block(curr + req, size - req);
return curr;
}