Re: [RFC] Rebasing the IDR

From: Daniel Vetter
Date: Thu Nov 30 2017 - 14:56:50 EST


Adding dri-devel, I think a pile of those are in drm.

On Thu, Nov 30, 2017 at 6:36 PM, Matthew Wilcox <willy@xxxxxxxxxxxxx> wrote:
> About 40 of the approximately 180 users of the IDR in the kernel are
> "1-based" instead of "0-based". That is, they never want to have ID 0
> allocated; they want to see IDs allocated between 1 and N. Usually, that's
> expressed like this:
>
> /* Get the user-visible handle using idr. */
> ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
>
> The current implementation of this grieves me. You see, we mark each
> node of the radix tree according to whether it has any free entries
> or not, and entry 0 is always free! If we've already allocated 10,000
> entries from this IDR, and see this call, we'll walk all the way down
> the left side of the tree looking for entry 1, get to the bottom,
> see that entries 1-63 are allocated, then walk up to the next level,
> see that 64-4095 are allocated, walk up to the next level, see that
> 8192-12287 has a free entry, and then start walking down again.
>
> It'd be somewhere around two to three times quicker to know not to
> look down the left hand side of the tree, see that 1-8191 are used and
> start looking in the range 8192-12287. I considered a function like
> idr_reserve(idr, N) to reserve the first N entries (we have at least one
> consumer in the tree that is 2-based, not 1-based), but that struck me
> as inelegant. We also have the cool optimisation that if there's only
> one element in the radix tree at offset 0, then we don't even need
> to allocate a node to store it, we just store it right in the head.
> And that optimisation was never available to the 1-based users.
>
> I've come up with this solution instead, idr_base. It's free in terms
> of memory consumption on 64-bit as it fits in the gap at the end of the
> struct idr. Essentially, we just subtract off idr_base when looking
> up the ID, and add it back on when reporting the ID. We need to do some
> saturating arithmetic in idr_get_next(), because starting a walk at 4
> billion or negative 8 quintillion doesn't work out terribly well.
>
> It does require the user to call idr_init_base() instead of idr_init().
> That should be a relatively small conversion effort. Opinions? Feedback?
> Better test cases for the test-suite (ahem)?

Just quick context on why we do this: We need to marshal/demarshal
NULL in a ton of our ioctls, mapping that to 0 is natural.

And yes I very much like this, and totally fine with trading an
idr_init_base when we can nuke the explicit index ranges at every
alloc side instead. So if you give me an idr_alloc function that
doesn't take a range as icing, that would be great.

Cheers, Daniel

>
> diff --git a/include/linux/idr.h b/include/linux/idr.h
> index 91d27a9bcdf4..a657b1f925f8 100644
> --- a/include/linux/idr.h
> +++ b/include/linux/idr.h
> @@ -18,6 +18,7 @@
>
> struct idr {
> struct radix_tree_root idr_rt;
> + unsigned int idr_base;
> unsigned int idr_next;
> };
>
> @@ -30,9 +31,10 @@ struct idr {
> /* Set the IDR flag and the IDR_FREE tag */
> #define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
>
> -#define IDR_INIT \
> -{ \
> - .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER) \
> +#define IDR_INIT { \
> + .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER), \
> + .idr_base = 0, \
> + .idr_next = 0, \
> }
> #define DEFINE_IDR(name) struct idr name = IDR_INIT
>
> @@ -123,12 +125,20 @@ static inline int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
>
> static inline void *idr_remove(struct idr *idr, unsigned long id)
> {
> - return radix_tree_delete_item(&idr->idr_rt, id, NULL);
> + return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL);
> }
>
> static inline void idr_init(struct idr *idr)
> {
> INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> + idr->idr_base = 0;
> + idr->idr_next = 0;
> +}
> +
> +static inline void idr_init_base(struct idr *idr, int base)
> +{
> + INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> + idr->idr_base = base;
> idr->idr_next = 0;
> }
>
> @@ -163,7 +173,7 @@ static inline void idr_preload_end(void)
> */
> static inline void *idr_find(const struct idr *idr, unsigned long id)
> {
> - return radix_tree_lookup(&idr->idr_rt, id);
> + return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
> }
>
> /**
> diff --git a/lib/idr.c b/lib/idr.c
> index 1aaeb5a8c426..a1d3531bd62f 100644
> --- a/lib/idr.c
> +++ b/lib/idr.c
> @@ -33,21 +33,22 @@ int idr_alloc_ul(struct idr *idr, void *ptr, unsigned long *nextid,
> {
> struct radix_tree_iter iter;
> void __rcu **slot;
> + int base = idr->idr_base;
>
> if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
> return -EINVAL;
> if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
> idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
>
> - radix_tree_iter_init(&iter, *nextid);
> - slot = idr_get_free(&idr->idr_rt, &iter, gfp, max);
> + radix_tree_iter_init(&iter, *nextid - base);
> + slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
> if (IS_ERR(slot))
> return PTR_ERR(slot);
>
> radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
> radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
>
> - *nextid = iter.index;
> + *nextid = iter.index + base;
> return 0;
> }
> EXPORT_SYMBOL_GPL(idr_alloc_ul);
> @@ -143,13 +144,14 @@ int idr_for_each(const struct idr *idr,
> {
> struct radix_tree_iter iter;
> void __rcu **slot;
> + int base = idr->idr_base;
>
> radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
> int ret;
>
> if (WARN_ON_ONCE(iter.index > INT_MAX))
> break;
> - ret = fn(iter.index, rcu_dereference_raw(*slot), data);
> + ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
> if (ret)
> return ret;
> }
> @@ -172,15 +174,19 @@ void *idr_get_next(struct idr *idr, int *nextid)
> {
> struct radix_tree_iter iter;
> void __rcu **slot;
> + int base = idr->idr_base;
> + int id = *nextid;
>
> - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> + id = (id < base) ? 0 : id - base;
> + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
> if (!slot)
> return NULL;
> + id = iter.index + base;
>
> - if (WARN_ON_ONCE(iter.index > INT_MAX))
> + if (WARN_ON_ONCE(id > INT_MAX))
> return NULL;
>
> - *nextid = iter.index;
> + *nextid = id;
> return rcu_dereference_raw(*slot);
> }
> EXPORT_SYMBOL(idr_get_next);
> @@ -199,12 +205,15 @@ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
> {
> struct radix_tree_iter iter;
> void __rcu **slot;
> + unsigned long base = idr->idr_base;
> + unsigned long id = *nextid;
>
> - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> + id = (id < base) ? 0 : id - base;
> + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
> if (!slot)
> return NULL;
>
> - *nextid = iter.index;
> + *nextid = iter.index + base;
> return rcu_dereference_raw(*slot);
> }
> EXPORT_SYMBOL(idr_get_next_ul);
> @@ -231,6 +240,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
>
> if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
> return ERR_PTR(-EINVAL);
> + id -= idr->idr_base;
>
> entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
> if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
> diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
> index 36437ade429c..44ef9eba5a7a 100644
> --- a/tools/testing/radix-tree/idr-test.c
> +++ b/tools/testing/radix-tree/idr-test.c
> @@ -153,11 +153,12 @@ void idr_nowait_test(void)
> idr_destroy(&idr);
> }
>
> -void idr_get_next_test(void)
> +void idr_get_next_test(int base)
> {
> unsigned long i;
> int nextid;
> DEFINE_IDR(idr);
> + idr_init_base(&idr, base);
>
> int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
>
> @@ -244,7 +245,9 @@ void idr_checks(void)
> idr_alloc_test();
> idr_null_test();
> idr_nowait_test();
> - idr_get_next_test();
> + idr_get_next_test(0);
> + idr_get_next_test(1);
> + idr_get_next_test(4);
> }
>
> /*



--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch