Re: [PATCH v5 5/6] maple_tree: add sufficient height

From: Liam R. Howlett
Date: Thu Apr 10 2025 - 15:19:33 EST


* Sidhartha Kumar <sidhartha.kumar@xxxxxxxxxx> [250410 15:14]:
> In order to support rebalancing and spanning stores using less than the
> worst case number of nodes, we need to track more than just the vacant
> height. Using only vacant height to reduce the worst case maple node
> allocation count can lead to a shortcoming of nodes in the following
> scenarios.
>
> For rebalancing writes, when a leaf node becomes insufficient, it may be
> combined with a sibling into a single node. This means that the parent node
> which has entries for this children will lose one entry. If this parent node
> was just meeting the minimum entries, losing one entry will now cause this
> parent node to be insufficient. This leads to a cascading operation of
> rebalancing at different levels and can lead to more node allocations than
> simply using vacant height can return.
>
> For spanning writes, a similar situation occurs. At the location at
> which a spanning write is detected, the number of ancestor nodes may
> similarly need to rebalanced into a smaller number of nodes and the same
> cascading situation could occur.
>
> To use less than the full height of the tree for the number of
> allocations, we also need to track the height at which a non-leaf node
> cannot become insufficient. This means even if a rebalance occurs to a
> child of this node, it currently has enough entries that it can lose one
> without any further action. This field is stored in the maple write state
> as sufficient height. In mas_prealloc_calc() when figuring out how many
> nodes to allocate, we check if the vacant node is lower in the tree
> than a sufficient node (has a larger value). If it is, we cannot use the
> vacant height and must use the difference in the height and sufficient
> height as the basis for the number of nodes needed.
>
> An off by one bug was also discovered in mast_overflow() where it is
> using >= rather than >. This caused extra iterations of the
> mas_spanning_rebalance() loop and lead to unneeded allocations. A test
> is also added to check the number of allocations is correct.
>
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@xxxxxxxxxx>

Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>

> ---
> include/linux/maple_tree.h | 4 +++-
> lib/maple_tree.c | 19 ++++++++++++++++---
> tools/testing/radix-tree/maple.c | 28 ++++++++++++++++++++++++++++
> 3 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> index 657adb33e61e..9ef129038224 100644
> --- a/include/linux/maple_tree.h
> +++ b/include/linux/maple_tree.h
> @@ -464,6 +464,7 @@ struct ma_wr_state {
> void *entry; /* The entry to write */
> void *content; /* The existing entry that is being overwritten */
> unsigned char vacant_height; /* Height of lowest node with free space */
> + unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */
> };
>
> #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock))
> @@ -499,7 +500,8 @@ struct ma_wr_state {
> .mas = ma_state, \
> .content = NULL, \
> .entry = wr_entry, \
> - .vacant_height = 0 \
> + .vacant_height = 0, \
> + .sufficient_height = 0 \
> }
>
> #define MA_TOPIARY(name, tree) \
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 5610b3742a79..aa139668bcae 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -2741,7 +2741,7 @@ static inline bool mast_sufficient(struct maple_subtree_state *mast)
> */
> static inline bool mast_overflow(struct maple_subtree_state *mast)
> {
> - if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
> + if (mast->bn->b_end > mt_slot_count(mast->orig_l->node))
> return true;
>
> return false;
> @@ -3550,6 +3550,13 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
> if (mas->end < mt_slots[wr_mas->type] - 1)
> wr_mas->vacant_height = mas->depth + 1;
>
> + if (ma_is_root(mas_mn(mas))) {
> + /* root needs more than 2 entries to be sufficient + 1 */
> + if (mas->end > 2)
> + wr_mas->sufficient_height = 1;
> + } else if (mas->end > mt_min_slots[wr_mas->type] + 1)
> + wr_mas->sufficient_height = mas->depth + 1;
> +
> mas_wr_walk_traverse(wr_mas);
> }
>
> @@ -4185,13 +4192,19 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
> ret = 0;
> break;
> case wr_spanning_store:
> - WARN_ON_ONCE(ret != height * 3 + 1);
> + if (wr_mas->sufficient_height < wr_mas->vacant_height)
> + ret = (height - wr_mas->sufficient_height) * 3 + 1;
> + else
> + ret = delta * 3 + 1;
> break;
> case wr_split_store:
> ret = delta * 2 + 1;
> break;
> case wr_rebalance:
> - ret = height * 2 + 1;
> + if (wr_mas->sufficient_height < wr_mas->vacant_height)
> + ret = (height - wr_mas->sufficient_height) * 2 + 1;
> + else
> + ret = delta * 2 + 1;
> break;
> case wr_node_store:
> ret = mt_in_rcu(mas->tree) ? 1 : 0;
> diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> index e37a3ab2e921..2c0b38301253 100644
> --- a/tools/testing/radix-tree/maple.c
> +++ b/tools/testing/radix-tree/maple.c
> @@ -36326,6 +36326,30 @@ static inline void check_spanning_store_height(struct maple_tree *mt)
> mas_unlock(&mas);
> }
>
> +/*
> + * Test to check the path of a spanning rebalance which results in
> + * a collapse where the rebalancing of the child node leads to
> + * insufficieny in the parent node.
> + */
> +static void check_collapsing_rebalance(struct maple_tree *mt)
> +{
> + int i = 0;
> + MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
> +
> + /* create a height 6 tree */
> + while (mt_height(mt) < 6) {
> + mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
> + i += 9;
> + }
> +
> + /* delete all entries one at a time, starting from the right */
> + do {
> + mas_erase(&mas);
> + } while (mas_prev(&mas, 0) != NULL);
> +
> + mtree_unlock(mt);
> +}
> +
> /* callback function used for check_nomem_writer_race() */
> static void writer2(void *maple_tree)
> {
> @@ -36496,6 +36520,10 @@ void farmer_tests(void)
> check_spanning_store_height(&tree);
> mtree_destroy(&tree);
>
> + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
> + check_collapsing_rebalance(&tree);
> + mtree_destroy(&tree);
> +
> mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
> check_null_expand(&tree);
> mtree_destroy(&tree);
> --
> 2.43.0
>