[RESEND PATCH 04/13] idr, radix-tree: Implement copy_preload

From: Matthew Wilcox
Date: Tue Jul 11 2017 - 09:05:38 EST


In the file descriptor table duplication code (called at fork()), we
need to duplicate an IDR. But we have to do it under a lock (so another
thread doesn't open/close a fd in the middle), and there's no suitable
preload operation for this today. Adding just idr_copy_preload() isn't
enough as another thread could grow the fd table between starting the
preload and acquiring the lock. We also need idr_check_preload() to be
called after acquiring the lock.

Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
---
include/linux/idr.h | 32 +++++++++++
include/linux/radix-tree.h | 3 ++
lib/radix-tree.c | 83 +++++++++++++++++++++++++++++
tools/testing/radix-tree/idr-test.c | 24 +++++++++
tools/testing/radix-tree/linux/radix-tree.h | 2 +
tools/testing/radix-tree/main.c | 38 +++++++++++++
6 files changed, 182 insertions(+)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index d43cf01..eed1c1a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -166,6 +166,38 @@ static inline bool idr_is_empty(const struct idr *idr)
}

/**
+ * idr_copy_preload - preload for idr_copy()
+ * @src: IDR to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to idr_copy(). This function
+ * returns with preemption disabled. Call idr_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+static inline int idr_copy_preload(const struct idr *src, gfp_t gfp)
+{
+ return radix_tree_copy_preload(&src->idr_rt, gfp);
+}
+
+/**
+ * idr_check_preload - Check the preload is still sufficient
+ * @src: IDR to be copied from
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the IDR may have expanded. If this function returns
+ * false, more memory needs to be preallocated.
+ *
+ * Return: true if enough memory remains allocated, false to retry the
+ * preallocation.
+ */
+static inline bool idr_check_preload(const struct idr *src)
+{
+ return radix_tree_check_preload(&src->idr_rt);
+}
+
+/**
* idr_preload_end - end preload section started with idr_preload()
*
* Each idr_preload() should be matched with an invocation of this
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index f701e0b..f53d004 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -354,6 +354,9 @@ static inline void radix_tree_preload_end(void)
preempt_enable();
}

+int radix_tree_copy_preload(const struct radix_tree_root *, gfp_t);
+bool radix_tree_check_preload(const struct radix_tree_root *);
+
int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
int radix_tree_split(struct radix_tree_root *, unsigned long index,
unsigned new_order);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 855ac8e..c1d75224 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -277,6 +277,55 @@ static unsigned long next_index(unsigned long index,
return (index & ~node_maxindex(node)) + (offset << node->shift);
}

+/**
+ * radix_tree_count_nodes - Returns the number of nodes in this tree
+ * @root: radix tree root
+ *
+ * This routine does not examine every node in the tree; it assumes that
+ * all entries in the tree at level 1 are nodes. It will overestimate
+ * the number of nodes in the tree in the presence of entries of order
+ * RADIX_TREE_MAP_SHIFT or higher.
+ */
+static unsigned long radix_tree_count_nodes(const struct radix_tree_root *root)
+{
+ struct radix_tree_node *node, *child;
+ unsigned long n = 1;
+ void *entry = rcu_dereference_raw(root->rnode);
+ unsigned int offset = 0;
+
+ if (!radix_tree_is_internal_node(entry))
+ return 0;
+
+ node = entry_to_node(entry);
+ if (!node->shift)
+ return n;
+
+ n += node->count;
+ if (node->shift == RADIX_TREE_MAP_SHIFT)
+ return n;
+
+ while (node) {
+ if (offset == RADIX_TREE_MAP_SIZE) {
+ offset = node->offset + 1;
+ node = node->parent;
+ continue;
+ }
+
+ entry = rcu_dereference_raw(node->slots[offset]);
+ offset++;
+ if (!radix_tree_is_internal_node(entry))
+ continue;
+ child = entry_to_node(entry);
+ n += child->count;
+ if (node->shift <= 2 * RADIX_TREE_MAP_SHIFT)
+ continue;
+ offset = 0;
+ node = child;
+ }
+
+ return n;
+}
+
#ifndef __KERNEL__
static void dump_node(struct radix_tree_node *node, unsigned long index)
{
@@ -530,6 +579,40 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

+/**
+ * radix_tree_copy_preload - preload for radix_tree_copy()
+ * @src: radix tree root to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to radix_tree_copy(). This function
+ * returns with preemption disabled. Call radix_tree_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+int radix_tree_copy_preload(const struct radix_tree_root *src, gfp_t gfp_mask)
+{
+ return __radix_tree_preload(gfp_mask, radix_tree_count_nodes(src));
+}
+
+/**
+ * radix_tree_check_preload - Check the preload is still sufficient
+ * @src: radix tree to be copied from
+ * @cookie: Cookie returned from radix_tree_copy_preload()
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the radix tree may have expanded. Call this function
+ * to see if the preallocation needs to be expanded too.
+ *
+ * Return: true if enough memory remains allocated, false if it does not
+ * suffice.
+ */
+bool radix_tree_check_preload(const struct radix_tree_root *src)
+{
+ struct radix_tree_preload *rtp = this_cpu_ptr(&radix_tree_preloads);
+ return rtp->nr >= radix_tree_count_nodes(src);
+}
+
#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
* Preload with enough objects to ensure that we can split a single entry
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 3f9f429..e8d5386 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -225,6 +225,29 @@ void idr_get_next_test(void)
idr_destroy(&idr);
}

+void idr_copy_test(void)
+{
+ DEFINE_IDR(src);
+ DEFINE_IDR(dst);
+ int i;
+ void *p;
+
+ for (i = 0; i < 10000; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&src, item, 0, 20000, GFP_KERNEL) == i);
+ }
+
+ idr_copy_preload(&src, GFP_KERNEL);
+ idr_for_each_entry(&src, p, i) {
+ assert(idr_alloc(&dst, p, i, i + 1, GFP_NOWAIT) == i);
+ }
+ idr_preload_end();
+
+ idr_for_each(&src, item_idr_free, NULL);
+ idr_destroy(&src);
+ idr_destroy(&dst);
+}
+
void idr_checks(void)
{
unsigned long i;
@@ -276,6 +299,7 @@ void idr_checks(void)
idr_tag_test();
idr_nowait_test();
idr_get_next_test();
+ idr_copy_test();
}

/*
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index bf1bb23..0c5885e 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -4,6 +4,8 @@
#include "generated/map-shift.h"
#include "../../../../include/linux/radix-tree.h"

+unsigned long radix_tree_count_nodes(const struct radix_tree_root *root);
+
extern int kmalloc_verbose;
extern int test_verbose;

diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index bc9a784..a7504e2 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -11,6 +11,42 @@
#include "test.h"
#include "regression.h"

+static void count_node_check(void)
+{
+ unsigned long i, j, k;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ assert(radix_tree_empty(&tree));
+ assert(radix_tree_count_nodes(&tree) == 0);
+ assert(item_insert(&tree, 0) == 0);
+ assert(radix_tree_count_nodes(&tree) == 0);
+
+ for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) {
+ assert(item_insert(&tree, i) == 0);
+ assert(radix_tree_count_nodes(&tree) == 1);
+ }
+
+ j = 3;
+ k = 3;
+ for (i = RADIX_TREE_MAP_SIZE; i < i * RADIX_TREE_MAP_SIZE;
+ i *= RADIX_TREE_MAP_SIZE) {
+ assert(item_insert(&tree, i) == 0);
+ assert(radix_tree_count_nodes(&tree) == j);
+ j += k;
+ k++;
+ }
+
+ assert(item_insert(&tree, i) == 0);
+ assert(radix_tree_count_nodes(&tree) == j);
+
+ item_kill_tree(&tree);
+}
+
+void basic_checks(void)
+{
+ count_node_check();
+}
+
void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
{
long idx;
@@ -362,6 +398,8 @@ int main(int argc, char **argv)
rcu_register_thread();
radix_tree_init();

+ basic_checks();
+
regression1_test();
regression2_test();
regression3_test();
--
1.8.3.1