[PATCH 3/4] shmem: use radix-tree iterator in shmem_unuse_inode()

From: Konstantin Khlebnikov
Date: Fri Feb 10 2012 - 14:42:33 EST


This patch rewrites shmem swap entry searching with using radix tree iterator
and removes radix_tree_locate_item() which is used only in shmem.
Tagged radix-tree iterating would skip normal pages much more effectively.

Test: push 1Gb tmpfs file into swap and call # time swapoff.
Virtual machine: without patch: 35 seconds, with patch: 7 seconds.
Real hardware: without patch: 180 seconds, with patch: 100 seconds.
(Vm: qemu, swap on ssd, mostly all in host ram. Rh: swap on hdd.)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxx>
---
include/linux/radix-tree.h | 1
lib/radix-tree.c | 93 --------------------------------------------
mm/shmem.c | 27 ++++++++++---
3 files changed, 22 insertions(+), 99 deletions(-)

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index f59d6c8..665396f 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -250,7 +250,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long nr_to_tag,
unsigned int fromtag, unsigned int totag);
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);

static inline void radix_tree_preload_end(void)
{
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index b0158a2..d193d27 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1120,99 +1120,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

-#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
-#include <linux/sched.h> /* for cond_resched() */
-
-/*
- * This linear search is at present only useful to shmem_unuse_inode().
- */
-static unsigned long __locate(struct radix_tree_node *slot, void *item,
- unsigned long index, unsigned long *found_index)
-{
- unsigned int shift, height;
- unsigned long i;
-
- height = slot->height;
- shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
- for ( ; height > 1; height--) {
- i = (index >> shift) & RADIX_TREE_MAP_MASK;
- for (;;) {
- if (slot->slots[i] != NULL)
- break;
- index &= ~((1UL << shift) - 1);
- index += 1UL << shift;
- if (index == 0)
- goto out; /* 32-bit wraparound */
- i++;
- if (i == RADIX_TREE_MAP_SIZE)
- goto out;
- }
-
- shift -= RADIX_TREE_MAP_SHIFT;
- slot = rcu_dereference_raw(slot->slots[i]);
- if (slot == NULL)
- goto out;
- }
-
- /* Bottom level: check items */
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
- if (slot->slots[i] == item) {
- *found_index = index + i;
- index = 0;
- goto out;
- }
- }
- index += RADIX_TREE_MAP_SIZE;
-out:
- return index;
-}
-
-/**
- * radix_tree_locate_item - search through radix tree for item
- * @root: radix tree root
- * @item: item to be found
- *
- * Returns index where item was found, or -1 if not found.
- * Caller must hold no lock (since this time-consuming function needs
- * to be preemptible), and must check afterwards if item is still there.
- */
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
- struct radix_tree_node *node;
- unsigned long max_index;
- unsigned long cur_index = 0;
- unsigned long found_index = -1;
-
- do {
- rcu_read_lock();
- node = rcu_dereference_raw(root->rnode);
- if (!radix_tree_is_indirect_ptr(node)) {
- rcu_read_unlock();
- if (node == item)
- found_index = 0;
- break;
- }
-
- node = indirect_to_ptr(node);
- max_index = radix_tree_maxindex(node->height);
- if (cur_index > max_index)
- break;
-
- cur_index = __locate(node, item, cur_index, &found_index);
- rcu_read_unlock();
- cond_resched();
- } while (cur_index != 0 && cur_index <= max_index);
-
- return found_index;
-}
-#else
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
- return -1;
-}
-#endif /* CONFIG_SHMEM && CONFIG_SWAP */
-
/**
* radix_tree_shrink - shrink height of a radix tree to minimal
* @root radix tree root
diff --git a/mm/shmem.c b/mm/shmem.c
index b8e5f90..7a3fe08 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -613,14 +613,31 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
{
struct address_space *mapping = info->vfs_inode.i_mapping;
void *radswap;
- pgoff_t index;
+ struct radix_tree_iter iter;
+ void **slot;
int error;

radswap = swp_to_radix_entry(swap);
- index = radix_tree_locate_item(&mapping->page_tree, radswap);
- if (index == -1)
- return 0;

+ rcu_read_lock();
+ radix_tree_for_each_chunk(slot, &mapping->page_tree, &iter, 0,
+ RADIX_TREE_ITER_TAGGED | SHMEM_TAG_SWAP) {
+ radix_tree_for_each_chunk_slot(slot, &iter,
+ RADIX_TREE_ITER_TAGGED) {
+ if (*slot != radswap)
+ continue;
+ rcu_read_unlock();
+ goto found;
+ }
+ rcu_read_unlock();
+ cond_resched();
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ return 0;
+
+found:
/*
* Move _head_ to start search for next from here.
* But be careful: shmem_evict_inode checks list_empty without taking
@@ -635,7 +652,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
* but also to hold up shmem_evict_inode(): so inode cannot be freed
* beneath us (pagelock doesn't help until the page is in pagecache).
*/
- error = shmem_add_to_page_cache(page, mapping, index,
+ error = shmem_add_to_page_cache(page, mapping, iter.index,
GFP_NOWAIT, radswap);
/* which does mem_cgroup_uncharge_cache_page on error */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/