[patch] remove page->virtual

From: Andrew Morton (akpm@digeo.com)
Date: Thu Sep 19 2002 - 23:55:52 EST


Hot off the presses. Seems to work.

The patch removes page->virtual for all architectures which do not
define WANT_PAGE_VIRTUAL. Hash for it instead.

Possibly we could define WANT_PAGE_VIRTUAL for CONFIG_HIGHMEM4G, but it
seems unlikely.

A lot of the pressure went off kmap() and page_address() as a result of
the move to kmap_atomic(). kmap_atomic() should be the preferred way to
reduce the CPU load incurred by the hashing and locking in the new
set_page_address() and page_address() implementations.

If kmap_atomic is not usable then the next best approach is for users
to cache the result of kmap() in a local rather than calling
page_address() repeatedly.

One heavy user of kmap() and page_address() is the ext2 directory code.

On a 7G Quad PIII, running four concurrent instances of

        while true
        do
                find /usr/src/linux > /dev/null
        done

on ext2 with everything cached, profiling shows that the new hashed
set_page_address() and page_address() implementations consume 0.4% and
1.3% of CPU time respectively. I think that's OK. (Plus the tested code
was doing an unneeded lookup in set_page_address(), for debug purposes)

c0141684 865 1.31499 page_address
c013851c 871 1.32411 kmem_cache_alloc
c015a068 1038 1.57799 dget_locked
c014ddfc 1220 1.85467 vfs_getattr
c014dd80 1320 2.00669 generic_fillattr
c0144f88 1359 2.06598 sys_chdir
c0159e60 1384 2.10398 dput
c014e334 1385 2.1055 cp_new_stat64
c01090c0 1741 2.6467 system_call
c0145b6a 1753 2.66494 .text.lock.open
c0151114 1786 2.71511 path_release
c0155d3c 2172 3.30192 filldir64
c0187580 2473 3.7595 ext2_readdir
c01eb950 2562 3.8948 atomic_dec_and_lock
c01eb690 2814 4.2779 __generic_copy_to_user
c015aba4 3194 4.85558 __d_lookup
c015207c 3567 5.42262 path_lookup
c01515dc 3775 5.73883 link_path_walk
c01eb99e 3811 5.79355 .text.lock.dec_and_lock
c01546b3 5847 8.88872 .text.lock.namei
c01884f2 6914 10.5108 .text.lock.dir

 include/linux/mm.h | 48 ++++++++----------
 init/main.c | 1
 kernel/ksyms.c | 3 +
 mm/highmem.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 mm/page_alloc.c | 5 +
 5 files changed, 164 insertions(+), 34 deletions(-)

--- 2.5.36/include/linux/mm.h~remove-page-virtual Thu Sep 19 20:36:27 2002
+++ 2.5.36-akpm/include/linux/mm.h Thu Sep 19 21:03:06 2002
@@ -176,7 +176,7 @@ struct page {
          * Architectures with slow multiplication can define
          * WANT_PAGE_VIRTUAL in asm/page.h
          */
-#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+#if defined(WANT_PAGE_VIRTUAL)
         void *virtual; /* Kernel virtual address (NULL if
                                            not kmapped, ie. highmem) */
 #endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */
@@ -289,38 +289,34 @@ static inline void set_page_zone(struct
         page->flags |= zone_num << ZONE_SHIFT;
 }
 
-/*
- * In order to avoid #ifdefs within C code itself, we define
- * set_page_address to a noop for non-highmem machines, where
- * the field isn't useful.
- * The same is true for page_address() in arch-dependent code.
- */
-#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+#define lowmem_page_address(page) \
+ __va( ( ((page) - page_zone(page)->zone_mem_map) \
+ + page_zone(page)->zone_start_pfn) << PAGE_SHIFT)
+
+#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
+#define HASHED_PAGE_VIRTUAL
+#endif
 
+#if defined(WANT_PAGE_VIRTUAL)
+#define page_address(page) ((page)->virtual)
 #define set_page_address(page, address) \
         do { \
                 (page)->virtual = (address); \
         } while(0)
+#define page_address_init() do { } while(0)
+#endif
 
-#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
-#define set_page_address(page, address) do { } while(0)
-#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+#if defined(HASHED_PAGE_VIRTUAL)
+void *page_address(struct page *page);
+void set_page_address(struct page *page, void *virtual);
+void page_address_init(void);
+#endif
 
-/*
- * Permanent address of a page. Obviously must never be
- * called on a highmem page.
- */
-#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
-
-#define page_address(page) ((page)->virtual)
-
-#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
-
-#define page_address(page) \
- __va( ( ((page) - page_zone(page)->zone_mem_map) \
- + page_zone(page)->zone_start_pfn) << PAGE_SHIFT)
-
-#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
+#define page_address(page) lowmem_page_address(page)
+#define set_page_address(page, address) do { } while(0)
+#define page_address_init() do { } while(0)
+#endif
 
 /*
  * Return true if this page is mapped into pagetables. Subtle: test pte.direct
--- 2.5.36/init/main.c~remove-page-virtual Thu Sep 19 20:36:27 2002
+++ 2.5.36-akpm/init/main.c Thu Sep 19 20:36:27 2002
@@ -436,6 +436,7 @@ asmlinkage void __init start_kernel(void
                 initrd_start = 0;
         }
 #endif
+ page_address_init();
         mem_init();
         kmem_cache_sizes_init();
         pidhash_init();
--- 2.5.36/kernel/ksyms.c~remove-page-virtual Thu Sep 19 20:36:27 2002
+++ 2.5.36-akpm/kernel/ksyms.c Thu Sep 19 20:36:27 2002
@@ -133,6 +133,9 @@ EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(kmap_prot);
 EXPORT_SYMBOL(kmap_pte);
 #endif
+#ifdef HASHED_PAGE_VIRTUAL
+EXPORT_SYMBOL(page_address);
+#endif
 EXPORT_SYMBOL(get_user_pages);
 
 /* filesystem internal functions */
--- 2.5.36/mm/highmem.c~remove-page-virtual Thu Sep 19 20:36:27 2002
+++ 2.5.36-akpm/mm/highmem.c Thu Sep 19 20:46:18 2002
@@ -22,6 +22,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
+#include <linux/hash.h>
 #include <asm/pgalloc.h>
 
 static mempool_t *page_pool, *isa_page_pool;
@@ -88,7 +89,7 @@ static void flush_all_zero_pkmaps(void)
                 page = pte_page(pkmap_page_table[i]);
                 pte_clear(&pkmap_page_table[i]);
 
- page->virtual = NULL;
+ set_page_address(page, NULL);
         }
         flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
 }
@@ -126,8 +127,8 @@ start:
                         spin_lock(&kmap_lock);
 
                         /* Somebody else might have mapped it while we slept */
- if (page->virtual)
- return (unsigned long) page->virtual;
+ if (page_address(page))
+ return (unsigned long)page_address(page);
 
                         /* Re-start */
                         goto start;
@@ -137,7 +138,7 @@ start:
         set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
 
         pkmap_count[last_pkmap_nr] = 1;
- page->virtual = (void *) vaddr;
+ set_page_address(page, (void *)vaddr);
 
         return vaddr;
 }
@@ -153,7 +154,7 @@ void *kmap_high(struct page *page)
          * We cannot call this from interrupts, as it may block
          */
         spin_lock(&kmap_lock);
- vaddr = (unsigned long) page->virtual;
+ vaddr = (unsigned long)page_address(page);
         if (!vaddr)
                 vaddr = map_new_virtual(page);
         pkmap_count[PKMAP_NR(vaddr)]++;
@@ -170,7 +171,7 @@ void kunmap_high(struct page *page)
         int need_wakeup;
 
         spin_lock(&kmap_lock);
- vaddr = (unsigned long) page->virtual;
+ vaddr = (unsigned long)page_address(page);
         if (!vaddr)
                 BUG();
         nr = PKMAP_NR(vaddr);
@@ -467,7 +468,7 @@ void blk_queue_bounce(request_queue_t *q
         *bio_orig = bio;
 }
 
-#if CONFIG_DEBUG_HIGHMEM
+#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_HIGHMEM)
 void check_highmem_ptes(void)
 {
         int idx, type;
@@ -482,3 +483,129 @@ void check_highmem_ptes(void)
 }
 #endif
 
+#if defined(HASHED_PAGE_VIRTUAL)
+
+#define PA_HASH_ORDER 7
+
+/*
+ * Describes one page->virtual association
+ */
+static struct page_address_map {
+ struct page *page;
+ void *virtual;
+ struct list_head list;
+} page_address_maps[LAST_PKMAP];
+
+/*
+ * page_address_map freelist, allocated from page_address_maps.
+ */
+static struct list_head page_address_pool; /* freelist */
+static spinlock_t pool_lock; /* yech */
+
+/*
+ * Hash table bucket
+ */
+static struct page_address_slot {
+ struct list_head lh;
+ spinlock_t lock;
+} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
+
+static struct page_address_slot *page_slot(struct page *page)
+{
+ return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
+}
+
+void *page_address(struct page *page)
+{
+ unsigned long flags;
+ void *ret;
+ struct page_address_slot *pas;
+
+ if (!PageHighMem(page))
+ return lowmem_page_address(page);
+
+ pas = page_slot(page);
+ ret = NULL;
+ spin_lock_irqsave(&pas->lock, flags);
+ if (!list_empty(&pas->lh)) {
+ struct page_address_map *pam;
+
+ list_for_each_entry(pam, &pas->lh, list) {
+ if (pam->page == page) {
+ ret = pam->virtual;
+ goto done;
+ }
+ }
+ }
+done:
+ spin_unlock_irqrestore(&pas->lock, flags);
+ return ret;
+}
+
+void set_page_address(struct page *page, void *virtual)
+{
+ unsigned long flags;
+ struct page_address_slot *pas;
+ struct page_address_map *pam;
+
+ BUG_ON(!PageHighMem(page));
+
+ if (virtual) {
+ void *addr = page_address(page);
+ if (addr) {
+ printk("eek!\n");
+ if (addr != virtual)
+ printk("double eek!\n");
+ }
+ }
+
+ pas = page_slot(page);
+ if (virtual) {
+ BUG_ON(list_empty(&page_address_pool));
+
+ spin_lock_irqsave(&pool_lock, flags);
+ pam = list_entry(page_address_pool.next,
+ struct page_address_map, list);
+ list_del(&pam->list);
+ spin_unlock_irqrestore(&pool_lock, flags);
+
+ pam->page = page;
+ pam->virtual = virtual;
+
+ spin_lock_irqsave(&pas->lock, flags);
+ list_add_tail(&pam->list, &pas->lh);
+ spin_unlock_irqrestore(&pas->lock, flags);
+ } else {
+ spin_lock_irqsave(&pas->lock, flags);
+ list_for_each_entry(pam, &pas->lh, list) {
+ if (pam->page == page) {
+ list_del(&pam->list);
+ spin_unlock_irqrestore(&pas->lock, flags);
+ spin_lock_irqsave(&pool_lock, flags);
+ list_add_tail(&pam->list, &page_address_pool);
+ spin_unlock_irqrestore(&pool_lock, flags);
+ goto done;
+ }
+ }
+ spin_unlock_irqrestore(&pas->lock, flags);
+ printk("aargh!\n");
+ }
+done:
+ return;
+}
+
+void __init page_address_init(void)
+{
+ int i;
+
+ INIT_LIST_HEAD(&page_address_pool);
+ for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
+ list_add(&page_address_maps[i].list, &page_address_pool);
+ for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
+ INIT_LIST_HEAD(&page_address_htable[i].lh);
+ spin_lock_init(&page_address_htable[i].lock);
+ }
+ spin_lock_init(&pool_lock);
+}
+
+#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
--- 2.5.36/mm/page_alloc.c~remove-page-virtual Thu Sep 19 20:36:27 2002
+++ 2.5.36-akpm/mm/page_alloc.c Thu Sep 19 20:36:27 2002
@@ -917,12 +917,15 @@ void __init free_area_init_core(pg_data_
                         set_page_count(page, 0);
                         SetPageReserved(page);
                         INIT_LIST_HEAD(&page->list);
+#ifdef WANT_PAGE_VIRTUAL
                         if (j != ZONE_HIGHMEM)
                                 /*
                                  * The shift left won't overflow because the
                                  * ZONE_NORMAL is below 4G.
                                  */
- set_page_address(page, __va(zone_start_pfn << PAGE_SHIFT));
+ set_page_address(page,
+ __va(zone_start_pfn << PAGE_SHIFT));
+#endif
                         zone_start_pfn++;
                 }
 

.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon Sep 23 2002 - 22:00:29 EST