per-cpu pte_chain freelists

From: William Lee Irwin III (wli@holomorphy.com)
Date: Tue May 21 2002 - 18:02:14 EST


Seems to do a little better than per-zone.
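
The gist: each zone now keeps one small pte_chain freelist per cpu, indexed by
smp_processor_id(), so the alloc/free fast paths only touch the local cpu's
list and the zone-wide pte_chain_freelist_lock goes away. Here is a minimal
userspace sketch of that shape (the fixed NR_CPUS value, the simplified helper
names and the standalone main() are my own illustration, not the kernel code):

#include <stddef.h>
#include <stdio.h>

#define NR_CPUS 4                      /* illustration only */

struct pte_chain {
        struct pte_chain *next;
        void *ptep;
};

struct pte_chain_freelist {
        unsigned long count;
        struct pte_chain *list;
};

/* one freelist per cpu, like zone->pte_chain_freelists[] in the patch */
static struct pte_chain_freelist freelists[NR_CPUS];

/* only the owning cpu ever touches its list, so no spinlock is taken */
static void freelist_push(int cpu, struct pte_chain *pc)
{
        pc->ptep = NULL;
        pc->next = freelists[cpu].list;
        freelists[cpu].list = pc;
        freelists[cpu].count++;
}

static struct pte_chain *freelist_pop(int cpu)
{
        struct pte_chain *pc = freelists[cpu].list;

        if (!pc)
                return NULL;    /* the real code refills from a fresh page */
        freelists[cpu].list = pc->next;
        pc->next = NULL;
        freelists[cpu].count--;
        return pc;
}

int main(void)
{
        static struct pte_chain chains[8];
        int i;

        for (i = 0; i < 8; ++i)
                freelist_push(0, &chains[i]);   /* pretend we are cpu 0 */
        while (freelist_pop(0))
                ;
        printf("cpu 0 freelist drained, count = %lu\n", freelists[0].count);
        return 0;
}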

Cheers,
Bill

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.423 -> 1.424
# mm/rmap.c 1.7 -> 1.8
# include/linux/mmzone.h 1.11 -> 1.12
# mm/page_alloc.c 1.47 -> 1.48
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/05/21 wli@tisifone.holomorphy.com 1.424
# per-cpu pte_chain freelists
# --------------------------------------------
#
diff --minimal -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h Tue May 21 15:55:48 2002
+++ b/include/linux/mmzone.h Tue May 21 15:55:48 2002
@@ -29,6 +29,11 @@
 
 #define MAX_CHUNKS_PER_NODE 8
 
+struct pte_chain_freelist {
+        unsigned long count;
+        struct pte_chain *list;
+};
+
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
  * into multiple physical zones. On a PC we have 3 zones:
@@ -56,8 +61,8 @@
         struct list_head inactive_dirty_list;
         struct list_head inactive_clean_list;
         free_area_t free_area[MAX_ORDER];
-        spinlock_t pte_chain_freelist_lock;
-        struct pte_chain *pte_chain_freelist;
+
+        struct pte_chain_freelist pte_chain_freelists[NR_CPUS];
 
         /*
          * wait_table -- the array holding the hash table
diff --minimal -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c Tue May 21 15:55:48 2002
+++ b/mm/page_alloc.c Tue May 21 15:55:48 2002
@@ -908,11 +908,13 @@
                 zone->inactive_clean_pages = 0;
                 zone->inactive_dirty_pages = 0;
                 zone->need_balance = 0;
-                zone->pte_chain_freelist = NULL;
                 INIT_LIST_HEAD(&zone->active_list);
                 INIT_LIST_HEAD(&zone->inactive_dirty_list);
                 INIT_LIST_HEAD(&zone->inactive_clean_list);
-                spin_lock_init(&zone->pte_chain_freelist_lock);
+                for (i = 0; i < NR_CPUS; ++i) {
+                        zone->pte_chain_freelists[i].count = 0;
+                        zone->pte_chain_freelists[i].list = NULL;
+                }
 
                 if (!size)
                         continue;
diff --minimal -Nru a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c Tue May 21 15:55:48 2002
+++ b/mm/rmap.c Tue May 21 15:55:48 2002
@@ -51,7 +51,7 @@
 static inline struct pte_chain * pte_chain_alloc(zone_t *);
 static inline void pte_chain_free(struct pte_chain *, struct pte_chain *,
                 struct page *, zone_t *);
-static void alloc_new_pte_chains(zone_t *);
+static void alloc_new_pte_chains(zone_t *, int cpu);
 
 /**
  * page_referenced - test if the page was referenced
@@ -335,25 +335,36 @@
  ** functions.
  **/
 
-static inline void pte_chain_push(zone_t * zone,
-                struct pte_chain * pte_chain)
+static inline void pte_chain_push(struct pte_chain ** list, struct pte_chain * pte_chain)
 {
         pte_chain->ptep = NULL;
-        pte_chain->next = zone->pte_chain_freelist;
-        zone->pte_chain_freelist = pte_chain;
+        pte_chain->next = *list;
+        *list = pte_chain;
 }
 
-static inline struct pte_chain * pte_chain_pop(zone_t * zone)
+static inline struct pte_chain * pte_chain_pop(struct pte_chain ** list)
 {
         struct pte_chain *pte_chain;
 
-        pte_chain = zone->pte_chain_freelist;
-        zone->pte_chain_freelist = pte_chain->next;
+        pte_chain = *list;
+        *list = pte_chain->next;
         pte_chain->next = NULL;
 
         return pte_chain;
 }
 
+static inline void add_to_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu, struct pte_chain * pte_chain)
+{
+        pte_chain_push(&freelists[cpu].list, pte_chain);
+        freelists[cpu].count++;
+}
+
+static inline struct pte_chain * del_from_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu)
+{
+        freelists[cpu].count--;
+        return pte_chain_pop(&freelists[cpu].list);
+}
+
 /**
  * pte_chain_free - free pte_chain structure
  * @pte_chain: pte_chain struct to free
@@ -370,14 +381,17 @@
                 struct pte_chain * prev_pte_chain, struct page * page,
                 zone_t * zone)
 {
+        int cpu = smp_processor_id();
+        struct pte_chain_freelist *freelists;
+
+        freelists = zone->pte_chain_freelists;
+
         if (prev_pte_chain)
                 prev_pte_chain->next = pte_chain->next;
         else if (page)
                 page->pte_chain = pte_chain->next;
 
-        spin_lock(&zone->pte_chain_freelist_lock);
-        pte_chain_push(zone, pte_chain);
-        spin_unlock(&zone->pte_chain_freelist_lock);
+        add_to_pte_chain_freelist(freelists, cpu, pte_chain);
 }
 
 /**
@@ -391,17 +405,19 @@
 static inline struct pte_chain * pte_chain_alloc(zone_t * zone)
 {
         struct pte_chain * pte_chain;
+        struct pte_chain_freelist *freelists;
+        int cpu = smp_processor_id();
+
+        freelists = zone->pte_chain_freelists;
 
-        spin_lock(&zone->pte_chain_freelist_lock);
 
         /* Allocate new pte_chain structs as needed. */
-        if (!zone->pte_chain_freelist)
-                alloc_new_pte_chains(zone);
+        if (!freelists[cpu].list)
+                alloc_new_pte_chains(zone, cpu);
 
         /* Grab the first pte_chain from the freelist. */
-        pte_chain = pte_chain_pop(zone);
+        pte_chain = del_from_pte_chain_freelist(freelists, cpu);
 
-        spin_unlock(&zone->pte_chain_freelist_lock);
 
         return pte_chain;
 }
@@ -409,6 +425,7 @@
 /**
  * alloc_new_pte_chains - convert a free page to pte_chain structures
  * @zone: memory zone to allocate pte_chains for
+ * @cpu: cpu pte_chains are to be allocated for
  *
  * Grabs a free page and converts it to pte_chain structures. We really
  * should pre-allocate these earlier in the pagefault path or come up
@@ -416,18 +433,33 @@
  *
  * Note that we cannot use the slab cache because the pte_chain structure
  * is way smaller than the minimum size of a slab cache allocation.
- * Caller needs to hold the zone->pte_chain_freelist_lock
+ * Caller must be running on @cpu; the per-cpu freelists are used without a lock.
  */
-static void alloc_new_pte_chains(zone_t *zone)
+
+#define PTE_CHAINS_PER_PAGE (PAGE_SIZE/sizeof(struct pte_chain))
+
+static void alloc_new_pte_chains(zone_t *zone, int cpu)
 {
-        struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
-        int i = PAGE_SIZE / sizeof(struct pte_chain);
+        struct pte_chain * pte_chain;
+        struct pte_chain_freelist *freelists = zone->pte_chain_freelists;
+        int i;
 
-        if (pte_chain) {
-                for (; i-- > 0; pte_chain++)
-                        pte_chain_push(zone, pte_chain);
-        } else {
-                /* Yeah yeah, I'll fix the pte_chain allocation ... */
+        /*
+         * Either the per-cpu freelists are not badly unbalanced or there
+         * is good reason to allocate, so atomically get a fresh page and
+         * hand it back as pte_chains.
+         */
+        pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
+
+        /* Yeah yeah, I'll fix the pte_chain allocation ... */
+        if (!pte_chain)
                 panic("Fix pte_chain allocation, you lazy bastard!\n");
-        }
+
+        /*
+         * Be greedy and give ourselves the chains. If some cpu wants
+         * them, it'll eventually end up taking them above.
+         */
+        freelists[cpu].count += PTE_CHAINS_PER_PAGE;
+        for (i = 0; i < PTE_CHAINS_PER_PAGE; ++i)
+                pte_chain_push(&freelists[cpu].list, &pte_chain[i]);
 }
-
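
For reference, alloc_new_pte_chains() above refills in batches: it grabs one
zeroed page with get_zeroed_page(GFP_ATOMIC) and carves it into
PTE_CHAINS_PER_PAGE == PAGE_SIZE/sizeof(struct pte_chain) chains for the
current cpu. A rough standalone illustration of that carving (userspace, with
calloc() standing in for get_zeroed_page() and a 4096-byte PAGE_SIZE assumed
for the sake of the arithmetic):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096                 /* assumed here; really per-arch */

struct pte_chain {
        struct pte_chain *next;
        void *ptep;
};

#define PTE_CHAINS_PER_PAGE (PAGE_SIZE / sizeof(struct pte_chain))

int main(void)
{
        /* stand-in for get_zeroed_page(GFP_ATOMIC) */
        struct pte_chain *page = calloc(1, PAGE_SIZE);
        struct pte_chain *list = NULL;
        size_t i;

        if (!page)
                return 1;

        /* carve the page into pte_chains and push them all onto one list */
        for (i = 0; i < PTE_CHAINS_PER_PAGE; ++i) {
                page[i].next = list;
                list = &page[i];
        }

        printf("%zu pte_chains per %d-byte page (sizeof(struct pte_chain) == %zu)\n",
               PTE_CHAINS_PER_PAGE, PAGE_SIZE, sizeof(struct pte_chain));
        free(page);
        return 0;
}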