Re: New version of pageattr caching conflict fix for 2.4

From: Andi Kleen (ak@suse.de)
Date: Fri Jun 14 2002 - 11:13:28 EST


On Fri, Jun 14, 2002 at 11:28:49AM -0400, Benjamin LaHaise wrote:
> On Fri, Jun 14, 2002 at 06:27:54AM +0200, Andi Kleen wrote:
> > Both AMD x86-64 and Intel IA32 documentation states that INVLPG flushes global
> > TLBs. The first version of change_page_attr did in fact use __flush_tlb_all,
> > but I changed it after checking the docs.
>
> As Andrea pointed out, there is an errata whereby 4MB pages aren't flushed
> on the Athlon. If you mask off the low bits of the address for flushing,
> that should fix the problem, and sounds like a plausible explanation for
> the failure I saw.

Ok. I'm using __flush_tlb_all for now.

Here is the latest version; also fixing DRM and some more cleanups.

Review please.

-Andi

diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/arch/i386/mm/Makefile linux-2.4.19pre9-work/arch/i386/mm/Makefile
--- linux-2.4.19pre9/arch/i386/mm/Makefile Fri Dec 29 23:07:20 2000
+++ linux-2.4.19pre9-work/arch/i386/mm/Makefile Wed Jun 5 02:35:11 2002
@@ -9,6 +9,7 @@
 
 O_TARGET := mm.o
 
-obj-y := init.o fault.o ioremap.o extable.o
+obj-y := init.o fault.o ioremap.o extable.o pageattr.o
+export-objs := pageattr.o
 
 include $(TOPDIR)/Rules.make
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/arch/i386/mm/pageattr.c linux-2.4.19pre9-work/arch/i386/mm/pageattr.c
--- linux-2.4.19pre9/arch/i386/mm/pageattr.c Thu Jan 1 01:00:00 1970
+++ linux-2.4.19pre9-work/arch/i386/mm/pageattr.c Fri Jun 14 18:07:58 2002
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+/* Should move most of this stuff into the appropriate includes */
+#define PAGE_LARGE (_PAGE_PSE|_PAGE_PRESENT)
+#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+
+static inline pte_t *lookup_address(unsigned long address)
+{ /* Returns the init_mm entry mapping address: a pgd/pmd slot if large. */
+ pgd_t *top = pgd_offset(&init_mm, address);
+ pmd_t *mid;
+
+ if ((pgd_val(*top) & PAGE_LARGE) == PAGE_LARGE)
+ return (pte_t *)top; /* PSE and present bits both set at pgd level */
+ mid = pmd_offset(top, address);
+ if ((pmd_val(*mid) & PAGE_LARGE) != PAGE_LARGE)
+ return pte_offset(mid, address); /* regular pte-level entry */
+
+ return (pte_t *)mid; /* PSE and present bits both set at pmd level */
+}
+
+static struct page *split_large_page(unsigned long address, pgprot_t prot)
+{ /* Build a pte page covering address's large page; only address gets prot. */
+ struct page *base = alloc_pages(GFP_KERNEL, 0);
+ unsigned long target, phys;
+ pte_t *ptes;
+ int idx;
+ if (!base)
+ return NULL; /* allocation failed */
+ target = __pa(address);
+ phys = target & LARGE_PAGE_MASK; /* physical start of the large page */
+ ptes = (pte_t *)page_address(base);
+ for (idx = 0; idx < PTRS_PER_PTE; idx++, phys += PAGE_SIZE) {
+ pgprot_t p = (phys == target) ? prot : PAGE_KERNEL;
+ ptes[idx] = mk_pte_phys(phys, p);
+ }
+ return base;
+}
+
+static void flush_kernel_map(void * address)
+{ /* address is unused; it only matches the smp_call_function callback type */
+ /* wbinvd needs a 486+ (family >= 4); could use CLFLUSH on Hammer,P4 */
+ if (boot_cpu_data.x86 >= 4)
+ asm volatile("wbinvd":::"memory");
+ __flush_tlb_all(); /* not INVLPG: Athlon erratum with 4MB pages */
+}
+
+static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+{ /* Store pte at kpte and, without PAE, in every mm on init_mm's mmlist. */
+ set_pte_atomic(kpte, pte); /* change init_mm */
+#ifndef CONFIG_X86_PAE /* NOTE(review): presumably non-PAE mms copy kernel pgd entries - confirm */
+ {
+ struct list_head *l;
+ spin_lock(&mmlist_lock);
+ list_for_each(l, &init_mm.mmlist) {
+ struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist);
+ pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address);
+ set_pte_atomic((pte_t *)pmd, pte); /* pmd slot for address in this mm */
+ }
+ spin_unlock(&mmlist_lock);
+ }
+#endif
+}
+
+/* No more special protections in this 2/4MB area - revert to a
+ large page again. The kpte_page argument is not used by the body. */
+static inline void revert_page(struct page *kpte_page, unsigned long address)
+{
+ pte_t *linear = (pte_t *)
+ pmd_offset(pgd_offset(&init_mm, address), address);
+ set_pmd_pte(linear, address,
+ mk_pte_phys(__pa(address & LARGE_PAGE_MASK),
+ __pgprot(_KERNPG_TABLE|_PAGE_PSE)));
+}
+
+/*
+ * Change the page attributes of a page in the linear mapping.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ *
+ * The caller needs to ensure that there are no conflicting mappings elsewhere.
+ * This function only deals with the kernel linear map.
+ * When page is in highmem it must never be kmap'ed.
+ */
+static int
+__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
+{ /* Returns 0 or -ENOMEM; *oldpage is set only when revert_page() was called. */
+ pte_t *kpte;
+ unsigned long address;
+ struct page *kpte_page;
+
+#ifdef CONFIG_HIGHMEM
+ /* Hopefully not be mapped anywhere else. */
+ if (page >= highmem_start_page)
+ return 0;
+#endif
+ address = (unsigned long)page_address(page);
+
+ kpte = lookup_address(address);
+ kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); /* page holding kpte */
+ if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { /* non-default attributes requested */
+ if ((pte_val(*kpte) & _PAGE_PSE) == 0) { /* entry is not a large page */
+ pte_t old = *kpte;
+ pte_t standard = mk_pte(page, PAGE_KERNEL);
+
+ set_pte_atomic(kpte, mk_pte(page, prot));
+ if (pte_same(old,standard)) /* count only a change away from PAGE_KERNEL */
+ atomic_inc(&kpte_page->count);
+ } else { /* large page: replace it with a freshly built pte page */
+ struct page *split = split_large_page(address, prot);
+ if (!split)
+ return -ENOMEM;
+ set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL)); /* NOTE(review): kpte_page is not updated to split */
+ }
+ } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { /* restoring PAGE_KERNEL on a small entry */
+ set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
+ atomic_dec(&kpte_page->count);
+ }
+
+ if (test_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability) &&
+ (atomic_read(&kpte_page->count) == 1)) { /* NOTE(review): presumably means no special ptes remain - confirm */
+ *oldpage = kpte_page;
+ revert_page(kpte_page, address);
+ }
+ return 0;
+}
+
+int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{ /* Apply prot to numpages pages starting at page; returns 0 or the first error. */
+ int err = 0;
+ struct page *fpage;
+ int i;
+
+ down_write(&init_mm.mmap_sem);
+ for (i = 0; i < numpages; i++, page++) {
+ fpage = NULL;
+ err = __change_page_attr(page, prot, &fpage);
+ if (err)
+ break; /* NOTE(review): pages changed earlier in this call may not have been flushed */
+ if (fpage || i == numpages-1) { /* flush after a revert and after the last page */
+ void *address = page_address(page);
+#ifdef CONFIG_SMP
+ smp_call_function(flush_kernel_map, address, 1, 1);
+#endif
+ flush_kernel_map(address);
+ if (fpage)
+ __free_page(fpage); /* page reported through oldpage after revert_page() */
+ }
+ }
+ up_write(&init_mm.mmap_sem);
+ return err;
+}
+
+EXPORT_SYMBOL(change_page_attr);
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/drivers/char/agp/agpgart_be.c linux-2.4.19pre9-work/drivers/char/agp/agpgart_be.c
--- linux-2.4.19pre9/drivers/char/agp/agpgart_be.c Sun Jun 2 20:24:00 2002
+++ linux-2.4.19pre9-work/drivers/char/agp/agpgart_be.c Fri Jun 14 17:37:46 2002
@@ -397,7 +397,7 @@ int agp_unbind_memory(agp_memory * curr)
 static void agp_generic_agp_enable(u32 mode)
 {
         struct pci_dev *device = NULL;
- u32 command, scratch, cap_id;
+ u32 command, scratch;
         u8 cap_ptr;
 
         pci_read_config_dword(agp_bridge.dev,
@@ -561,6 +561,7 @@ static int agp_generic_create_gatt_table
                                             agp_bridge.current_size;
                                         break;
                                 }
+ temp = agp_bridge.current_size;
                         } else {
                                 agp_bridge.aperture_size_idx = i;
                         }
@@ -578,23 +579,21 @@ static int agp_generic_create_gatt_table
         }
         table_end = table + ((PAGE_SIZE * (1 << page_order)) - 1);
 
- for (page = virt_to_page(table); page <= virt_to_page(table_end); page++)
+ page = virt_to_page(table);
+
+#ifdef __i386__
+ if (change_page_attr(page, 1<<page_order, PAGE_KERNEL_NOCACHE) < 0)
+ return -ENOMEM;
+#endif
+ for (; page <= virt_to_page(table_end); page++) {
                 SetPageReserved(page);
+ }
 
         agp_bridge.gatt_table_real = (unsigned long *) table;
         CACHE_FLUSH();
- agp_bridge.gatt_table = ioremap_nocache(virt_to_phys(table),
- (PAGE_SIZE * (1 << page_order)));
+ agp_bridge.gatt_table = agp_bridge.gatt_table_real;
         CACHE_FLUSH();
 
- if (agp_bridge.gatt_table == NULL) {
- for (page = virt_to_page(table); page <= virt_to_page(table_end); page++)
- ClearPageReserved(page);
-
- free_pages((unsigned long) table, page_order);
-
- return -ENOMEM;
- }
         agp_bridge.gatt_bus_addr = virt_to_phys(agp_bridge.gatt_table_real);
 
         for (i = 0; i < num_entries; i++) {
@@ -651,7 +650,6 @@ static int agp_generic_free_gatt_table(v
          * from the table.
          */
 
- iounmap(agp_bridge.gatt_table);
         table = (char *) agp_bridge.gatt_table_real;
         table_end = table + ((PAGE_SIZE * (1 << page_order)) - 1);
 
@@ -769,6 +767,13 @@ static unsigned long agp_generic_alloc_p
         if (page == NULL) {
                 return 0;
         }
+#ifdef __i386__
+ if (change_page_attr(page, 1, PAGE_KERNEL_NOCACHE) < 0) {
+ __free_page(page);
+ return 0;
+ }
+#endif
+
         get_page(page);
         LockPage(page);
         atomic_inc(&agp_bridge.current_memory_agp);
@@ -785,6 +790,11 @@ static void agp_generic_destroy_page(uns
         }
         
         page = virt_to_page(pt);
+#ifdef __i386__
+ change_page_attr(page, 1, PAGE_KERNEL);
+#endif
+
+
         put_page(page);
         UnlockPage(page);
         free_page((unsigned long) pt);
@@ -2241,17 +2251,15 @@ static int amd_create_page_map(amd_page_
         if (page_map->real == NULL) {
                 return -ENOMEM;
         }
- SetPageReserved(virt_to_page(page_map->real));
- CACHE_FLUSH();
- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
- PAGE_SIZE);
- if (page_map->remapped == NULL) {
- ClearPageReserved(virt_to_page(page_map->real));
- free_page((unsigned long) page_map->real);
- page_map->real = NULL;
+#ifdef __i386__
+ if (change_page_attr(virt_to_page(page_map->real),
+ 1, PAGE_KERNEL_NOCACHE)) {
+ free_page(page_map->real);
                 return -ENOMEM;
         }
- CACHE_FLUSH();
+#endif
+ SetPageReserved(virt_to_page(page_map->real));
+ page_map->remapped = page_map->real;
 
         for(i = 0; i < PAGE_SIZE / sizeof(unsigned long); i++) {
                 page_map->remapped[i] = agp_bridge.scratch_page;
@@ -2262,7 +2270,6 @@ static int amd_create_page_map(amd_page_
 
 static void amd_free_page_map(amd_page_map *page_map)
 {
- iounmap(page_map->remapped);
         ClearPageReserved(virt_to_page(page_map->real));
         free_page((unsigned long) page_map->real);
 }
@@ -2744,27 +2751,22 @@ static void ali_cache_flush(void)
 
 static unsigned long ali_alloc_page(void)
 {
- struct page *page;
- u32 temp;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL)
+ unsigned long p = agp_generic_alloc_page();
+ if (!p)
                 return 0;
 
- get_page(page);
- LockPage(page);
- atomic_inc(&agp_bridge.current_memory_agp);
-
+ /* probably not needed anymore */
         global_cache_flush();
 
         if (agp_bridge.type == ALI_M1541) {
+ u32 temp;
                 pci_read_config_dword(agp_bridge.dev, ALI_CACHE_FLUSH_CTRL, &temp);
                 pci_write_config_dword(agp_bridge.dev, ALI_CACHE_FLUSH_CTRL,
                                 (((temp & ALI_CACHE_FLUSH_ADDR_MASK) |
- virt_to_phys(page_address(page))) |
+ virt_to_phys(p)) |
                                     ALI_CACHE_FLUSH_EN ));
         }
- return (unsigned long)page_address(page);
+ return p;
 }
 
 static void ali_destroy_page(unsigned long addr)
@@ -2786,11 +2788,7 @@ static void ali_destroy_page(unsigned lo
                                     ALI_CACHE_FLUSH_EN));
         }
 
- page = virt_to_page(pt);
- put_page(page);
- UnlockPage(page);
- free_page((unsigned long) pt);
- atomic_dec(&agp_bridge.current_memory_agp);
+ agp_generic_destroy_page(pt);
 }
 
 /* Setup function */
@@ -2870,17 +2868,15 @@ static int serverworks_create_page_map(s
         if (page_map->real == NULL) {
                 return -ENOMEM;
         }
- SetPageReserved(virt_to_page(page_map->real));
- CACHE_FLUSH();
- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
- PAGE_SIZE);
- if (page_map->remapped == NULL) {
- ClearPageReserved(virt_to_page(page_map->real));
- free_page((unsigned long) page_map->real);
- page_map->real = NULL;
+#ifdef __i386__
+ if (change_page_attr(virt_to_page(page_map->real), 1,
+ PAGE_KERNEL_NOCACHE) < 0) {
+ free_page(page_map->real);
                 return -ENOMEM;
         }
- CACHE_FLUSH();
+#endif
+ SetPageReserved(virt_to_page(page_map->real));
+ page_map->remapped = page_map->real;
 
         for(i = 0; i < PAGE_SIZE / sizeof(unsigned long); i++) {
                 page_map->remapped[i] = agp_bridge.scratch_page;
@@ -2891,7 +2887,6 @@ static int serverworks_create_page_map(s
 
 static void serverworks_free_page_map(serverworks_page_map *page_map)
 {
- iounmap(page_map->remapped);
         ClearPageReserved(virt_to_page(page_map->real));
         free_page((unsigned long) page_map->real);
 }
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/drivers/char/drm/drm_vm.h linux-2.4.19pre9-work/drivers/char/drm/drm_vm.h
--- linux-2.4.19pre9/drivers/char/drm/drm_vm.h Wed Jun 5 18:08:30 2002
+++ linux-2.4.19pre9-work/drivers/char/drm/drm_vm.h Fri Jun 14 06:05:08 2002
@@ -531,6 +531,11 @@ int DRM(mmap)(struct file *filp, struct
                         vma->vm_flags |= VM_IO; /* not in core dump */
                 }
                 offset = DRIVER_GET_REG_OFS();
+#ifdef __i386__
+ change_page_attr(vma->vm_pgoff,
+ (vma->vm_end - vma->vm_start)>>PAGE_SHIFT,
+ vma->vm_page_prot);
+#endif
 #ifdef __sparc__
                 if (io_remap_page_range(vma->vm_start,
                                         VM_OFFSET(vma) + offset,
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/include/asm-i386/pgtable-2level.h linux-2.4.19pre9-work/include/asm-i386/pgtable-2level.h
--- linux-2.4.19pre9/include/asm-i386/pgtable-2level.h Thu Jul 26 22:40:32 2001
+++ linux-2.4.19pre9-work/include/asm-i386/pgtable-2level.h Thu Jun 13 16:52:53 2002
@@ -40,6 +40,8 @@ static inline int pgd_present(pgd_t pgd)
  * hook is made available.
  */
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_atomic(pteptr, pteval) (*(pteptr) = pteval)
+
 /*
  * (pmds are folded into pgds so this doesnt get actually called,
  * but the define is needed for a generic inline function.)
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/include/asm-i386/pgtable-3level.h linux-2.4.19pre9-work/include/asm-i386/pgtable-3level.h
--- linux-2.4.19pre9/include/asm-i386/pgtable-3level.h Thu Jul 26 22:40:32 2001
+++ linux-2.4.19pre9-work/include/asm-i386/pgtable-3level.h Thu Jun 13 17:15:14 2002
@@ -53,6 +53,9 @@ static inline void set_pte(pte_t *ptep,
                 set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
 #define set_pgd(pgdptr,pgdval) \
                 set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval))
+#define set_pte_atomic(pteptr,pteval) \
+ set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
+
 
 /*
  * Pentium-II erratum A13: in PAE mode we explicitly have to flush
diff -burpN -X ../KDIFX -x *-o -x *.[ch]-* linux-2.4.19pre9/include/asm-i386/pgtable.h linux-2.4.19pre9-work/include/asm-i386/pgtable.h
--- linux-2.4.19pre9/include/asm-i386/pgtable.h Mon Jun 3 21:15:18 2002
+++ linux-2.4.19pre9-work/include/asm-i386/pgtable.h Fri Jun 14 17:45:29 2002
@@ -347,6 +347,9 @@ static inline pte_t pte_modify(pte_t pte
 #define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
 #define swp_entry_to_pte(x) ((pte_t) { (x).val })
 
+struct page;
+int change_page_attr(struct page *, int, pgprot_t prot);
+
 #endif /* !__ASSEMBLY__ */
 
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Jun 15 2002 - 22:00:31 EST