[patch] Re: 2.4.3-pre6: agpart.o causes arch/i386/mm/ioremap.c hang

From: Andrew Morton (andrewm@uow.edu.au)
Date: Fri Mar 23 2001 - 07:44:31 EST


"Adam J. Richter" wrote:
>
> In case anyone is interested, the conflicting lock of
> init_mm.page_table_lock was acquired in line 1318 of mm/memory.c,
> in pte_alloc.

You can sorta blame me for that. I reviewed the locking in
the mmap_sem patch and said it was correct :(

I only checked that the new locking was correct, rather than
checking that the new locking *rules* were being complied
with kernel-wide.

pmd_alloc() has the same problem. It requires ->page_table_lock,
and we have bugs there. Fixed in this patch.

Linus, this patch includes the mm->rss locking stuff which
I sent yesterday.

Also, is the comment over remap_page_range true? Should the caller
be doing a down_write(mmap_sem)? If so, then there are
about thirty callers who don't. I've looked at each one,
and it is safe to do the down_write() *within* remap_page_range().

--- linux-2.4.3-pre6/fs/exec.c Thu Mar 22 18:52:52 2001
+++ linux-akpm/fs/exec.c Fri Mar 23 22:08:48 2001
@@ -252,6 +252,8 @@
 /*
  * This routine is used to map in a page into an address space: needed by
  * execve() for the initial stack and environment pages.
+ *
+ * tsk->mmap_sem is held for writing.
  */
 void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
 {
@@ -291,6 +293,7 @@
         unsigned long stack_base;
         struct vm_area_struct *mpnt;
         int i;
+ unsigned long rss_increment = 0;
 
         stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
 
@@ -322,11 +325,14 @@
                 struct page *page = bprm->page[i];
                 if (page) {
                         bprm->page[i] = NULL;
- current->mm->rss++;
+ rss_increment++;
                         put_dirty_page(current,page,stack_base);
                 }
                 stack_base += PAGE_SIZE;
         }
+ spin_lock(&current->mm->page_table_lock);
+ current->mm->rss += rss_increment;
+ spin_unlock(&current->mm->page_table_lock);
         up_write(&current->mm->mmap_sem);
         
         return 0;
--- linux-2.4.3-pre6/include/linux/mm.h Thu Mar 22 18:52:52 2001
+++ linux-akpm/include/linux/mm.h Fri Mar 23 23:32:00 2001
@@ -407,6 +407,8 @@
  * On a two-level page table, this ends up being trivial. Thus the
  * inlining and the symmetry break with pte_alloc() that does all
  * of this out-of-line.
+ *
+ * __pmd_alloc() requires that mm->page_table_lock be held, so we do too.
  */
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
--- linux-2.4.3-pre6/include/linux/sched.h Thu Mar 22 18:52:52 2001
+++ linux-akpm/include/linux/sched.h Fri Mar 23 22:08:48 2001
@@ -209,9 +209,12 @@
         atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
         int map_count; /* number of VMAs */
         struct rw_semaphore mmap_sem;
- spinlock_t page_table_lock;
+ spinlock_t page_table_lock; /* Protects task page tables and mm->rss */
 
- struct list_head mmlist; /* List of all active mm's */
+ struct list_head mmlist; /* List of all active mm's. These are globally strung
+ * together off init_mm.mmlist, and are protected
+ * by mmlist_lock
+ */
 
         unsigned long start_code, end_code, start_data, end_data;
         unsigned long start_brk, brk, start_stack;
--- linux-2.4.3-pre6/mm/memory.c Fri Mar 23 22:16:55 2001
+++ linux-akpm/mm/memory.c Fri Mar 23 23:32:05 2001
@@ -374,7 +374,6 @@
                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
                 dir++;
         } while (address && (address < end));
- spin_unlock(&mm->page_table_lock);
         /*
          * Update rss for the mm_struct (not necessarily current->mm)
          * Notice that rss is an unsigned long.
@@ -383,6 +382,7 @@
                 mm->rss -= freed;
         else
                 mm->rss = 0;
+ spin_unlock(&mm->page_table_lock);
 }
 
 
@@ -655,6 +655,7 @@
         } while (address && (address < end));
 }
 
+/* mm->page_table_lock must be held */
 static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
                                     unsigned long size, pgprot_t prot)
 {
@@ -734,6 +735,7 @@
         } while (address && (address < end));
 }
 
+/* mm->page_table_lock must be held */
 static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
         unsigned long phys_addr, pgprot_t prot)
 {
@@ -792,6 +794,8 @@
  * - flush the old one
  * - update the page tables
  * - inform the TLB about the new one
+ *
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
@@ -800,6 +804,9 @@
         update_mmu_cache(vma, address, entry);
 }
 
+/*
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
+ */
 static inline void break_cow(struct vm_area_struct * vma, struct page * old_page, struct page * new_page, unsigned long address,
                 pte_t *page_table)
 {
@@ -1024,8 +1031,7 @@
 }
 
 /*
- * We hold the mm semaphore and the page_table_lock on entry
- * and exit.
+ * We hold the mm semaphore and the page_table_lock on entry and exit.
  */
 static int do_swap_page(struct mm_struct * mm,
         struct vm_area_struct * vma, unsigned long address,
--- linux-2.4.3-pre6/mm/mmap.c Thu Mar 22 18:52:52 2001
+++ linux-akpm/mm/mmap.c Fri Mar 23 22:08:48 2001
@@ -889,8 +889,8 @@
         spin_lock(&mm->page_table_lock);
         mpnt = mm->mmap;
         mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
- spin_unlock(&mm->page_table_lock);
         mm->rss = 0;
+ spin_unlock(&mm->page_table_lock);
         mm->total_vm = 0;
         mm->locked_vm = 0;
 
--- linux-2.4.3-pre6/mm/mremap.c Thu Mar 22 18:52:52 2001
+++ linux-akpm/mm/mremap.c Fri Mar 23 22:22:04 2001
@@ -15,6 +15,11 @@
 
 extern int vm_enough_memory(long pages);
 
+/*
+ * Throughout all the following functions, mm->mmap_sem must be held for
+ * writing, and mm->page_table_lock must be held
+ */
+
 static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t * pgd;
--- linux-2.4.3-pre6/mm/swapfile.c Sun Feb 25 17:37:14 2001
+++ linux-akpm/mm/swapfile.c Fri Mar 23 22:08:48 2001
@@ -209,6 +209,7 @@
  * share this swap entry, so be cautious and let do_wp_page work out
  * what to do if a write is requested later.
  */
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
         pte_t *dir, swp_entry_t entry, struct page* page)
 {
@@ -234,6 +235,7 @@
         ++vma->vm_mm->rss;
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
         unsigned long address, unsigned long size, unsigned long offset,
         swp_entry_t entry, struct page* page)
@@ -261,6 +263,7 @@
         } while (address && (address < end));
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
         unsigned long address, unsigned long size,
         swp_entry_t entry, struct page* page)
@@ -291,6 +294,7 @@
         } while (address && (address < end));
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
                         swp_entry_t entry, struct page* page)
 {
--- linux-2.4.3-pre6/mm/vmalloc.c Thu Mar 22 18:52:52 2001
+++ linux-akpm/mm/vmalloc.c Fri Mar 23 22:38:07 2001
@@ -123,7 +123,11 @@
         if (end > PGDIR_SIZE)
                 end = PGDIR_SIZE;
         do {
- pte_t * pte = pte_alloc(&init_mm, pmd, address);
+ pte_t * pte;
+
+ spin_lock(&init_mm.page_table_lock); /* pte_alloc requires this */
+ pte = pte_alloc(&init_mm, pmd, address);
+ spin_unlock(&init_mm.page_table_lock);
                 if (!pte)
                         return -ENOMEM;
                 if (alloc_area_pte(pte, address, end - address, gfp_mask, prot))
@@ -147,7 +151,9 @@
         do {
                 pmd_t *pmd;
                 
+ spin_lock(&init_mm.page_table_lock); /* pmd_alloc requires this */
                 pmd = pmd_alloc(&init_mm, dir, address);
+ spin_unlock(&init_mm.page_table_lock);
                 ret = -ENOMEM;
                 if (!pmd)
                         break;
--- linux-2.4.3-pre6/mm/vmscan.c Tue Jan 16 07:36:49 2001
+++ linux-akpm/mm/vmscan.c Fri Mar 23 22:08:48 2001
@@ -25,16 +25,15 @@
 #include <asm/pgalloc.h>
 
 /*
- * The swap-out functions return 1 if they successfully
- * threw something out, and we got a free page. It returns
- * zero if it couldn't do anything, and any other value
- * indicates it decreased rss, but the page was shared.
+ * The swap-out function returns 1 if it successfully
+ * scanned all the pages it was asked to (`count').
+ * It returns zero if it couldn't do anything,
  *
- * NOTE! If it sleeps, it *must* return 1 to make sure we
- * don't continue with the swap-out. Otherwise we may be
- * using a process that no longer actually exists (it might
- * have died while we slept).
+ * rss may decrease because pages are shared, but this
+ * doesn't count as having freed a page.
  */
+
+/* mm->page_table_lock is held. mmap_sem is not held */
 static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
 {
         pte_t pte;
@@ -129,6 +128,7 @@
         return;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
 {
         pte_t * pte;
@@ -165,6 +165,7 @@
         return count;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
 {
         pmd_t * pmd;
@@ -194,6 +195,7 @@
         return count;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
 {
         pgd_t *pgdir;
@@ -218,6 +220,9 @@
         return count;
 }
 
+/*
+ * Returns non-zero if we scanned all `count' pages
+ */
 static int swap_out_mm(struct mm_struct * mm, int count)
 {
         unsigned long address;
@@ -879,6 +884,7 @@
  * If there are applications that are active memory-allocators
  * (most normal use), this basically shouldn't matter.
  */
+
 int kswapd(void *unused)
 {
         struct task_struct *tsk = current;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Fri Mar 23 2001 - 21:00:20 EST