[PATCH] crc32.c typo fix

[sfrench/cifs-2.6.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index d209f745db7fbc3154e83cf04666770068986724..e046b7e4b53092bb879f8d183d24de36b3389d83 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -58,7 +58,7 @@
  #include <linux/swapops.h>
  #include <linux/elf.h>
  
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
  /* use the per-pgdat data instead for discontigmem - mbligh */
  unsigned long max_mapnr;
  struct page *mem_map;
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
   * Do a quick page-table lookup for a single page.
   * mm->page_table_lock must be held.
   */
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+                       int read, int write, int accessed)
  {
         pgd_t *pgd;
         pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
                 pfn = pte_pfn(pte);
                 if (pfn_valid(pfn)) {
                         page = pfn_to_page(pfn);
-                       if (write && !pte_dirty(pte) && !PageDirty(page))
-                               set_page_dirty(page);
-                       mark_page_accessed(page);
+                       if (accessed) {
+                               if (write && !pte_dirty(pte) &&!PageDirty(page))
+                                       set_page_dirty(page);
+                               mark_page_accessed(page);
+                       }
                         return page;
                 }
         }
@@ -829,33 +831,21 @@ out:
         return NULL;
  }
  
-struct page *
+inline struct page *
  follow_page(struct mm_struct *mm, unsigned long address, int write)
  {
-       return __follow_page(mm, address, /*read*/0, write);
-}
-
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
-{
-       return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+       return __follow_page(mm, address, 0, write, 1);
  }
  
-EXPORT_SYMBOL(check_user_page_readable);
-
-/* 
- * Given a physical address, is there a useful struct page pointing to
- * it?  This may become more complex in the future if we start dealing
- * with IO-aperture pages for direct-IO.
+/*
+ * check_user_page_readable() can be called from interrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
   */
-
-static inline struct page *get_page_map(struct page *page)
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
  {
-       if (!pfn_valid(page_to_pfn(page)))
-               return NULL;
-       return page;
+       return __follow_page(mm, address, 1, 0, 0) != NULL;
  }
-
+EXPORT_SYMBOL(check_user_page_readable);
  
  static inline int
  untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
@@ -887,7 +877,6 @@ untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
         return 0;
  }
  
-
  int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 unsigned long start, int len, int write, int force,
                 struct page **pages, struct vm_area_struct **vmas)
@@ -924,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         pud = pud_offset(pgd, pg);
                         BUG_ON(pud_none(*pud));
                         pmd = pmd_offset(pud, pg);
-                       BUG_ON(pmd_none(*pmd));
+                       if (pmd_none(*pmd))
+                               return i ? : -EFAULT;
                         pte = pte_offset_map(pmd, pg);
-                       BUG_ON(pte_none(*pte));
+                       if (pte_none(*pte)) {
+                               pte_unmap(pte);
+                               return i ? : -EFAULT;
+                       }
                         if (pages) {
                                 pages[i] = pte_page(*pte);
                                 get_page(pages[i]);
@@ -951,25 +944,37 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 }
                 spin_lock(&mm->page_table_lock);
                 do {
-                       struct page *map;
-                       int lookup_write = write;
+                       int write_access = write;
+                       struct page *page;
  
                         cond_resched_lock(&mm->page_table_lock);
-                       while (!(map = follow_page(mm, start, lookup_write))) {
+                       while (!(page = follow_page(mm, start, write_access))) {
+                               int ret;
+
                                 /*
                                  * Shortcut for anonymous pages. We don't want
                                  * to force the creation of pages tables for
-                                * insanly big anonymously mapped areas that
+                                * insanely big anonymously mapped areas that
                                  * nobody touched so far. This is important
                                  * for doing a core dump for these mappings.
                                  */
-                               if (!lookup_write &&
-                                   untouched_anonymous_page(mm,vma,start)) {
-                                       map = ZERO_PAGE(start);
+                               if (!write && untouched_anonymous_page(mm,vma,start)) {
+                                       page = ZERO_PAGE(start);
                                         break;
                                 }
                                 spin_unlock(&mm->page_table_lock);
-                               switch (handle_mm_fault(mm,vma,start,write)) {
+                               ret = __handle_mm_fault(mm, vma, start, write_access);
+
+                               /*
+                                * The VM_FAULT_WRITE bit tells us that do_wp_page has
+                                * broken COW when necessary, even if maybe_mkwrite
+                                * decided not to set pte_write. We can thus safely do
+                                * subsequent page lookups as if they were reads.
+                                */
+                               if (ret & VM_FAULT_WRITE)
+                                       write_access = 0;
+                               
+                               switch (ret & ~VM_FAULT_WRITE) {
                                 case VM_FAULT_MINOR:
                                         tsk->min_flt++;
                                         break;
@@ -983,41 +988,24 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 default:
                                         BUG();
                                 }
-                               /*
-                                * Now that we have performed a write fault
-                                * and surely no longer have a shared page we
-                                * shouldn't write, we shouldn't ignore an
-                                * unwritable page in the page table if
-                                * we are forcing write access.
-                                */
-                               lookup_write = write && !force;
                                 spin_lock(&mm->page_table_lock);
                         }
                         if (pages) {
-                               pages[i] = get_page_map(map);
-                               if (!pages[i]) {
-                                       spin_unlock(&mm->page_table_lock);
-                                       while (i--)
-                                               page_cache_release(pages[i]);
-                                       i = -EFAULT;
-                                       goto out;
-                               }
-                               flush_dcache_page(pages[i]);
-                               if (!PageReserved(pages[i]))
-                                       page_cache_get(pages[i]);
+                               pages[i] = page;
+                               flush_dcache_page(page);
+                               if (!PageReserved(page))
+                                       page_cache_get(page);
                         }
                         if (vmas)
                                 vmas[i] = vma;
                         i++;
                         start += PAGE_SIZE;
                         len--;
-               } while(len && start < vma->vm_end);
+               } while (len && start < vma->vm_end);
                 spin_unlock(&mm->page_table_lock);
-       } while(len);
-out:
+       } while (len);
         return i;
  }
-
  EXPORT_SYMBOL(get_user_pages);
  
  static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
@@ -1164,7 +1152,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
  {
         pgd_t *pgd;
         unsigned long next;
-       unsigned long end = addr + size;
+       unsigned long end = addr + PAGE_ALIGN(size);
         struct mm_struct *mm = vma->vm_mm;
         int err;
  
@@ -1249,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         struct page *old_page, *new_page;
         unsigned long pfn = pte_pfn(pte);
         pte_t entry;
+       int ret;
  
         if (unlikely(!pfn_valid(pfn))) {
                 /*
@@ -1264,7 +1253,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         }
         old_page = pfn_to_page(pfn);
  
-       if (!TestSetPageLocked(old_page)) {
+       if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
                 int reuse = can_share_swap_page(old_page);
                 unlock_page(old_page);
                 if (reuse) {
@@ -1276,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
                         lazy_mmu_prot_update(entry);
                         pte_unmap(page_table);
                         spin_unlock(&mm->page_table_lock);
-                       return VM_FAULT_MINOR;
+                       return VM_FAULT_MINOR|VM_FAULT_WRITE;
                 }
         }
         pte_unmap(page_table);
@@ -1303,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         /*
          * Re-check the pte - we dropped the lock
          */
+       ret = VM_FAULT_MINOR;
         spin_lock(&mm->page_table_lock);
         page_table = pte_offset_map(pmd, address);
         if (likely(pte_same(*page_table, pte))) {
@@ -1319,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
  
                 /* Free the old page.. */
                 new_page = old_page;
+               ret |= VM_FAULT_WRITE;
         }
         pte_unmap(page_table);
         page_cache_release(new_page);
         page_cache_release(old_page);
         spin_unlock(&mm->page_table_lock);
-       return VM_FAULT_MINOR;
+       return ret;
  
  no_new_page:
         page_cache_release(old_page);
@@ -1483,7 +1474,7 @@ restart:
   * unmap_mapping_range - unmap the portion of all mmaps
   * in the specified address_space corresponding to the specified
   * page range in the underlying file.
- * @address_space: the address space containing mmaps to be unmapped.
+ * @mapping: the address space containing mmaps to be unmapped.
   * @holebegin: byte in first page to unmap, relative to the start of
   * the underlying file.  This will be rounded down to a PAGE_SIZE
   * boundary.  Note that this is different from vmtruncate(), which
@@ -1711,10 +1702,6 @@ static int do_swap_page(struct mm_struct * mm,
         }
  
         /* The page isn't present yet, go ahead with the fault. */
-               
-       swap_free(entry);
-       if (vm_swap_full())
-               remove_exclusive_swap_page(page);
  
         inc_mm_counter(mm, rss);
         pte = mk_pte(page, vma->vm_page_prot);
@@ -1722,12 +1709,16 @@ static int do_swap_page(struct mm_struct * mm,
                 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                 write_access = 0;
         }
-       unlock_page(page);
  
         flush_icache_page(vma, page);
         set_pte_at(mm, address, page_table, pte);
         page_add_anon_rmap(page, vma, address);
  
+       swap_free(entry);
+       if (vm_swap_full())
+               remove_exclusive_swap_page(page);
+       unlock_page(page);
+
         if (write_access) {
                 if (do_wp_page(mm, vma, address,
                                 page_table, pmd, pte) == VM_FAULT_OOM)
@@ -2016,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
         if (write_access) {
                 if (!pte_write(entry))
                         return do_wp_page(mm, vma, address, pte, pmd, entry);
-
                 entry = pte_mkdirty(entry);
         }
         entry = pte_mkyoung(entry);
@@ -2031,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
                 unsigned long address, int write_access)
  {
         pgd_t *pgd;