Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
index 772140c53ab185ebc76d1f185d6feb9fb8935c15..d49736ff8a8dad10420a4f2f76ca89da52c7da89 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -84,10 +84,14 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
-int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
-int sysctl_overcommit_ratio = 50;      /* default is 50% */
+int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+int sysctl_overcommit_ratio __read_mostly = 50;        /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-struct percpu_counter vm_committed_as;
+/*
+ * Make sure vm_committed_as gets a cacheline of its own, not one shared
+ * with other variables: it can be updated frequently by several CPUs.
+ */
+struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
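
The new comment above explains the ____cacheline_aligned_in_smp annotation: vm_committed_as is bumped frequently from many CPUs, so sharing its cacheline with unrelated variables would cause false sharing on every update. A rough user-space sketch of the same idea, using C11 alignment (the 64-byte line size, the explicit trailing pad and all names here are illustrative assumptions, not the kernel macro's exact behaviour):

    /* false_sharing.c -- illustrative analogue of ____cacheline_aligned_in_smp */
    #include <stdalign.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define CACHELINE 64                    /* assumed line size (typical on x86) */

    struct hot_counter {
        alignas(CACHELINE) atomic_long value;       /* starts on its own line */
        char pad[CACHELINE - sizeof(atomic_long)];  /* nothing else shares it */
    };

    static struct hot_counter committed;    /* stand-in for vm_committed_as */
    static long other_var;                  /* would otherwise share the line */

    int main(void)
    {
        atomic_fetch_add(&committed.value, 1);
        other_var++;                        /* no false sharing with the counter */
        printf("counter=%ld other=%ld\n",
               atomic_load(&committed.value), other_var);
        return 0;
    }
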
@@ -190,7 +194,7 @@ error:
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_lock
+ * Requires inode->i_mapping->i_mmap_mutex
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
@@ -218,9 +222,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
 
        if (file) {
                struct address_space *mapping = file->f_mapping;
-               spin_lock(&mapping->i_mmap_lock);
+               mutex_lock(&mapping->i_mmap_mutex);
                __remove_shared_vm_struct(vma, file, mapping);
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
        }
 }
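
unlink_file_vma() now takes the mapping's i_mmap_mutex rather than the old i_mmap_lock spinlock, so the unlink runs under a sleeping lock. A loose pthread analogue of the lock-around-unlink shape (the list type and names are invented for illustration, not the kernel's i_mmap structures):

    /* unlink_under_mutex.c -- illustrative analogue of unlink_file_vma() */
    #include <pthread.h>
    #include <stdio.h>

    struct node { struct node *prev, *next; };

    static struct node head = { &head, &head };     /* shared "mapping" list */
    static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void unlink_node(struct node *n)         /* the unlocked helper */
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static void unlink_locked(struct node *n)       /* lock, unlink, unlock */
    {
        pthread_mutex_lock(&list_mutex);            /* may sleep, like a mutex */
        unlink_node(n);
        pthread_mutex_unlock(&list_mutex);
    }

    int main(void)
    {
        struct node n = { &head, &head };

        head.next = head.prev = &n;
        unlink_locked(&n);
        printf("list empty: %d\n", head.next == &head);
        return 0;
    }
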
 
@@ -394,29 +398,6 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
        return vma;
 }
 
-static inline void
-__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev, struct rb_node *rb_parent)
-{
-       struct vm_area_struct *next;
-
-       vma->vm_prev = prev;
-       if (prev) {
-               next = prev->vm_next;
-               prev->vm_next = vma;
-       } else {
-               mm->mmap = vma;
-               if (rb_parent)
-                       next = rb_entry(rb_parent,
-                                       struct vm_area_struct, vm_rb);
-               else
-                       next = NULL;
-       }
-       vma->vm_next = next;
-       if (next)
-               next->vm_prev = vma;
-}
-
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                struct rb_node **rb_link, struct rb_node *rb_parent)
 {
@@ -464,16 +445,14 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
        if (vma->vm_file)
                mapping = vma->vm_file->f_mapping;
 
-       if (mapping) {
-               spin_lock(&mapping->i_mmap_lock);
-               vma->vm_truncate_count = mapping->truncate_count;
-       }
+       if (mapping)
+               mutex_lock(&mapping->i_mmap_mutex);
 
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        __vma_link_file(vma);
 
        if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
 
        mm->map_count++;
        validate_mm(mm);
@@ -576,17 +555,8 @@ again:                     remove_next = 1 + (end > next->vm_end);
                mapping = file->f_mapping;
                if (!(vma->vm_flags & VM_NONLINEAR))
                        root = &mapping->i_mmap;
-               spin_lock(&mapping->i_mmap_lock);
-               if (importer &&
-                   vma->vm_truncate_count != next->vm_truncate_count) {
-                       /*
-                        * unmap_mapping_range might be in progress:
-                        * ensure that the expanding vma is rescanned.
-                        */
-                       importer->vm_truncate_count = 0;
-               }
+               mutex_lock(&mapping->i_mmap_mutex);
                if (insert) {
-                       insert->vm_truncate_count = vma->vm_truncate_count;
                        /*
                         * Put into prio_tree now, so instantiated pages
                         * are visible to arm/parisc __flush_dcache_page
@@ -605,7 +575,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
         * lock may be shared between many sibling processes.  Skipping
         * the lock for brk adjustments makes a difference sometimes.
         */
-       if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
+       if (vma->anon_vma && (importer || start != vma->vm_start)) {
                anon_vma = vma->anon_vma;
                anon_vma_lock(anon_vma);
        }
@@ -652,7 +622,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
        if (anon_vma)
                anon_vma_unlock(anon_vma);
        if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
 
        if (remove_next) {
                if (file) {
@@ -699,9 +669,17 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
 }
 
 static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
-                                       struct anon_vma *anon_vma2)
+                                       struct anon_vma *anon_vma2,
+                                       struct vm_area_struct *vma)
 {
-       return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+       /*
+        * The list_is_singular() test avoids merging VMAs cloned from a
+        * parent, which improves scalability under the anon_vma lock.
+        */
+       if ((!anon_vma1 || !anon_vma2) && (!vma ||
+               list_is_singular(&vma->anon_vma_chain)))
+               return 1;
+       return anon_vma1 == anon_vma2;
 }
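
The new third argument lets the check refuse a merge when the VMA's anon_vma_chain holds more than one entry, i.e. when the anon_vma was inherited from a parent and is shared more widely. list_is_singular() is just "non-empty and first element == last element"; a stand-alone sketch of the predicate and the resulting test (names and types are illustrative, not the kernel's list implementation):

    /* singular_check.c -- sketch of the list_is_singular() idea */
    #include <stdbool.h>
    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    static bool list_empty(const struct list_head *h)
    {
        return h->next == h;
    }

    /* true when the list has exactly one entry besides the head */
    static bool list_is_singular(const struct list_head *h)
    {
        return !list_empty(h) && h->next == h->prev;
    }

    /* simplified shape of the anon_vma compatibility test above */
    static bool mergeable(void *av1, void *av2, const struct list_head *chain)
    {
        if ((!av1 || !av2) && (!chain || list_is_singular(chain)))
            return true;    /* one side lacks an anon_vma, chain not shared */
        return av1 == av2;  /* otherwise require the very same anon_vma */
    }

    int main(void)
    {
        struct list_head chain = { &chain, &chain };
        struct list_head only  = { &chain, &chain };

        chain.next = chain.prev = &only;    /* exactly one element */
        printf("singular:  %d\n", list_is_singular(&chain));
        printf("mergeable: %d\n", mergeable(NULL, (void *)1, &chain));
        return 0;
    }
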
 
 /*
@@ -720,7 +698,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
        struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
        if (is_mergeable_vma(vma, file, vm_flags) &&
-           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
        }
@@ -739,7 +717,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
        struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
        if (is_mergeable_vma(vma, file, vm_flags) &&
-           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
                vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
                if (vma->vm_pgoff + vm_pglen == vm_pgoff)
@@ -817,7 +795,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                can_vma_merge_before(next, vm_flags,
                                        anon_vma, file, pgoff+pglen) &&
                                is_mergeable_anon_vma(prev->anon_vma,
-                                                     next->anon_vma)) {
+                                                     next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
                        err = vma_adjust(prev, prev->vm_start,
                                next->vm_end, prev->vm_pgoff, NULL);
@@ -928,14 +906,7 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
        if (anon_vma)
                return anon_vma;
 try_prev:
-       /*
-        * It is potentially slow to have to call find_vma_prev here.
-        * But it's only on the first write fault on the vma, not
-        * every time, and we could devise a way to avoid it later
-        * (e.g. stash info in next's anon_vma_node when assigning
-        * an anon_vma, or when trying vma_merge).  Another time.
-        */
-       BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
+       near = vma->vm_prev;
        if (!near)
                goto none;
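
The deleted comment worried about the cost of find_vma_prev() on the first write fault; since VMAs now carry a vm_prev pointer, the predecessor is a single field read instead of a list walk. A toy comparison of the two lookups (types and names here are illustrative):

    /* prev_pointer.c -- why vma->vm_prev replaces find_vma_prev() */
    #include <stdio.h>

    struct vma {
        unsigned long start, end;
        struct vma *next, *prev;            /* vm_next / vm_prev analogue */
    };

    /* O(n): rescan from the head, remembering the node before 'v' */
    static struct vma *find_prev_by_walk(struct vma *head, struct vma *v)
    {
        struct vma *prev = NULL;

        for (struct vma *cur = head; cur; prev = cur, cur = cur->next)
            if (cur == v)
                return prev;
        return NULL;
    }

    int main(void)
    {
        struct vma a = { 0x1000, 0x2000, NULL, NULL };
        struct vma b = { 0x3000, 0x4000, NULL, NULL };

        a.next = &b;
        b.prev = &a;                        /* maintained at link time */

        printf("walk:  %p\n", (void *)find_prev_by_walk(&a, &b));
        printf("field: %p\n", (void *)b.prev);      /* O(1), same answer */
        return 0;
    }
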
 
@@ -982,7 +953,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 {
        struct mm_struct * mm = current->mm;
        struct inode *inode;
-       unsigned int vm_flags;
+       vm_flags_t vm_flags;
        int error;
        unsigned long reqprot = prot;
 
@@ -1187,7 +1158,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  */
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
-       unsigned int vm_flags = vma->vm_flags;
+       vm_flags_t vm_flags = vma->vm_flags;
 
        /* If it was private or non-writable, the write bit is already clear */
        if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
@@ -1215,7 +1186,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
  * We account for memory if it's a private writeable mapping,
  * not hugepages and VM_NORESERVE wasn't set.
  */
-static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
+static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 {
        /*
         * hugetlb has its own accounting separate from the core VM
@@ -1229,7 +1200,7 @@ static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
                          unsigned long len, unsigned long flags,
-                         unsigned int vm_flags, unsigned long pgoff)
+                         vm_flags_t vm_flags, unsigned long pgoff)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
@@ -1785,7 +1756,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static int expand_downwards(struct vm_area_struct *vma,
+int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
        int error;
@@ -1832,11 +1803,6 @@ static int expand_downwards(struct vm_area_struct *vma,
        return error;
 }
 
-int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-       return expand_downwards(vma, address);
-}
-
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1919,17 +1885,17 @@ static void unmap_region(struct mm_struct *mm,
                unsigned long start, unsigned long end)
 {
        struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
        unsigned long nr_accounted = 0;
 
        lru_add_drain();
-       tlb = tlb_gather_mmu(mm, 0);
+       tlb_gather_mmu(&tlb, mm, 0);
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
-                                next? next->vm_start: 0);
-       tlb_finish_mmu(tlb, start, end);
+       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+                                next ? next->vm_start : 0);
+       tlb_finish_mmu(&tlb, start, end);
 }
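
unmap_region() now keeps the mmu_gather on its own stack and hands its address to tlb_gather_mmu(), instead of receiving a pointer from the old per-CPU scheme. The general pattern -- the caller owns the context struct, the callee only initialises and later drains it -- in plain C (the struct contents and names are invented for illustration):

    /* caller_owned_ctx.c -- caller-allocated context, as with struct mmu_gather */
    #include <stdio.h>
    #include <string.h>

    struct gather {                 /* stand-in for struct mmu_gather */
        void *owner;
        int fullflush;
        unsigned long pending;
    };

    static void gather_init(struct gather *g, void *owner, int fullflush)
    {
        memset(g, 0, sizeof(*g));   /* cf. tlb_gather_mmu(&tlb, mm, flush) */
        g->owner = owner;
        g->fullflush = fullflush;
    }

    static void gather_finish(struct gather *g)
    {
        printf("flushed %lu pending entries (full=%d)\n",
               g->pending, g->fullflush);   /* cf. tlb_finish_mmu(&tlb, ...) */
    }

    static void unmap_region_like(void *mm)
    {
        struct gather tlb;          /* lives on this function's stack */

        gather_init(&tlb, mm, 0);
        tlb.pending += 3;           /* ... unmap/free work would go here ... */
        gather_finish(&tlb);
    }

    int main(void)
    {
        int fake_mm;

        unmap_region_like(&fake_mm);
        return 0;
    }
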
 
 /*
@@ -2071,9 +2037,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                return -EINVAL;
 
        /* Find the first overlapping VMA */
-       vma = find_vma_prev(mm, start, &prev);
+       vma = find_vma(mm, start);
        if (!vma)
                return 0;
+       prev = vma->vm_prev;
        /* we have  start < vma->vm_end  */
 
        /* if it doesn't overlap, we have nothing.. */
@@ -2271,7 +2238,7 @@ EXPORT_SYMBOL(do_brk);
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
        unsigned long end;
@@ -2296,14 +2263,14 @@ void exit_mmap(struct mm_struct *mm)
 
        lru_add_drain();
        flush_cache_mm(mm);
-       tlb = tlb_gather_mmu(mm, 1);
+       tlb_gather_mmu(&tlb, mm, 1);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
 
-       free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-       tlb_finish_mmu(tlb, 0, end);
+       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+       tlb_finish_mmu(&tlb, 0, end);
 
        /*
         * Walk the list again, actually closing and freeing it,
@@ -2317,7 +2284,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_lock is taken here.
+ * then i_mmap_mutex is taken here.
  */
 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
@@ -2529,15 +2496,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                 * The LSB of head.next can't change from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
                /*
                 * We can safely modify head.next after taking the
-                * anon_vma->root->lock. If some other vma in this mm shares
+                * anon_vma->root->mutex. If some other vma in this mm shares
                 * the same anon_vma we won't take it again.
                 *
                 * No need of atomic instructions here, head.next
                 * can't change from under us thanks to the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                 */
                if (__test_and_set_bit(0, (unsigned long *)
                                       &anon_vma->root->head.next))
@@ -2559,7 +2526,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
        }
 }
 
@@ -2586,7 +2553,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
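
The comment spells out why mm_take_all_locks() may acquire the per-mapping and per-anon_vma locks in any order: every path that takes several of them does so only while holding mm_all_locks_mutex, so two multi-lock takers can never interleave and deadlock. A toy pthread version of that discipline (lock names and counts are invented for illustration):

    /* outer_serialised.c -- arbitrary inner lock order under one outer lock */
    #include <pthread.h>
    #include <stdio.h>

    #define NLOCKS 3

    static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;   /* mm_all_locks_mutex role */
    static pthread_mutex_t inner[NLOCKS] = {
        PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER,
    };

    /*
     * Take every inner lock in whatever order the caller asks for. Because
     * all multi-lock takers serialise on 'outer' first, no two of them can
     * ever hold conflicting partial subsets at the same time.
     */
    static void take_all(const int *order)
    {
        pthread_mutex_lock(&outer);
        for (int i = 0; i < NLOCKS; i++)
            pthread_mutex_lock(&inner[order[i]]);
    }

    static void release_all(const int *order)
    {
        for (int i = NLOCKS - 1; i >= 0; i--)
            pthread_mutex_unlock(&inner[order[i]]);
        pthread_mutex_unlock(&outer);
    }

    int main(void)
    {
        int forward[NLOCKS]  = { 0, 1, 2 };
        int backward[NLOCKS] = { 2, 1, 0 };

        take_all(forward);
        release_all(forward);
        take_all(backward);         /* different order, still deadlock-free */
        release_all(backward);
        printf("both orders completed\n");
        return 0;
    }
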
@@ -2642,7 +2609,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
                 *
                 * No need of atomic instructions here, head.next
                 * can't change from under us until we release the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                 */
                if (!__test_and_clear_bit(0, (unsigned long *)
                                          &anon_vma->root->head.next))
@@ -2658,7 +2625,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
                 * AS_MM_ALL_LOCKS can't change to 0 from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                        &mapping->flags))
                        BUG();