mm: fix do_pages_move status handling
diff --git a/mm/migrate.c b/mm/migrate.c
index 003886606a2251cab9b1e1e50d128d5d5f1de497..70ef794cccae97feda86c34e54b961eaa120138e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
        oldzone = page_zone(page);
        newzone = page_zone(newpage);
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
 
-       pslot = radix_tree_lookup_slot(&mapping->page_tree,
+       pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                        page_index(page));
 
        expected_count += 1 + page_has_private(page);
        if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-               spin_unlock_irq(&mapping->tree_lock);
+               radix_tree_deref_slot_protected(pslot,
+                                       &mapping->i_pages.xa_lock) != page) {
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
        if (mode == MIGRATE_ASYNC && head &&
                        !buffer_migrate_lock_buffers(head, mode)) {
                page_ref_unfreeze(page, expected_count);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
                SetPageDirty(newpage);
        }
 
-       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
        /*
         * Drop cache reference from old page by unfreezing
@@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        page_ref_unfreeze(page, expected_count - 1);
 
-       spin_unlock(&mapping->tree_lock);
+       xa_unlock(&mapping->i_pages);
        /* Leave irq disabled to prevent preemption while updating stats */
 
        /*
@@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
        int expected_count;
        void **pslot;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
 
-       pslot = radix_tree_lookup_slot(&mapping->page_tree,
-                                       page_index(page));
+       pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
 
        expected_count = 2 + page_has_private(page);
        if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-               spin_unlock_irq(&mapping->tree_lock);
+               radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
        get_page(newpage);
 
-       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
        page_ref_unfreeze(page, expected_count - 1);
 
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
 
        return MIGRATEPAGE_SUCCESS;
 }
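
The hunks above are a mechanical conversion: mapping->page_tree becomes
mapping->i_pages, and the separate mapping->tree_lock goes away in favour of
the spinlock embedded in the new structure. The xa_lock*() helpers used here
are thin wrappers around that embedded lock; paraphrased from
include/linux/xarray.h as of this change (a sketch, not the full set of
variants):

	/* The lock now lives inside the structure it protects. */
	#define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
	#define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
	#define xa_lock_irq(xa)		spin_lock_irq(&(xa)->xa_lock)
	#define xa_unlock_irq(xa)	spin_unlock_irq(&(xa)->xa_lock)

This also explains the &mapping->i_pages.xa_lock argument above: the radix
tree API still wants the raw spinlock, while the xa_lock*() wrappers take the
containing structure.
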
@@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                                   enum migrate_reason reason)
 {
        int rc = MIGRATEPAGE_SUCCESS;
-       int *result = NULL;
        struct page *newpage;
 
-       newpage = get_new_page(page, private, &result);
+       if (!thp_migration_supported() && PageTransHuge(page))
+               return -ENOMEM;
+
+       newpage = get_new_page(page, private);
        if (!newpage)
                return -ENOMEM;
 
@@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                goto out;
        }
 
-       if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
-               lock_page(page);
-               rc = split_huge_page(page);
-               unlock_page(page);
-               if (rc)
-                       goto out;
-       }
-
        rc = __unmap_and_move(page, newpage, force, mode);
        if (rc == MIGRATEPAGE_SUCCESS)
                set_page_owner_migrate_reason(newpage, reason);
@@ -1231,12 +1225,6 @@ put_new:
                        put_page(newpage);
        }
 
-       if (result) {
-               if (rc)
-                       *result = rc;
-               else
-                       *result = page_to_nid(newpage);
-       }
        return rc;
 }
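
With the int *result back-channel removed from unmap_and_move() and (below)
unmap_and_move_huge_page(), the allocation callback's signature shrinks as
well. A sketch of the corresponding typedef change in include/linux/migrate.h
(paraphrased, old form shown as a comment):

	/* old: the callback smuggled a per-page status pointer to the caller
	 * typedef struct page *new_page_t(struct page *page,
	 *				unsigned long private, int **result);
	 */
	typedef struct page *new_page_t(struct page *page, unsigned long private);

All allocators passed to migrate_pages() are updated to match further down
(new_page_node is removed outright, alloc_misplaced_dst_page loses the extra
argument).
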
 
@@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                                enum migrate_mode mode, int reason)
 {
        int rc = -EAGAIN;
-       int *result = NULL;
        int page_was_mapped = 0;
        struct page *new_hpage;
        struct anon_vma *anon_vma = NULL;
@@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                return -ENOSYS;
        }
 
-       new_hpage = get_new_page(hpage, private, &result);
+       new_hpage = get_new_page(hpage, private);
        if (!new_hpage)
                return -ENOMEM;
 
@@ -1345,12 +1332,6 @@ out:
        else
                putback_active_hugepage(new_hpage);
 
-       if (result) {
-               if (rc)
-                       *result = rc;
-               else
-                       *result = page_to_nid(new_hpage);
-       }
        return rc;
 }
 
@@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
                retry = 0;
 
                list_for_each_entry_safe(page, page2, from, lru) {
+retry:
                        cond_resched();
 
                        if (PageHuge(page))
@@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
                        switch(rc) {
                        case -ENOMEM:
+                               /*
+                                * THP migration might be unsupported or the
+                                * allocation could've failed so we should
+                                * retry on the same page with the THP split
+                                * to base pages.
+                                *
+                                * Head page is retried immediately and tail
+                                * pages are added to the tail of the list so
+                                * we encounter them after the rest of the list
+                                * is processed.
+                                */
+                               if (PageTransHuge(page)) {
+                                       lock_page(page);
+                                       rc = split_huge_page_to_list(page, from);
+                                       unlock_page(page);
+                                       if (!rc) {
+                                               list_safe_reset_next(page, page2, lru);
+                                               goto retry;
+                                       }
+                               }
                                nr_failed++;
                                goto out;
                        case -EAGAIN:
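
The -ENOMEM case above replaces the up-front THP split that was removed from
unmap_and_move(): a huge page is now split only after migrating it whole has
actually failed. The goto retry is safe because list_for_each_entry_safe()
iterates via a lookahead cursor (page2); once split_huge_page_to_list() has
spliced the tail pages onto the list, list_safe_reset_next() recomputes that
cursor before the loop body runs again for the (now base) head page. A minimal
sketch of the pattern, with hypothetical try_one()/splittable()/split_to()
helpers standing in for the real calls:

	list_for_each_entry_safe(page, page2, from, lru) {
retry:
		rc = try_one(page);			/* hypothetical */
		if (rc == -ENOMEM && splittable(page)) { /* hypothetical */
			if (!split_to(page, from)) {	/* tails go to list tail */
				/* list changed under us: recompute the cursor */
				list_safe_reset_next(page, page2, lru);
				goto retry;	/* retry the head page at once */
			}
		}
	}
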
@@ -1444,141 +1446,101 @@ out:
 }
 
 #ifdef CONFIG_NUMA
-/*
- * Move a list of individual pages
- */
-struct page_to_node {
-       unsigned long addr;
-       struct page *page;
-       int node;
-       int status;
-};
 
-static struct page *new_page_node(struct page *p, unsigned long private,
-               int **result)
+static int store_status(int __user *status, int start, int value, int nr)
 {
-       struct page_to_node *pm = (struct page_to_node *)private;
-
-       while (pm->node != MAX_NUMNODES && pm->page != p)
-               pm++;
+       while (nr-- > 0) {
+               if (put_user(value, status + start))
+                       return -EFAULT;
+               start++;
+       }
 
-       if (pm->node == MAX_NUMNODES)
-               return NULL;
+       return 0;
+}
 
-       *result = &pm->status;
+static int do_move_pages_to_node(struct mm_struct *mm,
+               struct list_head *pagelist, int node)
+{
+       int err;
 
-       if (PageHuge(p))
-               return alloc_huge_page_node(page_hstate(compound_head(p)),
-                                       pm->node);
-       else if (thp_migration_supported() && PageTransHuge(p)) {
-               struct page *thp;
+       if (list_empty(pagelist))
+               return 0;
 
-               thp = alloc_pages_node(pm->node,
-                       (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
-                       HPAGE_PMD_ORDER);
-               if (!thp)
-                       return NULL;
-               prep_transhuge_page(thp);
-               return thp;
-       } else
-               return __alloc_pages_node(pm->node,
-                               GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+       err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+                       MIGRATE_SYNC, MR_SYSCALL);
+       if (err)
+               putback_movable_pages(pagelist);
+       return err;
 }
 
 /*
- * Move a set of pages as indicated in the pm array. The addr
- * field must be set to the virtual address of the page to be moved
- * and the node number must contain a valid target node.
- * The pm array ends with node = MAX_NUMNODES.
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * adds it to the given pagelist.
+ * Returns -errno if the page cannot be found/isolated, or 0 when the page
+ * has been queued or does not need to be migrated because it is already on
+ * the target node.
  */
-static int do_move_page_to_node_array(struct mm_struct *mm,
-                                     struct page_to_node *pm,
-                                     int migrate_all)
+static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+               int node, struct list_head *pagelist, bool migrate_all)
 {
+       struct vm_area_struct *vma;
+       struct page *page;
+       unsigned int follflags;
        int err;
-       struct page_to_node *pp;
-       LIST_HEAD(pagelist);
 
        down_read(&mm->mmap_sem);
+       err = -EFAULT;
+       vma = find_vma(mm, addr);
+       if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+               goto out;
 
-       /*
-        * Build a list of pages to migrate
-        */
-       for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
-               struct vm_area_struct *vma;
-               struct page *page;
-               struct page *head;
-               unsigned int follflags;
-
-               err = -EFAULT;
-               vma = find_vma(mm, pp->addr);
-               if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
-                       goto set_status;
-
-               /* FOLL_DUMP to ignore special (like zero) pages */
-               follflags = FOLL_GET | FOLL_DUMP;
-               if (!thp_migration_supported())
-                       follflags |= FOLL_SPLIT;
-               page = follow_page(vma, pp->addr, follflags);
+       /* FOLL_DUMP to ignore special (like zero) pages */
+       follflags = FOLL_GET | FOLL_DUMP;
+       page = follow_page(vma, addr, follflags);
 
-               err = PTR_ERR(page);
-               if (IS_ERR(page))
-                       goto set_status;
+       err = PTR_ERR(page);
+       if (IS_ERR(page))
+               goto out;
 
-               err = -ENOENT;
-               if (!page)
-                       goto set_status;
+       err = -ENOENT;
+       if (!page)
+               goto out;
 
-               err = page_to_nid(page);
+       err = 0;
+       if (page_to_nid(page) == node)
+               goto out_putpage;
 
-               if (err == pp->node)
-                       /*
-                        * Node already in the right place
-                        */
-                       goto put_and_set;
+       err = -EACCES;
+       if (page_mapcount(page) > 1 && !migrate_all)
+               goto out_putpage;
 
-               err = -EACCES;
-               if (page_mapcount(page) > 1 &&
-                               !migrate_all)
-                       goto put_and_set;
-
-               if (PageHuge(page)) {
-                       if (PageHead(page)) {
-                               isolate_huge_page(page, &pagelist);
-                               err = 0;
-                               pp->page = page;
-                       }
-                       goto put_and_set;
+       if (PageHuge(page)) {
+               if (PageHead(page)) {
+                       isolate_huge_page(page, pagelist);
+                       err = 0;
                }
+       } else {
+               struct page *head;
 
-               pp->page = compound_head(page);
                head = compound_head(page);
                err = isolate_lru_page(head);
-               if (!err) {
-                       list_add_tail(&head->lru, &pagelist);
-                       mod_node_page_state(page_pgdat(head),
-                               NR_ISOLATED_ANON + page_is_file_cache(head),
-                               hpage_nr_pages(head));
-               }
-put_and_set:
-               /*
-                * Either remove the duplicate refcount from
-                * isolate_lru_page() or drop the page ref if it was
-                * not isolated.
-                */
-               put_page(page);
-set_status:
-               pp->status = err;
-       }
-
-       err = 0;
-       if (!list_empty(&pagelist)) {
-               err = migrate_pages(&pagelist, new_page_node, NULL,
-                               (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
                if (err)
-                       putback_movable_pages(&pagelist);
-       }
+                       goto out_putpage;
 
+               err = 0;
+               list_add_tail(&head->lru, pagelist);
+               mod_node_page_state(page_pgdat(head),
+                       NR_ISOLATED_ANON + page_is_file_cache(head),
+                       hpage_nr_pages(head));
+       }
+out_putpage:
+       /*
+        * Either remove the duplicate refcount from
+        * isolate_lru_page() or drop the page ref if it was
+        * not isolated.
+        */
+       put_page(page);
+out:
        up_read(&mm->mmap_sem);
        return err;
 }
@@ -1593,79 +1555,82 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                         const int __user *nodes,
                         int __user *status, int flags)
 {
-       struct page_to_node *pm;
-       unsigned long chunk_nr_pages;
-       unsigned long chunk_start;
-       int err;
-
-       err = -ENOMEM;
-       pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
-       if (!pm)
-               goto out;
+       int current_node = NUMA_NO_NODE;
+       LIST_HEAD(pagelist);
+       int start, i;
+       int err = 0, err1;
 
        migrate_prep();
 
-       /*
-        * Store a chunk of page_to_node array in a page,
-        * but keep the last one as a marker
-        */
-       chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
-
-       for (chunk_start = 0;
-            chunk_start < nr_pages;
-            chunk_start += chunk_nr_pages) {
-               int j;
+       for (i = start = 0; i < nr_pages; i++) {
+               const void __user *p;
+               unsigned long addr;
+               int node;
 
-               if (chunk_start + chunk_nr_pages > nr_pages)
-                       chunk_nr_pages = nr_pages - chunk_start;
-
-               /* fill the chunk pm with addrs and nodes from user-space */
-               for (j = 0; j < chunk_nr_pages; j++) {
-                       const void __user *p;
-                       int node;
-
-                       err = -EFAULT;
-                       if (get_user(p, pages + j + chunk_start))
-                               goto out_pm;
-                       pm[j].addr = (unsigned long) p;
-
-                       if (get_user(node, nodes + j + chunk_start))
-                               goto out_pm;
-
-                       err = -ENODEV;
-                       if (node < 0 || node >= MAX_NUMNODES)
-                               goto out_pm;
-
-                       if (!node_state(node, N_MEMORY))
-                               goto out_pm;
-
-                       err = -EACCES;
-                       if (!node_isset(node, task_nodes))
-                               goto out_pm;
+               err = -EFAULT;
+               if (get_user(p, pages + i))
+                       goto out_flush;
+               if (get_user(node, nodes + i))
+                       goto out_flush;
+               addr = (unsigned long)p;
+
+               err = -ENODEV;
+               if (node < 0 || node >= MAX_NUMNODES)
+                       goto out_flush;
+               if (!node_state(node, N_MEMORY))
+                       goto out_flush;
 
-                       pm[j].node = node;
+               err = -EACCES;
+               if (!node_isset(node, task_nodes))
+                       goto out_flush;
+
+               if (current_node == NUMA_NO_NODE) {
+                       current_node = node;
+                       start = i;
+               } else if (node != current_node) {
+                       err = do_move_pages_to_node(mm, &pagelist, current_node);
+                       if (err)
+                               goto out;
+                       err = store_status(status, start, current_node, i - start);
+                       if (err)
+                               goto out;
+                       start = i;
+                       current_node = node;
                }
 
-               /* End marker for this chunk */
-               pm[chunk_nr_pages].node = MAX_NUMNODES;
-
-               /* Migrate this chunk */
-               err = do_move_page_to_node_array(mm, pm,
-                                                flags & MPOL_MF_MOVE_ALL);
-               if (err < 0)
-                       goto out_pm;
+               /*
+                * Errors in the page lookup or isolation are not fatal; we
+                * simply report them via the status array.
+                */
+               err = add_page_for_migration(mm, addr, current_node,
+                               &pagelist, flags & MPOL_MF_MOVE_ALL);
+               if (!err)
+                       continue;
 
-               /* Return status information */
-               for (j = 0; j < chunk_nr_pages; j++)
-                       if (put_user(pm[j].status, status + j + chunk_start)) {
-                               err = -EFAULT;
-                               goto out_pm;
-                       }
-       }
-       err = 0;
+               err = store_status(status, i, err, 1);
+               if (err)
+                       goto out_flush;
 
-out_pm:
-       free_page((unsigned long)pm);
+               err = do_move_pages_to_node(mm, &pagelist, current_node);
+               if (err)
+                       goto out;
+               if (i > start) {
+                       err = store_status(status, start, current_node, i - start);
+                       if (err)
+                               goto out;
+               }
+               current_node = NUMA_NO_NODE;
+       }
+out_flush:
+       if (list_empty(&pagelist))
+               return err;
+
+       /* Make sure we do not overwrite the existing error */
+       err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+       if (!err1)
+               err1 = store_status(status, start, current_node, i - start);
+       if (!err)
+               err = err1;
 out:
        return err;
 }
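
This rewrite is the subject of the patch title: instead of filling a
page_to_node array one entry at a time, do_pages_move() now accumulates
consecutive requests targeting the same node on a local pagelist and migrates
each batch with a single migrate_pages() call, flushing whenever the target
node changes, a lookup or isolation fails, or the input is exhausted, taking
care to store per-page status for every batch (including the final one) before
returning. The user-visible contract of move_pages(2) is unchanged: status[i]
receives the node the page ended up on, or a negative errno for pages that
could not be looked up or isolated. A minimal userspace sketch of that
contract, assuming libnuma's <numaif.h> (build with -lnuma on a NUMA-enabled
kernel):

	#include <numaif.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		void *pages[2];
		int nodes[2] = { 0, 0 };	/* request node 0 for both pages */
		int status[2];

		pages[0] = aligned_alloc(psz, psz);
		pages[1] = aligned_alloc(psz, psz);
		*(char *)pages[0] = *(char *)pages[1] = 1;	/* fault them in */

		/* pid 0 means the calling process */
		if (move_pages(0, 2, pages, nodes, status, MPOL_MF_MOVE) == -1)
			perror("move_pages");
		else	/* status[i] >= 0: node; < 0: -errno for that page */
			printf("status: %d %d\n", status[0], status[1]);
		return 0;
	}
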
@@ -1866,8 +1831,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
 }
 
 static struct page *alloc_misplaced_dst_page(struct page *page,
-                                          unsigned long data,
-                                          int **result)
+                                          unsigned long data)
 {
        int nid = (int) data;
        struct page *newpage;
@@ -1986,6 +1950,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
            (vma->vm_flags & VM_EXEC))
                goto out;
 
+       /*
+        * Also do not migrate dirty pages: not all filesystems can move dirty
+        * pages in MIGRATE_ASYNC mode, so attempting it is a waste of cycles.
+        */
+       if (page_is_file_cache(page) && PageDirty(page))
+               goto out;
+
        /*
         * Rate-limit the amount of data that is being migrated to a node.
         * Optimal placement is no good if the memory bus is saturated and
@@ -2339,7 +2310,8 @@ again:
                        ptep_get_and_clear(mm, addr, ptep);
 
                        /* Setup special migration page table entry */
-                       entry = make_migration_entry(page, pte_write(pte));
+                       entry = make_migration_entry(page, mpfn &
+                                                    MIGRATE_PFN_WRITE);
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pte))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
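
The migration entry's write bit now comes from the MIGRATE_PFN_WRITE flag
cached in mpfn when the source entry was first examined, rather than from
pte_write() on the pte, which (in the surrounding migrate_vma collection code,
not shown here) may not be a present pte at all. For reference, a migration
entry is simply a swap entry encoding the pfn plus a read/write type;
paraphrased from include/linux/swapops.h (the real helper also asserts that
the page is locked):

	static inline swp_entry_t make_migration_entry(struct page *page, int write)
	{
		return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ,
				 page_to_pfn(page));
	}

Note that make_migration_entry() only tests write for non-zero, so passing the
masked flag value rather than a 0/1 boolean is fine.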