Merge tag 'xarray-4.20-rc7' of git://git.infradead.org/users/willy/linux-dax
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c13625c1ad5e5e91abf1d45bdbcf388f9b1f2690..8e2ff195ecb30a6e3314664d60110bfa7781fd53 100644 (file)
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * collapse_shmem - collapse small tmpfs/shmem pages into a huge one.
  *
  * Basic scheme is simple, details are more complex:
- *  - allocate and freeze a new huge page;
+ *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
- *    + unfreeze huge page;
+ *    + unlock huge page;
  *  - if replacing fails:
  *    + put all pages back and unfreeze them;
  *    + restore gaps in the page cache;
- *    + free huge page;
+ *    + unlock and free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
                struct address_space *mapping, pgoff_t start,
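Note: the scheme change above swaps refcount-freezing for plain page locking. For reference, the freeze primitives the old scheme depended on look roughly like this in include/linux/page_ref.h of this era (a sketch; tracepoint hooks omitted). Freezing succeeds only when the refcount is exactly @count and atomically drops it to zero, so no new reference can be taken until page_ref_unfreeze():

	static inline int page_ref_freeze(struct page *page, int count)
	{
		/* succeeds only if _refcount == count; leaves it at 0 */
		return likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
	}

	static inline void page_ref_unfreeze(struct page *page, int count)
	{
		VM_BUG_ON_PAGE(page_count(page) != 0, page);
		VM_BUG_ON(count == 0);
		/* release ordering pairs with readers of the refcount */
		atomic_set_release(&page->_refcount, count);
	}

Holding new_page locked instead keeps page_count() positive throughout, which is what lets the statistics updates and the final unlock move around in the hunks below.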
@@ -1329,19 +1329,6 @@ static void collapse_shmem(struct mm_struct *mm,
                goto out;
        }
 
-       new_page->index = start;
-       new_page->mapping = mapping;
-       __SetPageSwapBacked(new_page);
-       __SetPageLocked(new_page);
-       BUG_ON(!page_ref_freeze(new_page, 1));
-
-       /*
-        * At this point the new_page is 'frozen' (page_count() is zero),
-        * locked and not up-to-date. It's safe to insert it into the page
-        * cache, because nobody would be able to map it or use it in other
-        * way until we unfreeze it.
-        */
-
        /* This will be less messy when we use multi-index entries */
        do {
                xas_lock_irq(&xas);
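Note: the expanded error path pairs the new mem_cgroup_cancel_charge() with the mem_cgroup_try_charge() made earlier in collapse_shmem(), outside this hunk; before this change, a failing xas_nomem() returned without undoing that charge. A sketch of the charge lifecycle, using the 4.20-era memcg API:

	/* earlier in collapse_shmem(), elided from the hunk above: */
	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
		result = SCAN_CGROUP_CHARGE_FAIL;	/* nothing to undo */
		goto out;
	}
	/* every later failure must undo it:
	 *	mem_cgroup_cancel_charge(new_page, memcg, true);
	 * while the success path commits it:
	 *	mem_cgroup_commit_charge(new_page, memcg, false, true);
	 */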
@@ -1349,19 +1336,44 @@ static void collapse_shmem(struct mm_struct *mm,
                if (!xas_error(&xas))
                        break;
                xas_unlock_irq(&xas);
-               if (!xas_nomem(&xas, GFP_KERNEL))
+               if (!xas_nomem(&xas, GFP_KERNEL)) {
+                       mem_cgroup_cancel_charge(new_page, memcg, true);
+                       result = SCAN_FAIL;
                        goto out;
+               }
        } while (1);
 
+       __SetPageLocked(new_page);
+       __SetPageSwapBacked(new_page);
+       new_page->index = start;
+       new_page->mapping = mapping;
+
+       /*
+        * At this point the new_page is locked and not up-to-date.
+        * It's safe to insert it into the page cache, because nobody would
+        * be able to map it or use it in another way until we unlock it.
+        */
+
        xas_set(&xas, start);
        for (index = start; index < end; index++) {
                struct page *page = xas_next(&xas);
 
                VM_BUG_ON(index != xas.xa_index);
                if (!page) {
+                       /*
+                        * Stop if extent has been truncated or hole-punched,
+                        * and is now completely empty.
+                        */
+                       if (index == start) {
+                               if (!xas_next_entry(&xas, end - 1)) {
+                                       result = SCAN_TRUNCATED;
+                                       goto xa_locked;
+                               }
+                               xas_set(&xas, index);
+                       }
                        if (!shmem_charge(mapping->host, 1)) {
                                result = SCAN_FAIL;
-                               break;
+                               goto xa_locked;
                        }
                        xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
                        nr_none++;
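Note: the new truncation check covers the case where the whole extent vanished before the first page was processed; xas_next_entry() scans for any entry still present below end. An equivalent standalone test (hypothetical helper, not in the patch; assumes the i_pages lock is held, as it is at this point):

	#include <linux/fs.h>
	#include <linux/xarray.h>

	static bool extent_is_empty(struct address_space *mapping,
				    pgoff_t start, pgoff_t end)
	{
		XA_STATE(xas, &mapping->i_pages, start);

		/* first present entry in [start, end - 1], or NULL */
		return xas_find(&xas, end - 1) == NULL;
	}

The xas_store() of new_page + (index % HPAGE_PMD_NR) then puts the matching subpage pointer in each of the HPAGE_PMD_NR slots; that per-slot bookkeeping is the messiness the multi-index comment above refers to.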
@@ -1376,13 +1388,12 @@ static void collapse_shmem(struct mm_struct *mm,
                                result = SCAN_FAIL;
                                goto xa_unlocked;
                        }
-                       xas_lock_irq(&xas);
-                       xas_set(&xas, index);
                } else if (trylock_page(page)) {
                        get_page(page);
+                       xas_unlock_irq(&xas);
                } else {
                        result = SCAN_PAGE_LOCK;
-                       break;
+                       goto xa_locked;
                }
 
                /*
@@ -1391,17 +1402,24 @@ static void collapse_shmem(struct mm_struct *mm,
                 */
                VM_BUG_ON_PAGE(!PageLocked(page), page);
                VM_BUG_ON_PAGE(!PageUptodate(page), page);
-               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+               /*
+                * If file was truncated then extended, or hole-punched, before
+                * we locked the first page, then a THP might be there already.
+                */
+               if (PageTransCompound(page)) {
+                       result = SCAN_PAGE_COMPOUND;
+                       goto out_unlock;
+               }
 
                if (page_mapping(page) != mapping) {
                        result = SCAN_TRUNCATED;
                        goto out_unlock;
                }
-               xas_unlock_irq(&xas);
 
                if (isolate_lru_page(page)) {
                        result = SCAN_DEL_PAGE_LRU;
-                       goto out_isolate_failed;
+                       goto out_unlock;
                }
 
                if (page_mapped(page))
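Note: this hunk and the previous one are both shaped by the i_pages lock being a spinlock: trylock_page() never sleeps, so it is safe to call before xas_unlock_irq(), whereas a sleeping lock_page() would not be. Roughly, from include/linux/pagemap.h of this era:

	static inline int trylock_page(struct page *page)
	{
		page = compound_head(page);
		return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
	}

The new PageTransCompound() bail-out is just a PageCompound() test underneath; it catches a THP that truncate-then-extend or hole-punch raced into this range, which the old VM_BUG_ON_PAGE would have turned into a crash.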
@@ -1421,7 +1439,9 @@ static void collapse_shmem(struct mm_struct *mm,
                 */
                if (!page_ref_freeze(page, 3)) {
                        result = SCAN_PAGE_COUNT;
-                       goto out_lru;
+                       xas_unlock_irq(&xas);
+                       putback_lru_page(page);
+                       goto out_unlock;
                }
 
                /*
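Note: the expected count of 3 at the page_ref_freeze() call is spelled out by a comment just above it that the hunk elides; in outline:

	/*
	 * page_count() == 3 at the freeze point:
	 *   1. our pin from get_page() under trylock_page();
	 *   2. the page cache's own reference;
	 *   3. the reference taken by isolate_lru_page().
	 * Any higher count means another user (e.g. get_user_pages())
	 * holds the page, so the freeze fails and the collapse aborts
	 * with SCAN_PAGE_COUNT -- now putting the page back on the LRU
	 * inline, instead of via the removed out_lru label.
	 */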
@@ -1433,71 +1453,74 @@ static void collapse_shmem(struct mm_struct *mm,
                /* Finally, replace with the new page. */
                xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
                continue;
-out_lru:
-               xas_unlock_irq(&xas);
-               putback_lru_page(page);
-out_isolate_failed:
-               unlock_page(page);
-               put_page(page);
-               goto xa_unlocked;
 out_unlock:
                unlock_page(page);
                put_page(page);
-               break;
+               goto xa_unlocked;
        }
-       xas_unlock_irq(&xas);
 
+       __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       if (nr_none) {
+               struct zone *zone = page_zone(new_page);
+
+               __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+               __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+       }
+
+xa_locked:
+       xas_unlock_irq(&xas);
 xa_unlocked:
+
        if (result == SCAN_SUCCEED) {
                struct page *page, *tmp;
-               struct zone *zone = page_zone(new_page);
 
                /*
                 * Replacing old pages with new one has succeeded, now we
                 * need to copy the content and free the old pages.
                 */
+               index = start;
                list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+                       while (index < page->index) {
+                               clear_highpage(new_page + (index % HPAGE_PMD_NR));
+                               index++;
+                       }
                        copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
                                        page);
                        list_del(&page->lru);
-                       unlock_page(page);
-                       page_ref_unfreeze(page, 1);
                        page->mapping = NULL;
+                       page_ref_unfreeze(page, 1);
                        ClearPageActive(page);
                        ClearPageUnevictable(page);
+                       unlock_page(page);
                        put_page(page);
+                       index++;
                }
-
-               local_irq_disable();
-               __inc_node_page_state(new_page, NR_SHMEM_THPS);
-               if (nr_none) {
-                       __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-                       __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+               while (index < end) {
+                       clear_highpage(new_page + (index % HPAGE_PMD_NR));
+                       index++;
                }
-               local_irq_enable();
 
-               /*
-                * Remove pte page tables, so we can re-fault
-                * the page as huge.
-                */
-               retract_page_tables(mapping, start);
-
-               /* Everything is ready, let's unfreeze the new_page */
-               set_page_dirty(new_page);
                SetPageUptodate(new_page);
-               page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+               page_ref_add(new_page, HPAGE_PMD_NR - 1);
+               set_page_dirty(new_page);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
                lru_cache_add_anon(new_page);
-               unlock_page(new_page);
 
+               /*
+                * Remove pte page tables, so we can re-fault the page as huge.
+                */
+               retract_page_tables(mapping, start);
                *hpage = NULL;
 
                khugepaged_pages_collapsed++;
        } else {
                struct page *page;
+
                /* Something went wrong: roll back page cache changes */
-               shmem_uncharge(mapping->host, nr_none);
                xas_lock_irq(&xas);
+               mapping->nrpages -= nr_none;
+               shmem_uncharge(mapping->host, nr_none);
+
                xas_set(&xas, start);
                xas_for_each(&xas, page, end - 1) {
                        page = list_first_entry_or_null(&pagelist,
@@ -1519,19 +1542,18 @@ xa_unlocked:
                        xas_store(&xas, page);
                        xas_pause(&xas);
                        xas_unlock_irq(&xas);
-                       putback_lru_page(page);
                        unlock_page(page);
+                       putback_lru_page(page);
                        xas_lock_irq(&xas);
                }
                VM_BUG_ON(nr_none);
                xas_unlock_irq(&xas);
 
-               /* Unfreeze new_page, caller would take care about freeing it */
-               page_ref_unfreeze(new_page, 1);
                mem_cgroup_cancel_charge(new_page, memcg, true);
-               unlock_page(new_page);
                new_page->mapping = NULL;
        }
+
+       unlock_page(new_page);
 out:
        VM_BUG_ON(!list_empty(&pagelist));
        /* TODO: tracepoints */
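Note: on the success path the copy loop must also zero any holes: an index in [start, end) with no page on pagelist was a gap filled by new_page itself (counted in nr_none), and its subpage would otherwise expose stale data once SetPageUptodate() runs. A standalone rendering of that walk (hypothetical helper; assumes pagelist is ordered by ->index, which the isolation loop's list_add_tail guarantees):

	#include <linux/highmem.h>
	#include <linux/huge_mm.h>
	#include <linux/list.h>
	#include <linux/mm.h>

	static void copy_and_clear(struct page *new_page, struct list_head *pagelist,
				   pgoff_t start, pgoff_t end)
	{
		struct page *page, *tmp;
		pgoff_t index = start;

		list_for_each_entry_safe(page, tmp, pagelist, lru) {
			/* zero the hole, if any, before this old page */
			while (index < page->index)
				clear_highpage(new_page + (index++ % HPAGE_PMD_NR));
			copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page);
			index++;
		}
		/* zero the trailing hole, if any */
		while (index < end)
			clear_highpage(new_page + (index++ % HPAGE_PMD_NR));
	}

On the rollback side, note the two ordering fixes folded in above: mapping->nrpages is corrected under the i_pages lock before shmem_uncharge(), and each old page is unlocked before putback_lru_page() rather than after.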