mm/compaction: pass pgdat to too_many_isolated() instead of zone
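
too_many_isolated() only consults node-wide LRU counters, so take the pgdat
directly rather than deriving it from the zone on every call. The diff below
also carries the related scanner rework from the same series: selective
skip-hint resets, fast free-list searches for both scanners, and locking that
records contention instead of aborting mid-pageblock.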
diff --git a/mm/compaction.c b/mm/compaction.c
index ef29490b0f462349ec90b8672448f80627ef3af6..f171a83707ced436bb2bd4508060a6cd45a95905 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -66,7 +66,7 @@ static unsigned long release_freepages(struct list_head *freelist)
        return high_pfn;
 }
 
-static void map_pages(struct list_head *list)
+static void split_map_pages(struct list_head *list)
 {
        unsigned int i, order, nr_pages;
        struct page *page, *next;
@@ -237,6 +237,70 @@ static bool pageblock_skip_persistent(struct page *page)
        return false;
 }
 
+static bool
+__reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
+                                                       bool check_target)
+{
+       struct page *page = pfn_to_online_page(pfn);
+       struct page *end_page;
+       unsigned long block_pfn;
+
+       if (!page)
+               return false;
+       if (zone != page_zone(page))
+               return false;
+       if (pageblock_skip_persistent(page))
+               return false;
+
+       /*
+        * If skip is already cleared, do no further checking once the
+        * restart points have been set.
+        */
+       if (check_source && check_target && !get_pageblock_skip(page))
+               return true;
+
+       /*
+        * If clearing skip for the target scanner, do not select a
+        * non-movable pageblock as the starting point.
+        */
+       if (!check_source && check_target &&
+           get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+               return false;
+
+       /*
+        * Only clear the hint if a sample indicates there is either a
+        * free page or an LRU page in the block. One or other condition
+        * is necessary for the block to be a migration source/target.
+        */
+       block_pfn = pageblock_start_pfn(pfn);
+       pfn = max(block_pfn, zone->zone_start_pfn);
+       page = pfn_to_page(pfn);
+       if (zone != page_zone(page))
+               return false;
+       block_pfn = min(block_pfn + pageblock_nr_pages, zone_end_pfn(zone));
+       end_page = pfn_to_page(block_pfn);
+
+       do {
+               if (pfn_valid_within(pfn)) {
+                       if (check_source && PageLRU(page)) {
+                               clear_pageblock_skip(page);
+                               return true;
+                       }
+
+                       if (check_target && PageBuddy(page)) {
+                               clear_pageblock_skip(page);
+                               return true;
+                       }
+               }
+
+               page += (1 << PAGE_ALLOC_COSTLY_ORDER);
+               pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
+       } while (page < end_page);
+
+       return false;
+}
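
The do/while above samples one page every 2^PAGE_ALLOC_COSTLY_ORDER PFNs
rather than touching every page in the block. A minimal userspace sketch of
the stride arithmetic, assuming PAGE_ALLOC_COSTLY_ORDER = 3 and 512 pages per
pageblock (typical x86-64 values):

    #include <stdio.h>

    #define PAGE_ALLOC_COSTLY_ORDER 3   /* assumed kernel value */
    #define PAGEBLOCK_NR_PAGES      512 /* assumed: 2MB block / 4K pages */

    int main(void)
    {
        unsigned long pfn, samples = 0;

        /* One sample per stride instead of 512 individual checks */
        for (pfn = 0; pfn < PAGEBLOCK_NR_PAGES;
             pfn += 1UL << PAGE_ALLOC_COSTLY_ORDER)
            samples++;

        /* prints "64 samples per 512-page pageblock" */
        printf("%lu samples per %d-page pageblock\n",
               samples, PAGEBLOCK_NR_PAGES);
        return 0;
    }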
+
 /*
  * This function is called to clear all cached information on pageblocks that
  * should be skipped for page isolation when the migrate and free page scanner
@@ -244,30 +308,54 @@ static bool pageblock_skip_persistent(struct page *page)
  */
 static void __reset_isolation_suitable(struct zone *zone)
 {
-       unsigned long start_pfn = zone->zone_start_pfn;
-       unsigned long end_pfn = zone_end_pfn(zone);
-       unsigned long pfn;
+       unsigned long migrate_pfn = zone->zone_start_pfn;
+       unsigned long free_pfn = zone_end_pfn(zone);
+       unsigned long reset_migrate = free_pfn;
+       unsigned long reset_free = migrate_pfn;
+       bool source_set = false;
+       bool free_set = false;
+
+       if (!zone->compact_blockskip_flush)
+               return;
 
        zone->compact_blockskip_flush = false;
 
-       /* Walk the zone and mark every pageblock as suitable for isolation */
-       for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
-               struct page *page;
-
+       /*
+        * Walk the zone and update pageblock skip information. The source
+        * scanner looks for PageLRU while the target scanner looks for
+        * PageBuddy. Once a restart point is found for one scanner, both
+        * PageBuddy and PageLRU are checked as the pageblock is suitable
+        * as both source and target.
+        */
+       for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
+                                       free_pfn -= pageblock_nr_pages) {
                cond_resched();
 
-               page = pfn_to_online_page(pfn);
-               if (!page)
-                       continue;
-               if (zone != page_zone(page))
-                       continue;
-               if (pageblock_skip_persistent(page))
-                       continue;
+               /* Update the migrate PFN */
+               if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
+                   migrate_pfn < reset_migrate) {
+                       source_set = true;
+                       reset_migrate = migrate_pfn;
+                       zone->compact_init_migrate_pfn = reset_migrate;
+                       zone->compact_cached_migrate_pfn[0] = reset_migrate;
+                       zone->compact_cached_migrate_pfn[1] = reset_migrate;
+               }
 
-               clear_pageblock_skip(page);
+               /* Update the free PFN */
+               if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
+                   free_pfn > reset_free) {
+                       free_set = true;
+                       reset_free = free_pfn;
+                       zone->compact_init_free_pfn = reset_free;
+                       zone->compact_cached_free_pfn = reset_free;
+               }
        }
 
-       reset_cached_positions(zone);
+       /* Leave no distance between the scanners if no suitable block was reset */
+       if (reset_migrate >= reset_free) {
+               zone->compact_cached_migrate_pfn[0] = migrate_pfn;
+               zone->compact_cached_migrate_pfn[1] = migrate_pfn;
+               zone->compact_cached_free_pfn = free_pfn;
+       }
 }
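
The reset now walks the zone from both ends at once: the migrate candidate
climbs while the free candidate descends, and the loop stops when they cross,
so each scanner's restart point is found in at most half a zone walk. A toy
sketch of just the pointer movement (one block per step, an assumption made
for clarity; the real loop steps by pageblock_nr_pages):

    #include <stdio.h>

    int main(void)
    {
        /* Toy zone of 8 pageblocks */
        unsigned long migrate = 0, free = 8, steps = 0;

        for (; migrate < free; migrate++, free--)
            steps++;

        /* prints "met after 4 steps (migrate=4 free=4)" */
        printf("met after %lu steps (migrate=%lu free=%lu)\n",
               steps, migrate, free);
        return 0;
    }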
 
 void reset_isolation_suitable(pg_data_t *pgdat)
@@ -285,16 +373,54 @@ void reset_isolation_suitable(pg_data_t *pgdat)
        }
 }
 
+/*
+ * Sets the pageblock skip bit if it was clear. Note that this is a hint as
+ * locks are not required for readers/writers. Returns true if it was already set.
+ */
+static bool test_and_set_skip(struct compact_control *cc, struct page *page,
+                                                       unsigned long pfn)
+{
+       bool skip;
+
+       /* Do not update if the skip hint is being ignored */
+       if (cc->ignore_skip_hint)
+               return false;
+
+       if (!IS_ALIGNED(pfn, pageblock_nr_pages))
+               return false;
+
+       skip = get_pageblock_skip(page);
+       if (!skip && !cc->no_set_skip_hint)
+               set_pageblock_skip(page);
+
+       return skip;
+}
+
+static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
+{
+       struct zone *zone = cc->zone;
+
+       pfn = pageblock_end_pfn(pfn);
+
+       /* Set for isolation rather than compaction */
+       if (cc->no_set_skip_hint)
+               return;
+
+       if (pfn > zone->compact_cached_migrate_pfn[0])
+               zone->compact_cached_migrate_pfn[0] = pfn;
+       if (cc->mode != MIGRATE_ASYNC &&
+           pfn > zone->compact_cached_migrate_pfn[1])
+               zone->compact_cached_migrate_pfn[1] = pfn;
+}
+
 /*
  * If no pages were isolated then mark this pageblock to be skipped in the
  * future. The information is later cleared by __reset_isolation_suitable().
  */
 static void update_pageblock_skip(struct compact_control *cc,
-                       struct page *page, unsigned long nr_isolated,
-                       bool migrate_scanner)
+                       struct page *page, unsigned long pfn)
 {
        struct zone *zone = cc->zone;
-       unsigned long pfn;
 
        if (cc->no_set_skip_hint)
                return;
@@ -302,24 +428,11 @@ static void update_pageblock_skip(struct compact_control *cc,
        if (!page)
                return;
 
-       if (nr_isolated)
-               return;
-
        set_pageblock_skip(page);
 
-       pfn = page_to_pfn(page);
-
        /* Update where async and sync compaction should restart */
-       if (migrate_scanner) {
-               if (pfn > zone->compact_cached_migrate_pfn[0])
-                       zone->compact_cached_migrate_pfn[0] = pfn;
-               if (cc->mode != MIGRATE_ASYNC &&
-                   pfn > zone->compact_cached_migrate_pfn[1])
-                       zone->compact_cached_migrate_pfn[1] = pfn;
-       } else {
-               if (pfn < zone->compact_cached_free_pfn)
-                       zone->compact_cached_free_pfn = pfn;
-       }
+       if (pfn < zone->compact_cached_free_pfn)
+               zone->compact_cached_free_pfn = pfn;
 }
 #else
 static inline bool isolation_suitable(struct compact_control *cc,
@@ -334,32 +447,42 @@ static inline bool pageblock_skip_persistent(struct page *page)
 }
 
 static inline void update_pageblock_skip(struct compact_control *cc,
-                       struct page *page, unsigned long nr_isolated,
-                       bool migrate_scanner)
+                       struct page *page, unsigned long pfn)
+{
+}
+
+static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
+{
+}
+
+static bool test_and_set_skip(struct compact_control *cc, struct page *page,
+                                                       unsigned long pfn)
 {
+       return false;
 }
 #endif /* CONFIG_COMPACTION */
 
 /*
  * Compaction requires the taking of some coarse locks that are potentially
- * very heavily contended. For async compaction, back out if the lock cannot
- * be taken immediately. For sync compaction, spin on the lock if needed.
+ * very heavily contended. For async compaction, trylock and record if the
+ * lock is contended. The lock will still be acquired but compaction will
+ * abort when the current block is finished regardless of success rate.
+ * Sync compaction acquires the lock.
  *
- * Returns true if the lock is held
- * Returns false if the lock is not held and compaction should abort
+ * Always returns true, which makes it easier to track lock state in callers.
  */
-static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
+static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
                                                struct compact_control *cc)
 {
-       if (cc->mode == MIGRATE_ASYNC) {
-               if (!spin_trylock_irqsave(lock, *flags)) {
-                       cc->contended = true;
-                       return false;
-               }
-       } else {
-               spin_lock_irqsave(lock, *flags);
+       /* Track if the lock is contended in async mode */
+       if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
+               if (spin_trylock_irqsave(lock, *flags))
+                       return true;
+
+               cc->contended = true;
        }
 
+       spin_lock_irqsave(lock, *flags);
        return true;
 }
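
The shape of compact_lock_irqsave() is trylock first, note contention, then
block: the caller always ends up holding the lock and checks cc->contended
later to decide whether to finish the current block and abort. A pthread
sketch of the same pattern (illustrative only, not the kernel API):

    #include <pthread.h>
    #include <stdbool.h>

    struct ctl {
        bool async;      /* stands in for cc->mode == MIGRATE_ASYNC */
        bool contended;
    };

    /* Always returns with the lock held; contention is only recorded */
    static void compact_lock(pthread_mutex_t *lock, struct ctl *cc)
    {
        if (cc->async && !cc->contended) {
            if (pthread_mutex_trylock(lock) == 0)
                return;
            cc->contended = true; /* abort after the current block */
        }
        pthread_mutex_lock(lock);
    }

    int main(void)
    {
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        struct ctl cc = { .async = true, .contended = false };

        compact_lock(&lock, &cc); /* uncontended: trylock succeeds */
        pthread_mutex_unlock(&lock);
        return 0;
    }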
 
@@ -391,37 +514,7 @@ static bool compact_unlock_should_abort(spinlock_t *lock,
                return true;
        }
 
-       if (need_resched()) {
-               if (cc->mode == MIGRATE_ASYNC) {
-                       cc->contended = true;
-                       return true;
-               }
-               cond_resched();
-       }
-
-       return false;
-}
-
-/*
- * Aside from avoiding lock contention, compaction also periodically checks
- * need_resched() and either schedules in sync compaction or aborts async
- * compaction. This is similar to what compact_unlock_should_abort() does, but
- * is used where no lock is concerned.
- *
- * Returns false when no scheduling was needed, or sync compaction scheduled.
- * Returns true when async compaction should abort.
- */
-static inline bool compact_should_abort(struct compact_control *cc)
-{
-       /* async compaction aborts if contended */
-       if (need_resched()) {
-               if (cc->mode == MIGRATE_ASYNC) {
-                       cc->contended = true;
-                       return true;
-               }
-
-               cond_resched();
-       }
+       cond_resched();
 
        return false;
 }
@@ -435,19 +528,24 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                                unsigned long *start_pfn,
                                unsigned long end_pfn,
                                struct list_head *freelist,
+                               unsigned int stride,
                                bool strict)
 {
        int nr_scanned = 0, total_isolated = 0;
-       struct page *cursor, *valid_page = NULL;
+       struct page *cursor;
        unsigned long flags = 0;
        bool locked = false;
        unsigned long blockpfn = *start_pfn;
        unsigned int order;
 
+       /* Strict mode is for isolation, speed is secondary */
+       if (strict)
+               stride = 1;
+
        cursor = pfn_to_page(blockpfn);
 
        /* Isolate free pages. */
-       for (; blockpfn < end_pfn; blockpfn++, cursor++) {
+       for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
                int isolated;
                struct page *page = cursor;
 
@@ -465,9 +563,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                if (!pfn_valid_within(blockpfn))
                        goto isolate_fail;
 
-               if (!valid_page)
-                       valid_page = page;
-
                /*
                 * For compound pages such as THP and hugetlbfs, we can save
                 * potentially a lot of iterations if we skip them at once.
@@ -495,18 +590,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                 * recheck as well.
                 */
                if (!locked) {
-                       /*
-                        * The zone lock must be held to isolate freepages.
-                        * Unfortunately this is a very coarse lock and can be
-                        * heavily contended if there are parallel allocations
-                        * or parallel compactions. For async compaction do not
-                        * spin on the lock and we acquire the lock as late as
-                        * possible.
-                        */
-                       locked = compact_trylock_irqsave(&cc->zone->lock,
+                       locked = compact_lock_irqsave(&cc->zone->lock,
                                                                &flags, cc);
-                       if (!locked)
-                               break;
 
                        /* Recheck this is a buddy page under lock */
                        if (!PageBuddy(page))
@@ -565,10 +650,6 @@ isolate_fail:
        if (strict && blockpfn < end_pfn)
                total_isolated = 0;
 
-       /* Update the pageblock-skip if the whole pageblock was scanned */
-       if (blockpfn == end_pfn)
-               update_pageblock_skip(cc, valid_page, total_isolated, false);
-
        cc->total_free_scanned += nr_scanned;
        if (total_isolated)
                count_compact_events(COMPACTISOLATED, total_isolated);
@@ -626,7 +707,7 @@ isolate_freepages_range(struct compact_control *cc,
                        break;
 
                isolated = isolate_freepages_block(cc, &isolate_start_pfn,
-                                               block_end_pfn, &freelist, true);
+                                       block_end_pfn, &freelist, 0, true);
 
                /*
                 * In strict mode, isolate_freepages_block() returns 0 if
@@ -644,7 +725,7 @@ isolate_freepages_range(struct compact_control *cc,
        }
 
        /* __isolate_free_page() does not map the pages */
-       map_pages(&freelist);
+       split_map_pages(&freelist);
 
        if (pfn < end_pfn) {
                /* Loop terminated early, cleanup. */
@@ -657,16 +738,16 @@ isolate_freepages_range(struct compact_control *cc,
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
-static bool too_many_isolated(struct zone *zone)
+static bool too_many_isolated(pg_data_t *pgdat)
 {
        unsigned long active, inactive, isolated;
 
-       inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
-                       node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
-       active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
-                       node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
-       isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
-                       node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
+       inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
+                       node_page_state(pgdat, NR_INACTIVE_ANON);
+       active = node_page_state(pgdat, NR_ACTIVE_FILE) +
+                       node_page_state(pgdat, NR_ACTIVE_ANON);
+       isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
+                       node_page_state(pgdat, NR_ISOLATED_ANON);
 
        return isolated > (inactive + active) / 2;
 }
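
The throttle trips once isolated LRU pages exceed half of the node's
remaining active plus inactive pages. A worked example with made-up counter
values:

    #include <stdbool.h>
    #include <stdio.h>

    /* Same predicate as above, with hypothetical node counters in pages */
    static bool too_many_isolated(unsigned long inactive, unsigned long active,
                                  unsigned long isolated)
    {
        return isolated > (inactive + active) / 2;
    }

    int main(void)
    {
        /* 300000 LRU pages on the node: the limit is 150000 isolated */
        printf("%d\n", too_many_isolated(200000, 100000, 140000)); /* 0 */
        printf("%d\n", too_many_isolated(200000, 100000, 160000)); /* 1 */
        return 0;
    }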
@@ -693,7 +774,7 @@ static unsigned long
 isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        unsigned long end_pfn, isolate_mode_t isolate_mode)
 {
-       struct zone *zone = cc->zone;
+       pg_data_t *pgdat = cc->zone->zone_pgdat;
        unsigned long nr_scanned = 0, nr_isolated = 0;
        struct lruvec *lruvec;
        unsigned long flags = 0;
@@ -702,13 +783,14 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
        unsigned long start_pfn = low_pfn;
        bool skip_on_failure = false;
        unsigned long next_skip_pfn = 0;
+       bool skip_updated = false;
 
        /*
         * Ensure that there are not too many pages isolated from the LRU
         * list by either parallel reclaimers or compaction. If there are,
         * delay for some time until fewer pages are isolated
         */
-       while (unlikely(too_many_isolated(zone))) {
+       while (unlikely(too_many_isolated(pgdat))) {
                /* async migration should just abort */
                if (cc->mode == MIGRATE_ASYNC)
                        return 0;
@@ -719,8 +801,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        return 0;
        }
 
-       if (compact_should_abort(cc))
-               return 0;
+       cond_resched();
 
        if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
                skip_on_failure = true;
@@ -758,8 +839,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                 * if contended.
                 */
                if (!(low_pfn % SWAP_CLUSTER_MAX)
-                   && compact_unlock_should_abort(zone_lru_lock(zone), flags,
-                                                               &locked, cc))
+                   && compact_unlock_should_abort(&pgdat->lru_lock,
+                                           flags, &locked, cc))
                        break;
 
                if (!pfn_valid_within(low_pfn))
@@ -768,8 +849,19 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                page = pfn_to_page(low_pfn);
 
-               if (!valid_page)
+               /*
+                * Check if the pageblock has already been marked skipped.
+                * Only the aligned PFN is checked as the caller isolates
+                * COMPACT_CLUSTER_MAX at a time so the second call must
+                * not falsely conclude that the block should be skipped.
+                */
+               if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
+                       if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
+                               low_pfn = end_pfn;
+                               goto isolate_abort;
+                       }
                        valid_page = page;
+               }
 
                /*
                 * Skip if free. We read page order here without zone lock
@@ -818,7 +910,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        if (unlikely(__PageMovable(page)) &&
                                        !PageIsolated(page)) {
                                if (locked) {
-                                       spin_unlock_irqrestore(zone_lru_lock(zone),
+                                       spin_unlock_irqrestore(&pgdat->lru_lock,
                                                                        flags);
                                        locked = false;
                                }
@@ -848,10 +940,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                /* If we already hold the lock, we can skip some rechecking */
                if (!locked) {
-                       locked = compact_trylock_irqsave(zone_lru_lock(zone),
+                       locked = compact_lock_irqsave(&pgdat->lru_lock,
                                                                &flags, cc);
-                       if (!locked)
-                               break;
+
+                       /* Try to get exclusive access under lock */
+                       if (!skip_updated) {
+                               skip_updated = true;
+                               if (test_and_set_skip(cc, page, low_pfn))
+                                       goto isolate_abort;
+                       }
 
                        /* Recheck PageLRU and PageCompound under lock */
                        if (!PageLRU(page))
@@ -868,7 +965,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        }
                }
 
-               lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
                /* Try isolate the page */
                if (__isolate_lru_page(page, isolate_mode) != 0)
@@ -887,16 +984,13 @@ isolate_success:
                nr_isolated++;
 
                /*
-                * Record where we could have freed pages by migration and not
-                * yet flushed them to buddy allocator.
-                * - this is the lowest page that was isolated and likely be
-                * then freed by migration.
+                * Avoid isolating too much unless this block is being
+                * rescanned (e.g. dirty/writeback pages, parallel allocation)
+                * or a lock is contended. For contention, isolate quickly to
+                * potentially remove one source of contention.
                 */
-               if (!cc->last_migrated_pfn)
-                       cc->last_migrated_pfn = low_pfn;
-
-               /* Avoid isolating too much */
-               if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
+               if (cc->nr_migratepages == COMPACT_CLUSTER_MAX &&
+                   !cc->rescan && !cc->contended) {
                        ++low_pfn;
                        break;
                }
@@ -913,12 +1007,11 @@ isolate_fail:
                 */
                if (nr_isolated) {
                        if (locked) {
-                               spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+                               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
                                locked = false;
                        }
                        putback_movable_pages(&cc->migratepages);
                        cc->nr_migratepages = 0;
-                       cc->last_migrated_pfn = 0;
                        nr_isolated = 0;
                }
 
@@ -939,15 +1032,23 @@ isolate_fail:
        if (unlikely(low_pfn > end_pfn))
                low_pfn = end_pfn;
 
+isolate_abort:
        if (locked)
-               spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 
        /*
-        * Update the pageblock-skip information and cached scanner pfn,
-        * if the whole pageblock was scanned without isolating any page.
+        * Update the cached scanner pfn once the pageblock has been scanned.
+        * Pages will either be migrated, in which case there is no point
+        * scanning in the near future, or migration failed, in which case
+        * the failure reason may persist. The block is marked for skipping
+        * if there were no pages isolated in the block or if the block is
+        * rescanned twice in a row.
         */
-       if (low_pfn == end_pfn)
-               update_pageblock_skip(cc, valid_page, nr_isolated, true);
+       if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
+               if (valid_page && !skip_updated)
+                       set_pageblock_skip(valid_page);
+               update_cached_migrate(cc, low_pfn);
+       }
 
        trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
                                                nr_scanned, nr_isolated);
@@ -1013,6 +1114,9 @@ static bool suitable_migration_source(struct compact_control *cc,
 {
        int block_mt;
 
+       if (pageblock_skip_persistent(page))
+               return false;
+
        if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
                return true;
 
@@ -1050,6 +1154,12 @@ static bool suitable_migration_target(struct compact_control *cc,
        return false;
 }
 
+static inline unsigned int
+freelist_scan_limit(struct compact_control *cc)
+{
+       return (COMPACT_CLUSTER_MAX >> cc->fast_search_fail) + 1;
+}
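
freelist_scan_limit() halves the search budget with every consecutive
fast-search failure and bottoms out at one entry. Assuming COMPACT_CLUSTER_MAX
is 32 (it equals SWAP_CLUSTER_MAX on common configs), the decay looks like
this:

    #include <stdio.h>

    #define COMPACT_CLUSTER_MAX 32 /* assumed value of SWAP_CLUSTER_MAX */

    int main(void)
    {
        unsigned int fail;

        /* prints 33 17 9 5 3 2 1 */
        for (fail = 0; fail <= 6; fail++)
            printf("%u ", (COMPACT_CLUSTER_MAX >> fail) + 1);
        printf("\n");
        return 0;
    }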
+
 /*
  * Test whether the free scanner has reached the same or lower pageblock than
  * the migration scanner, and compaction should thus terminate.
@@ -1060,6 +1170,248 @@ static inline bool compact_scanners_met(struct compact_control *cc)
                <= (cc->migrate_pfn >> pageblock_order);
 }
 
+/*
+ * Used when scanning for a suitable migration target which scans freelists
+ * in reverse. Reorders the list such that the unscanned pages are scanned
+ * first on the next iteration of the free scanner.
+ */
+static void
+move_freelist_head(struct list_head *freelist, struct page *freepage)
+{
+       LIST_HEAD(sublist);
+
+       if (!list_is_last(freelist, &freepage->lru)) {
+               list_cut_before(&sublist, freelist, &freepage->lru);
+               if (!list_empty(&sublist))
+                       list_splice_tail(&sublist, freelist);
+       }
+}
+
+/*
+ * Similar to move_freelist_head except used by the migration scanner
+ * when scanning forward. It's possible for these list operations to
+ * move against each other if they search the free list exactly in
+ * lockstep.
+ */
+static void
+move_freelist_tail(struct list_head *freelist, struct page *freepage)
+{
+       LIST_HEAD(sublist);
+
+       if (!list_is_first(freelist, &freepage->lru)) {
+               list_cut_position(&sublist, freelist, &freepage->lru);
+               if (!list_empty(&sublist))
+                       list_splice_tail(&sublist, freelist);
+       }
+}
+
+static void
+fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
+{
+       unsigned long start_pfn, end_pfn;
+       struct page *page = pfn_to_page(pfn);
+
+       /* Do not search around if there are enough pages already */
+       if (cc->nr_freepages >= cc->nr_migratepages)
+               return;
+
+       /* Minimise scanning during async compaction */
+       if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
+               return;
+
+       /* Pageblock boundaries */
+       start_pfn = pageblock_start_pfn(pfn);
+       end_pfn = min(start_pfn + pageblock_nr_pages, zone_end_pfn(cc->zone));
+
+       /* Scan before */
+       if (start_pfn != pfn) {
+               isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
+               if (cc->nr_freepages >= cc->nr_migratepages)
+                       return;
+       }
+
+       /* Scan after */
+       start_pfn = pfn + nr_isolated;
+       if (start_pfn != end_pfn)
+               isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
+
+       /* Skip this pageblock in the future as it's full or nearly full */
+       if (cc->nr_freepages < cc->nr_migratepages)
+               set_pageblock_skip(page);
+}
+
+/* Search orders in round-robin fashion */
+static int next_search_order(struct compact_control *cc, int order)
+{
+       order--;
+       if (order < 0)
+               order = cc->order - 1;
+
+       /* Search wrapped around? */
+       if (order == cc->search_order) {
+               cc->search_order--;
+               if (cc->search_order < 0)
+                       cc->search_order = cc->order - 1;
+               return -1;
+       }
+
+       return order;
+}
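
next_search_order() walks the orders downwards from cc->search_order, wraps
to cc->order - 1, and returns -1 once the walk arrives back where it started.
A standalone sketch of one full rotation, assuming cc->order = 5 and a
starting search_order of 2:

    #include <stdio.h>

    struct cc { int order, search_order; };

    /* Copy of the round-robin helper above, for illustration */
    static int next_search_order(struct cc *cc, int order)
    {
        order--;
        if (order < 0)
            order = cc->order - 1;

        if (order == cc->search_order) { /* wrapped around */
            cc->search_order--;
            if (cc->search_order < 0)
                cc->search_order = cc->order - 1;
            return -1;
        }
        return order;
    }

    int main(void)
    {
        struct cc cc = { .order = 5, .search_order = 2 }; /* assumed */
        int order;

        for (order = cc.search_order; order >= 0;
             order = next_search_order(&cc, order))
            printf("%d ", order); /* prints "2 1 0 4 3" */
        printf("\n");
        return 0;
    }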
+
+static unsigned long
+fast_isolate_freepages(struct compact_control *cc)
+{
+       unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
+       unsigned int nr_scanned = 0;
+       unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
+       unsigned long nr_isolated = 0;
+       unsigned long distance;
+       struct page *page = NULL;
+       bool scan_start = false;
+       int order;
+
+       /* Full compaction passes in a negative order */
+       if (cc->order <= 0)
+               return cc->free_pfn;
+
+       /*
+        * If starting the scan, use a deeper search and fall back to the
+        * highest PFN found if a suitable one is not found.
+        */
+       if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
+               limit = pageblock_nr_pages >> 1;
+               scan_start = true;
+       }
+
+       /*
+        * Preferred point is in the top quarter of the scan space but take
+        * a pfn from the top half if the search is problematic.
+        */
+       distance = (cc->free_pfn - cc->migrate_pfn);
+       low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
+       min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
+
+       if (WARN_ON_ONCE(min_pfn > low_pfn))
+               low_pfn = min_pfn;
+
+       /*
+        * Search starts from the last successful isolation order or the next
+        * order to search after a previous failure
+        */
+       cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
+
+       for (order = cc->search_order;
+            !page && order >= 0;
+            order = next_search_order(cc, order)) {
+               struct free_area *area = &cc->zone->free_area[order];
+               struct list_head *freelist;
+               struct page *freepage;
+               unsigned long flags;
+               unsigned int order_scanned = 0;
+
+               if (!area->nr_free)
+                       continue;
+
+               spin_lock_irqsave(&cc->zone->lock, flags);
+               freelist = &area->free_list[MIGRATE_MOVABLE];
+               list_for_each_entry_reverse(freepage, freelist, lru) {
+                       unsigned long pfn;
+
+                       order_scanned++;
+                       nr_scanned++;
+                       pfn = page_to_pfn(freepage);
+
+                       if (pfn >= highest)
+                               highest = pageblock_start_pfn(pfn);
+
+                       if (pfn >= low_pfn) {
+                               cc->fast_search_fail = 0;
+                               cc->search_order = order;
+                               page = freepage;
+                               break;
+                       }
+
+                       if (pfn >= min_pfn && pfn > high_pfn) {
+                               high_pfn = pfn;
+
+                               /* Shorten the scan if a candidate is found */
+                               limit >>= 1;
+                       }
+
+                       if (order_scanned >= limit)
+                               break;
+               }
+
+               /* Use a minimum pfn if a preferred one was not found */
+               if (!page && high_pfn) {
+                       page = pfn_to_page(high_pfn);
+
+                       /* Update freepage for the list reorder below */
+                       freepage = page;
+               }
+
+               /* Reorder so that a future search skips recent pages */
+               move_freelist_head(freelist, freepage);
+
+               /* Isolate the page if available */
+               if (page) {
+                       if (__isolate_free_page(page, order)) {
+                               set_page_private(page, order);
+                               nr_isolated = 1 << order;
+                               cc->nr_freepages += nr_isolated;
+                               list_add_tail(&page->lru, &cc->freepages);
+                               count_compact_events(COMPACTISOLATED, nr_isolated);
+                       } else {
+                               /* If isolation fails, abort the search */
+                               order = -1;
+                               page = NULL;
+                       }
+               }
+
+               spin_unlock_irqrestore(&cc->zone->lock, flags);
+
+               /*
+                * Smaller scan on next order so the total scan is related
+                * to freelist_scan_limit.
+                */
+               if (order_scanned >= limit)
+                       limit = max(1U, limit >> 1);
+       }
+
+       if (!page) {
+               cc->fast_search_fail++;
+               if (scan_start) {
+                       /*
+                        * Use the highest PFN found above min. If one was
+                        * not found, be pessimistic for direct compaction
+                        * and use the min mark.
+                        */
+                       if (highest) {
+                               page = pfn_to_page(highest);
+                               cc->free_pfn = highest;
+                       } else {
+                               if (cc->direct_compaction) {
+                                       page = pfn_to_page(min_pfn);
+                                       cc->free_pfn = min_pfn;
+                               }
+                       }
+               }
+       }
+
+       if (highest && highest >= cc->zone->compact_cached_free_pfn) {
+               highest -= pageblock_nr_pages;
+               cc->zone->compact_cached_free_pfn = highest;
+       }
+
+       cc->total_free_scanned += nr_scanned;
+       if (!page)
+               return cc->free_pfn;
+
+       low_pfn = page_to_pfn(page);
+       fast_isolate_around(cc, low_pfn, nr_isolated);
+       return low_pfn;
+}
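
The window arithmetic near the top of fast_isolate_freepages(): candidates
are preferred from the top quarter of the span between the scanners, with the
top half as the fallback. A worked example with assumed scanner positions
(pageblock rounding omitted for clarity):

    #include <stdio.h>

    int main(void)
    {
        /* Assumed scanner positions in page frame numbers */
        unsigned long free_pfn = 2000000, migrate_pfn = 1000000;
        unsigned long distance = free_pfn - migrate_pfn;

        /* prints "low_pfn ~ 1750000, min_pfn ~ 1500000" */
        printf("low_pfn ~ %lu, min_pfn ~ %lu\n",
               free_pfn - (distance >> 2), free_pfn - (distance >> 1));
        return 0;
    }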
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
@@ -1073,6 +1425,12 @@ static void isolate_freepages(struct compact_control *cc)
        unsigned long block_end_pfn;    /* end of current pageblock */
        unsigned long low_pfn;       /* lowest pfn scanner is able to scan */
        struct list_head *freelist = &cc->freepages;
+       unsigned int stride;
+
+       /* Try a small search of the free lists for a candidate */
+       isolate_start_pfn = fast_isolate_freepages(cc);
+       if (cc->nr_freepages)
+               goto splitmap;
 
        /*
         * Initialise the free scanner. The starting point is where we last
@@ -1086,10 +1444,11 @@ static void isolate_freepages(struct compact_control *cc)
         * is using.
         */
        isolate_start_pfn = cc->free_pfn;
-       block_start_pfn = pageblock_start_pfn(cc->free_pfn);
+       block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
        block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
                                                zone_end_pfn(zone));
        low_pfn = pageblock_end_pfn(cc->migrate_pfn);
+       stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
 
        /*
         * Isolate free pages until enough are available to migrate the
@@ -1100,14 +1459,14 @@ static void isolate_freepages(struct compact_control *cc)
                                block_end_pfn = block_start_pfn,
                                block_start_pfn -= pageblock_nr_pages,
                                isolate_start_pfn = block_start_pfn) {
+               unsigned long nr_isolated;
+
                /*
                 * This can iterate a massively long zone without finding any
-                * suitable migration targets, so periodically check if we need
-                * to schedule, or even abort async compaction.
+                * suitable migration targets, so periodically check resched.
                 */
-               if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
-                                               && compact_should_abort(cc))
-                       break;
+               if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
+                       cond_resched();
 
                page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
                                                                        zone);
@@ -1123,15 +1482,15 @@ static void isolate_freepages(struct compact_control *cc)
                        continue;
 
                /* Found a block suitable for isolating free pages from. */
-               isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
-                                       freelist, false);
+               nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+                                       block_end_pfn, freelist, stride, false);
 
-               /*
-                * If we isolated enough freepages, or aborted due to lock
-                * contention, terminate.
-                */
-               if ((cc->nr_freepages >= cc->nr_migratepages)
-                                                       || cc->contended) {
+               /* Update the skip hint if the full pageblock was scanned */
+               if (isolate_start_pfn == block_end_pfn)
+                       update_pageblock_skip(cc, page, block_start_pfn);
+
+               /* Are enough freepages isolated? */
+               if (cc->nr_freepages >= cc->nr_migratepages) {
                        if (isolate_start_pfn >= block_end_pfn) {
                                /*
                                 * Restart at previous pageblock if more
@@ -1148,10 +1507,14 @@ static void isolate_freepages(struct compact_control *cc)
                         */
                        break;
                }
-       }
 
-       /* __isolate_free_page() does not map the pages */
-       map_pages(freelist);
+               /* Adjust stride depending on isolation */
+               if (nr_isolated) {
+                       stride = 1;
+                       continue;
+               }
+               stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
+       }
 
        /*
         * Record where the free scanner will restart next time. Either we
@@ -1160,6 +1523,10 @@ static void isolate_freepages(struct compact_control *cc)
         * and the loop terminated due to isolate_start_pfn < low_pfn
         */
        cc->free_pfn = isolate_start_pfn;
+
+splitmap:
+       /* __isolate_free_page() does not map the pages */
+       split_map_pages(freelist);
 }
 
 /*
@@ -1172,13 +1539,8 @@ static struct page *compaction_alloc(struct page *migratepage,
        struct compact_control *cc = (struct compact_control *)data;
        struct page *freepage;
 
-       /*
-        * Isolate free pages if necessary, and if we are not aborting due to
-        * contention.
-        */
        if (list_empty(&cc->freepages)) {
-               if (!cc->contended)
-                       isolate_freepages(cc);
+               isolate_freepages(cc);
 
                if (list_empty(&cc->freepages))
                        return NULL;
@@ -1217,6 +1579,147 @@ typedef enum {
  */
 int sysctl_compact_unevictable_allowed __read_mostly = 1;
 
+static inline void
+update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
+{
+       if (cc->fast_start_pfn == ULONG_MAX)
+               return;
+
+       if (!cc->fast_start_pfn)
+               cc->fast_start_pfn = pfn;
+
+       cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
+}
+
+static inline unsigned long
+reinit_migrate_pfn(struct compact_control *cc)
+{
+       if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
+               return cc->migrate_pfn;
+
+       cc->migrate_pfn = cc->fast_start_pfn;
+       cc->fast_start_pfn = ULONG_MAX;
+
+       return cc->migrate_pfn;
+}
+
+/*
+ * Briefly search the free lists for a migration source that already has
+ * some free pages to reduce the number of pages that need migration
+ * before a pageblock is free.
+ */
+static unsigned long fast_find_migrateblock(struct compact_control *cc)
+{
+       unsigned int limit = freelist_scan_limit(cc);
+       unsigned int nr_scanned = 0;
+       unsigned long distance;
+       unsigned long pfn = cc->migrate_pfn;
+       unsigned long high_pfn;
+       int order;
+
+       /* Skip hints are relied on to avoid repeats on the fast search */
+       if (cc->ignore_skip_hint)
+               return pfn;
+
+       /*
+        * If the migrate_pfn is not at the start of a zone or the start
+        * of a pageblock then assume this is a continuation of a previous
+        * scan restarted due to COMPACT_CLUSTER_MAX.
+        */
+       if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
+               return pfn;
+
+       /*
+        * For smaller orders, just linearly scan as the number of pages
+        * to migrate should be relatively small and does not necessarily
+        * justify freeing up a large block for a small allocation.
+        */
+       if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
+               return pfn;
+
+       /*
+        * Only allow kcompactd and direct requests for movable pages to
+        * quickly clear out a MOVABLE pageblock for allocation. This
+        * reduces the risk that a large movable pageblock is freed for
+        * an unmovable/reclaimable small allocation.
+        */
+       if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
+               return pfn;
+
+       /*
+        * When starting the migration scanner, pick any pageblock within the
+        * first half of the search space. Otherwise try and pick a pageblock
+        * within the first eighth to reduce the chances that a migration
+        * target later becomes a source.
+        */
+       distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+       if (cc->migrate_pfn != cc->zone->zone_start_pfn)
+               distance >>= 2;
+       high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
+
+       for (order = cc->order - 1;
+            order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
+            order--) {
+               struct free_area *area = &cc->zone->free_area[order];
+               struct list_head *freelist;
+               unsigned long flags;
+               struct page *freepage;
+
+               if (!area->nr_free)
+                       continue;
+
+               spin_lock_irqsave(&cc->zone->lock, flags);
+               freelist = &area->free_list[MIGRATE_MOVABLE];
+               list_for_each_entry(freepage, freelist, lru) {
+                       unsigned long free_pfn;
+
+                       nr_scanned++;
+                       free_pfn = page_to_pfn(freepage);
+                       if (free_pfn < high_pfn) {
+                               /*
+                                * Avoid if skipped recently. Ideally it would
+                                * move to the tail but even safe iteration of
+                                * the list assumes an entry is deleted, not
+                                * reordered.
+                                */
+                               if (get_pageblock_skip(freepage)) {
+                                       if (list_is_last(freelist, &freepage->lru))
+                                               break;
+
+                                       continue;
+                               }
+
+                               /* Reorder so that a future search skips recent pages */
+                               move_freelist_tail(freelist, freepage);
+
+                               update_fast_start_pfn(cc, free_pfn);
+                               pfn = pageblock_start_pfn(free_pfn);
+                               cc->fast_search_fail = 0;
+                               set_pageblock_skip(freepage);
+                               break;
+                       }
+
+                       if (nr_scanned >= limit) {
+                               cc->fast_search_fail++;
+                               move_freelist_tail(freelist, freepage);
+                               break;
+                       }
+               }
+               spin_unlock_irqrestore(&cc->zone->lock, flags);
+       }
+
+       cc->total_migrate_scanned += nr_scanned;
+
+       /*
+        * If fast scanning failed then use a cached entry for a page block
+        * that had free pages as the basis for starting a linear scan.
+        */
+       if (pfn == cc->migrate_pfn)
+               pfn = reinit_migrate_pfn(cc);
+
+       return pfn;
+}
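
The migration-side search window mirrors the free-side one: half the span
between the scanners when starting at the zone boundary, narrowed to an
eighth on a continued scan. Under the same kind of assumed PFNs:

    #include <stdio.h>

    int main(void)
    {
        /* Assumed positions; zone_start picked so the scan is a restart */
        unsigned long free_pfn = 2000000, migrate_pfn = 1200000;
        unsigned long zone_start = 1000000;
        unsigned long distance = (free_pfn - migrate_pfn) >> 1;

        if (migrate_pfn != zone_start)
            distance >>= 2; /* continued scan: first eighth only */

        /* prints "search up to pfn 1300000" */
        printf("search up to pfn %lu\n", migrate_pfn + distance);
        return 0;
    }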
+
 /*
  * Isolate all pages that can be migrated from the first suitable block,
  * starting at the block pointed to by the migrate scanner pfn within
@@ -1232,16 +1735,25 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
        const isolate_mode_t isolate_mode =
                (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
                (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+       bool fast_find_block;
 
        /*
         * Start at where we last stopped, or beginning of the zone as
-        * initialized by compact_zone()
+        * initialized by compact_zone(). The first failure will use
+        * the lowest PFN as the starting point for linear scanning.
         */
-       low_pfn = cc->migrate_pfn;
+       low_pfn = fast_find_migrateblock(cc);
        block_start_pfn = pageblock_start_pfn(low_pfn);
        if (block_start_pfn < zone->zone_start_pfn)
                block_start_pfn = zone->zone_start_pfn;
 
+       /*
+        * fast_find_migrateblock() marks a pageblock as skipped, so to
+        * avoid the isolation_suitable() check below, record whether the
+        * fast search was successful.
+        */
+       fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
+
        /* Only scan within a pageblock boundary */
        block_end_pfn = pageblock_end_pfn(low_pfn);
 
@@ -1250,6 +1762,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
         * Do not cross the free scanner.
         */
        for (; block_end_pfn <= cc->free_pfn;
+                       fast_find_block = false,
                        low_pfn = block_end_pfn,
                        block_start_pfn = block_end_pfn,
                        block_end_pfn += pageblock_nr_pages) {
@@ -1257,34 +1770,45 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                /*
                 * This can potentially iterate a massively long zone with
                 * many pageblocks unsuitable, so periodically check if we
-                * need to schedule, or even abort async compaction.
+                * need to schedule.
                 */
-               if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
-                                               && compact_should_abort(cc))
-                       break;
+               if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
+                       cond_resched();
 
                page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
                                                                        zone);
                if (!page)
                        continue;
 
-               /* If isolation recently failed, do not retry */
-               if (!isolation_suitable(cc, page))
+               /*
+                * If isolation recently failed, do not retry. Only check the
+                * pageblock once, as COMPACT_CLUSTER_MAX causes a pageblock
+                * to be visited multiple times. Assume the skip hint was
+                * checked before the block was marked "skip", so that other
+                * compaction instances do not scan the same block.
+                */
+               if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
+                   !fast_find_block && !isolation_suitable(cc, page))
                        continue;
 
                /*
-                * For async compaction, also only scan in MOVABLE blocks.
-                * Async compaction is optimistic to see if the minimum amount
-                * of work satisfies the allocation.
+                * For async compaction, also only scan in MOVABLE blocks
+                * without huge pages. Async compaction is optimistic to see
+                * if the minimum amount of work satisfies the allocation.
+                * The cached PFN is updated as it's possible that all
+                * remaining blocks between source and target are unsuitable
+                * and the compaction scanners fail to meet.
                 */
-               if (!suitable_migration_source(cc, page))
+               if (!suitable_migration_source(cc, page)) {
+                       update_cached_migrate(cc, block_end_pfn);
                        continue;
+               }
 
                /* Perform the isolation */
                low_pfn = isolate_migratepages_block(cc, low_pfn,
                                                block_end_pfn, isolate_mode);
 
-               if (!low_pfn || cc->contended)
+               if (!low_pfn)
                        return ISOLATE_ABORT;
 
                /*
@@ -1310,19 +1834,16 @@ static inline bool is_via_compact_memory(int order)
        return order == -1;
 }
 
-static enum compact_result __compact_finished(struct zone *zone,
-                                               struct compact_control *cc)
+static enum compact_result __compact_finished(struct compact_control *cc)
 {
        unsigned int order;
        const int migratetype = cc->migratetype;
-
-       if (cc->contended || fatal_signal_pending(current))
-               return COMPACT_CONTENDED;
+       int ret;
 
        /* Compaction run completes if the migrate and free scanner meet */
        if (compact_scanners_met(cc)) {
                /* Let the next compaction start anew. */
-               reset_cached_positions(zone);
+               reset_cached_positions(cc->zone);
 
                /*
                 * Mark that the PG_migrate_skip information should be cleared
@@ -1331,7 +1852,7 @@ static enum compact_result __compact_finished(struct zone *zone,
                 * based on an allocation request.
                 */
                if (cc->direct_compaction)
-                       zone->compact_blockskip_flush = true;
+                       cc->zone->compact_blockskip_flush = true;
 
                if (cc->whole_zone)
                        return COMPACT_COMPLETE;
@@ -1342,20 +1863,19 @@ static enum compact_result __compact_finished(struct zone *zone,
        if (is_via_compact_memory(cc->order))
                return COMPACT_CONTINUE;
 
-       if (cc->finishing_block) {
-               /*
-                * We have finished the pageblock, but better check again that
-                * we really succeeded.
-                */
-               if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
-                       cc->finishing_block = false;
-               else
-                       return COMPACT_CONTINUE;
-       }
+       /*
+        * Always finish scanning a pageblock to reduce the possibility of
+        * fallbacks in the future. This is particularly important when
+        * the migration source is unmovable/reclaimable but it's not worth
+        * special casing.
+        */
+       if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
+               return COMPACT_CONTINUE;
 
        /* Direct compactor: Is a suitable page free? */
+       ret = COMPACT_NO_SUITABLE_PAGE;
        for (order = cc->order; order < MAX_ORDER; order++) {
-               struct free_area *area = &zone->free_area[order];
+               struct free_area *area = &cc->zone->free_area[order];
                bool can_steal;
 
                /* Job done if page is free of the right migratetype */
@@ -1393,21 +1913,23 @@ static enum compact_result __compact_finished(struct zone *zone,
                                return COMPACT_SUCCESS;
                        }
 
-                       cc->finishing_block = true;
-                       return COMPACT_CONTINUE;
+                       ret = COMPACT_CONTINUE;
+                       break;
                }
        }
 
-       return COMPACT_NO_SUITABLE_PAGE;
+       if (cc->contended || fatal_signal_pending(current))
+               ret = COMPACT_CONTENDED;
+
+       return ret;
 }
 
-static enum compact_result compact_finished(struct zone *zone,
-                       struct compact_control *cc)
+static enum compact_result compact_finished(struct compact_control *cc)
 {
        int ret;
 
-       ret = __compact_finished(zone, cc);
-       trace_mm_compaction_finished(zone, cc->order, ret);
+       ret = __compact_finished(cc);
+       trace_mm_compaction_finished(cc->zone, cc->order, ret);
        if (ret == COMPACT_NO_SUITABLE_PAGE)
                ret = COMPACT_CONTINUE;
 
@@ -1534,15 +2056,18 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
        return false;
 }
 
-static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
        enum compact_result ret;
-       unsigned long start_pfn = zone->zone_start_pfn;
-       unsigned long end_pfn = zone_end_pfn(zone);
+       unsigned long start_pfn = cc->zone->zone_start_pfn;
+       unsigned long end_pfn = zone_end_pfn(cc->zone);
+       unsigned long last_migrated_pfn;
        const bool sync = cc->mode != MIGRATE_ASYNC;
+       bool update_cached;
 
        cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
-       ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
+       ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
                                                        cc->classzone_idx);
        /* Compaction is likely to fail */
        if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
@@ -1555,8 +2080,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
         * Clear pageblock skip if there were failures recently and compaction
         * is about to be retried after being deferred.
         */
-       if (compaction_restarting(zone, cc->order))
-               __reset_isolation_suitable(zone);
+       if (compaction_restarting(cc->zone, cc->order))
+               __reset_isolation_suitable(cc->zone);
 
        /*
         * Setup to move all movable pages to the end of the zone. Used cached
@@ -1564,43 +2089,76 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
         * want to compact the whole zone), but check that it is initialised
         * by ensuring the values are within zone boundaries.
         */
+       cc->fast_start_pfn = 0;
        if (cc->whole_zone) {
                cc->migrate_pfn = start_pfn;
                cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
        } else {
-               cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
-               cc->free_pfn = zone->compact_cached_free_pfn;
+               cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
+               cc->free_pfn = cc->zone->compact_cached_free_pfn;
                if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
                        cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
-                       zone->compact_cached_free_pfn = cc->free_pfn;
+                       cc->zone->compact_cached_free_pfn = cc->free_pfn;
                }
                if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
                        cc->migrate_pfn = start_pfn;
-                       zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
-                       zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
+                       cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+                       cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
                }
 
-               if (cc->migrate_pfn == start_pfn)
+               if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
                        cc->whole_zone = true;
        }
 
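Aside, not part of the patch: the cached scanner positions may be stale, so each is reset to its zone boundary when it falls outside [start_pfn, end_pfn); the patch additionally treats any migration start at or before compact_init_migrate_pfn as covering the whole zone. A compact model of the clamping, with plain variables in place of the zone fields (the pageblock alignment of the free-scanner reset is omitted):

#include <stdbool.h>

static void clamp_scanners(unsigned long start, unsigned long end,
			   unsigned long init_migrate,
			   unsigned long *migrate, unsigned long *free,
			   bool *whole_zone)
{
	if (*free < start || *free >= end)
		*free = end - 1;	/* real code: pageblock_start_pfn(end - 1) */
	if (*migrate < start || *migrate >= end)
		*migrate = start;

	/* Starting early enough is as good as covering the whole zone. */
	if (*migrate <= init_migrate)
		*whole_zone = true;
}
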
-       cc->last_migrated_pfn = 0;
+       last_migrated_pfn = 0;
+
+       /*
+        * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
+        * the basis that some migrations will fail in ASYNC mode. However,
+        * if the cached PFNs match and pageblocks are skipped due to having
+        * no isolation candidates, then the sync state does not matter.
+        * Until a pageblock with isolation candidates is found, keep the
+        * cached PFNs in sync to avoid revisiting the same blocks.
+        */
+       update_cached = !sync &&
+               cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
 
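Aside, not part of the patch: slot 0 of compact_cached_migrate_pfn serves async compaction and slot 1 sync (the array is indexed by the sync bool above), because async passes give up on pages, e.g. dirty or under-writeback ones, that sync passes can still migrate. While pageblocks are rejected for having no candidates at all, that distinction is moot, so an async pass mirrors its progress into the sync slot until it first isolates something. A toy model of the lockstep:

#include <stdbool.h>

struct zone_cache {
	unsigned long migrate_pfn[2];	/* [0] = async, [1] = sync */
};

/* A block with no isolation candidates is empty in either mode:
 * let the sync scanner inherit the async scanner's progress. */
static void on_empty_block(struct zone_cache *zc, const bool *update_cached)
{
	if (*update_cached)
		zc->migrate_pfn[1] = zc->migrate_pfn[0];
}

/* From the first successful isolation on, the modes may diverge. */
static void on_isolated(bool *update_cached)
{
	*update_cached = false;
}
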
        trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
                                cc->free_pfn, end_pfn, sync);
 
        migrate_prep_local();
 
-       while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
+       while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
                int err;
+               unsigned long start_pfn = cc->migrate_pfn;
+
+               /*
+                * Avoid multiple rescans which can happen if a page cannot be
+                * isolated (dirty/writeback in async mode) or if the migrated
+                * pages are being allocated before the pageblock is cleared.
+                * The first rescan will capture the entire pageblock for
+                * migration. If it fails, it'll be marked skip and scanning
+                * will proceed as normal.
+                */
+               cc->rescan = false;
+               if (pageblock_start_pfn(last_migrated_pfn) ==
+                   pageblock_start_pfn(start_pfn)) {
+                       cc->rescan = true;
+               }
 
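Aside, not part of the patch: pageblock_start_pfn() rounds a PFN down to its pageblock boundary, so the test above simply asks whether the migration scanner is starting inside the same pageblock it last migrated from; if so, this pass is flagged as a rescan and the whole block is taken rather than nibbled at again. The arithmetic, assuming the common order-9 (512-page) pageblock of x86-64:

#include <stdbool.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* 1 << pageblock_order, typical on x86-64 */

static unsigned long pageblock_start(unsigned long pfn)
{
	return pfn & ~(PAGEBLOCK_NR_PAGES - 1);	/* round_down(pfn, 512) */
}

/* e.g. last = 1000 and start = 1023 both round down to 512:
 * same pageblock, so this pass is a rescan. */
static bool is_rescan(unsigned long last_migrated_pfn, unsigned long start_pfn)
{
	return pageblock_start(last_migrated_pfn) == pageblock_start(start_pfn);
}
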
-               switch (isolate_migratepages(zone, cc)) {
+               switch (isolate_migratepages(cc->zone, cc)) {
                case ISOLATE_ABORT:
                        ret = COMPACT_CONTENDED;
                        putback_movable_pages(&cc->migratepages);
                        cc->nr_migratepages = 0;
+                       last_migrated_pfn = 0;
                        goto out;
                case ISOLATE_NONE:
+                       if (update_cached) {
+                               cc->zone->compact_cached_migrate_pfn[1] =
+                                       cc->zone->compact_cached_migrate_pfn[0];
+                       }
+
                        /*
                         * We haven't isolated and migrated anything, but
                         * there might still be unflushed migrations from
@@ -1608,6 +2166,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
                         */
                        goto check_drain;
                case ISOLATE_SUCCESS:
+                       update_cached = false;
+                       last_migrated_pfn = start_pfn;
                        ;
                }
 
@@ -1639,8 +2199,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
                                cc->migrate_pfn = block_end_pfn(
                                                cc->migrate_pfn - 1, cc->order);
                                /* Draining pcplists is useless in this case */
-                               cc->last_migrated_pfn = 0;
-
+                               last_migrated_pfn = 0;
                        }
                }
 
@@ -1652,21 +2211,26 @@ check_drain:
                 * compact_finished() can detect immediately if allocation
                 * would succeed.
                 */
-               if (cc->order > 0 && cc->last_migrated_pfn) {
+               if (cc->order > 0 && last_migrated_pfn) {
                        int cpu;
                        unsigned long current_block_start =
                                block_start_pfn(cc->migrate_pfn, cc->order);
 
-                       if (cc->last_migrated_pfn < current_block_start) {
+                       if (last_migrated_pfn < current_block_start) {
                                cpu = get_cpu();
                                lru_add_drain_cpu(cpu);
-                               drain_local_pages(zone);
+                               drain_local_pages(cc->zone);
                                put_cpu();
                                /* No more flushing until we migrate again */
-                               cc->last_migrated_pfn = 0;
+                               last_migrated_pfn = 0;
                        }
                }
 
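Aside, not part of the patch: pages freed by migration sit on per-CPU lists where the watermark checks in compact_finished() cannot see them; the drain pushes them into the buddy allocator, but only once the scanner has left the cc->order-aligned block they came from, since draining any earlier would be repeated for the same block. A sketch of the trigger (block_start() mirrors the kernel's block_start_pfn() macro; the caller additionally requires order > 0):

#include <stdbool.h>

static unsigned long block_start(unsigned long pfn, unsigned int order)
{
	return pfn & ~((1UL << order) - 1);	/* round_down(pfn, 1 << order) */
}

/* Drain at most once per order-aligned block, and only after leaving it. */
static bool should_drain(unsigned long last_migrated_pfn,
			 unsigned long migrate_pfn, unsigned int order)
{
	return last_migrated_pfn != 0 &&
	       last_migrated_pfn < block_start(migrate_pfn, order);
}
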
+               /* Stop if a page has been captured */
+               if (capc && capc->page) {
+                       ret = COMPACT_SUCCESS;
+                       break;
+               }
        }
 
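Aside, not part of the patch: the break above is the consumer side of capture. While this task compacts, the page allocator's free path can hand a freshly freed page of a suitable order straight into capc->page, and the main loop stops the moment that happens instead of finishing the pageblock. A single-threaded toy of the handoff; in the kernel the slot is filled concurrently from the free path, which is the entire point:

#include <stddef.h>

struct capture_control {
	void *page;	/* set by the "free" side when a match shows up */
};

static void *compact_until_captured(struct capture_control *capc,
				    int (*one_step)(struct capture_control *))
{
	while (one_step(capc)) {
		if (capc && capc->page)
			return capc->page;	/* stop early: direct handoff */
	}
	return NULL;
}
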
 out:
@@ -1685,8 +2249,8 @@ out:
                 * Only go back, not forward. The cached pfn might already
                 * have been reset to zone end in compact_finished()
                 */
-               if (free_pfn > zone->compact_cached_free_pfn)
-                       zone->compact_cached_free_pfn = free_pfn;
+               if (free_pfn > cc->zone->compact_cached_free_pfn)
+                       cc->zone->compact_cached_free_pfn = free_pfn;
        }
 
        count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
@@ -1700,7 +2264,8 @@ out:
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
                gfp_t gfp_mask, enum compact_priority prio,
-               unsigned int alloc_flags, int classzone_idx)
+               unsigned int alloc_flags, int classzone_idx,
+               struct page **capture)
 {
        enum compact_result ret;
        struct compact_control cc = {
@@ -1709,6 +2274,7 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
                .total_migrate_scanned = 0,
                .total_free_scanned = 0,
                .order = order,
+               .search_order = order,
                .gfp_mask = gfp_mask,
                .zone = zone,
                .mode = (prio == COMPACT_PRIO_ASYNC) ?
@@ -1720,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
                .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
                .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
        };
+       struct capture_control capc = {
+               .cc = &cc,
+               .page = NULL,
+       };
+
+       if (capture)
+               current->capture_control = &capc;
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
 
-       ret = compact_zone(zone, &cc);
+       ret = compact_zone(&cc, &capc);
 
        VM_BUG_ON(!list_empty(&cc.freepages));
        VM_BUG_ON(!list_empty(&cc.migratepages));
 
+       *capture = capc.page;
+       current->capture_control = NULL;
+
        return ret;
 }
 
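Aside, not part of the patch: compact_zone_order() publishes the on-stack capture slot through current->capture_control only when the caller asked for capture, runs the zone, then copies the result out and always clears the pointer so no reference to dead stack memory survives. Note that *capture is stored unconditionally; the kernel's callers always pass a non-NULL pointer here. The set/run/clear shape as a stand-alone sketch (task_slot stands in for current->capture_control):

#include <stddef.h>

struct capture_control { void *page; };

static struct capture_control *task_slot;	/* ~ current->capture_control */

/* Stub standing in for compact_zone(); the real one races the free path. */
static int run_compaction(struct capture_control *capc)
{
	(void)capc;
	return 0;
}

static int compact_with_capture(int want_capture, void **capture)
{
	struct capture_control capc = { .page = NULL };
	int ret;

	if (want_capture)
		task_slot = &capc;	/* publish for the free path to fill */

	ret = run_compaction(&capc);

	*capture = capc.page;	/* stored unconditionally, as in the patch */
	task_slot = NULL;	/* never leave a dangling on-stack pointer */
	return ret;
}
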
@@ -1745,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                unsigned int alloc_flags, const struct alloc_context *ac,
-               enum compact_priority prio)
+               enum compact_priority prio, struct page **capture)
 {
        int may_perform_io = gfp_mask & __GFP_IO;
        struct zoneref *z;
@@ -1773,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                }
 
                status = compact_zone_order(zone, order, gfp_mask, prio,
-                                       alloc_flags, ac_classzone_idx(ac));
+                               alloc_flags, ac_classzone_idx(ac), capture);
                rc = max(status, rc);
 
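Aside, not part of the patch: try_to_compact_pages() walks the zonelist and keeps the most promising status seen so far, which works because enum compact_result is ordered with better outcomes given larger values. A reduced sketch of the accumulation (the real enum has more states; the relative order of these four matches the kernel's):

/* Abridged: larger value == closer to a usable allocation. */
enum result { SKIPPED, DEFERRED, COMPLETE, SUCCESS };

static enum result best_of(const enum result *status, int nzones)
{
	enum result rc = SKIPPED;

	for (int i = 0; i < nzones; i++)
		rc = status[i] > rc ? status[i] : rc;	/* rc = max(status, rc) */
	return rc;
}
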
                /* The allocation should succeed, stop compacting */
@@ -1841,7 +2417,7 @@ static void compact_node(int nid)
                INIT_LIST_HEAD(&cc.freepages);
                INIT_LIST_HEAD(&cc.migratepages);
 
-               compact_zone(zone, &cc);
+               compact_zone(&cc, NULL);
 
                VM_BUG_ON(!list_empty(&cc.freepages));
                VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -1876,14 +2452,6 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
        return 0;
 }
 
-int sysctl_extfrag_handler(struct ctl_table *table, int write,
-                       void __user *buffer, size_t *length, loff_t *ppos)
-{
-       proc_dointvec_minmax(table, write, buffer, length, ppos);
-
-       return 0;
-}
-
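Aside, not part of the patch: the deleted handler was a pure pass-through that also swallowed the status of proc_dointvec_minmax(), so userspace never saw a validation error; presumably the matching sysctl table entry now names proc_dointvec_minmax directly, which removes the boilerplate and propagates failures. The shape of the problem in miniature:

/* The removed wrapper's shape: call the helper, drop its verdict. */
static int wrapped(int (*helper)(void))
{
	helper();	/* an error here is silently discarded */
	return 0;
}

/* Pointing at the helper directly keeps the error. */
static int direct(int (*helper)(void))
{
	return helper();
}
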
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
 static ssize_t sysfs_compact_node(struct device *dev,
                        struct device_attribute *attr,
@@ -1948,6 +2516,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
        struct zone *zone;
        struct compact_control cc = {
                .order = pgdat->kcompactd_max_order,
+               .search_order = pgdat->kcompactd_max_order,
                .total_migrate_scanned = 0,
                .total_free_scanned = 0,
                .classzone_idx = pgdat->kcompactd_classzone_idx,
@@ -1983,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 
                if (kthread_should_stop())
                        return;
-               status = compact_zone(zone, &cc);
+               status = compact_zone(&cc, NULL);
 
                if (status == COMPACT_SUCCESS) {
                        compaction_defer_reset(zone, cc.order, false);