mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand

author Mel Gorman <mgorman@techsingularity.net>

Sat, 7 Nov 2015 00:28:37 +0000 (16:28 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 7 Nov 2015 01:50:42 +0000 (17:50 -0800)
author Mel Gorman <mgorman@techsingularity.net>
Sat, 7 Nov 2015 00:28:37 +0000 (16:28 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Nov 2015 01:50:42 +0000 (17:50 -0800)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index b86cfa3313cfde1144c4321bb1b0ba5d2395942f..d3bafe4ff32b09640f20ec935a4531a15e4268e5 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -39,6 +39,8 @@ enum {
         MIGRATE_UNMOVABLE,
         MIGRATE_MOVABLE,
         MIGRATE_RECLAIMABLE,
+       MIGRATE_PCPTYPES,       /* the number of types on the pcp lists */
+       MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
  #ifdef CONFIG_CMA
         /*
          * MIGRATE_CMA migration type is designed to mimic the way
@@ -61,8 +63,6 @@ enum {
         MIGRATE_TYPES
  };
  
-#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1)
-
  #ifdef CONFIG_CMA
  #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
  #else
@@ -334,6 +334,8 @@ struct zone {
         /* zone watermarks, access with *_wmark_pages(zone) macros */
         unsigned long watermark[NR_WMARK];
  
+       unsigned long nr_reserved_highatomic;
+
         /*
          * We don't know if the memory that we're going to allocate will be freeable
          * or/and it will be released eventually, so to avoid totally wasting several
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 588812614377d38bd6b52435cfd156826fc8fc6c..55e9c56dfe5459757119d10981b56ab0b582a19a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1615,6 +1615,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
         return -1;
  }
  
+/*
+ * Reserve a pageblock for exclusive use of high-order atomic allocations if
+ * there are no empty page blocks that contain a page with a suitable order
+ */
+static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
+                               unsigned int alloc_order)
+{
+       int mt;
+       unsigned long max_managed, flags;
+
+       /*
+        * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
+        * Check is race-prone but harmless.
+        */
+       max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+       if (zone->nr_reserved_highatomic >= max_managed)
+               return;
+
+       spin_lock_irqsave(&zone->lock, flags);
+
+       /* Recheck the nr_reserved_highatomic limit under the lock */
+       if (zone->nr_reserved_highatomic >= max_managed)
+               goto out_unlock;
+
+       /* Yoink! */
+       mt = get_pageblock_migratetype(page);
+       if (mt != MIGRATE_HIGHATOMIC &&
+                       !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+               zone->nr_reserved_highatomic += pageblock_nr_pages;
+               set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
+               move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+       }
+
+out_unlock:
+       spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Used when an allocation is about to fail under memory pressure. This
+ * potentially hurts the reliability of high-order allocations when under
+ * intense memory pressure but failed atomic allocations should be easier
+ * to recover from than an OOM.
+ */
+static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+{
+       struct zonelist *zonelist = ac->zonelist;
+       unsigned long flags;
+       struct zoneref *z;
+       struct zone *zone;
+       struct page *page;
+       int order;
+
+       for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+                                                               ac->nodemask) {
+               /* Preserve at least one pageblock */
+               if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+                       continue;
+
+               spin_lock_irqsave(&zone->lock, flags);
+               for (order = 0; order < MAX_ORDER; order++) {
+                       struct free_area *area = &(zone->free_area[order]);
+
+                       if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+                               continue;
+
+                       page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
+                                               struct page, lru);
+
+                       /*
+                        * It should never happen but changes to locking could
+                        * inadvertently allow a per-cpu drain to add pages
+                        * to MIGRATE_HIGHATOMIC while unreserving so be safe
+                        * and watch for underflows.
+                        */
+                       zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
+                               zone->nr_reserved_highatomic);
+
+                       /*
+                        * Convert to ac->migratetype and avoid the normal
+                        * pageblock stealing heuristics. Minimally, the caller
+                        * is doing the work and needs the pages. More
+                        * importantly, if the block was always converted to
+                        * MIGRATE_UNMOVABLE or another type then the number
+                        * of pageblocks that cannot be completely freed
+                        * may increase.
+                        */
+                       set_pageblock_migratetype(page, ac->migratetype);
+                       move_freepages_block(zone, page, ac->migratetype);
+                       spin_unlock_irqrestore(&zone->lock, flags);
+                       return;
+               }
+               spin_unlock_irqrestore(&zone->lock, flags);
+       }
+}
+
  /* Remove an element from the buddy allocator from the fallback list */
  static inline struct page *
  __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
@@ -1670,7 +1765,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
   * Call me with the zone->lock already held.
   */
  static struct page *__rmqueue(struct zone *zone, unsigned int order,
-                                               int migratetype)
+                               int migratetype, gfp_t gfp_flags)
  {
         struct page *page;
  
@@ -1700,7 +1795,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  
         spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
-               struct page *page = __rmqueue(zone, order, migratetype);
+               struct page *page = __rmqueue(zone, order, migratetype, 0);
                 if (unlikely(page == NULL))
                         break;
  
@@ -2072,7 +2167,7 @@ int split_free_page(struct page *page)
  static inline
  struct page *buffered_rmqueue(struct zone *preferred_zone,
                         struct zone *zone, unsigned int order,
-                       gfp_t gfp_flags, int migratetype)
+                       gfp_t gfp_flags, int alloc_flags, int migratetype)
  {
         unsigned long flags;
         struct page *page;
@@ -2115,7 +2210,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
                         WARN_ON_ONCE(order > 1);
                 }
                 spin_lock_irqsave(&zone->lock, flags);
-               page = __rmqueue(zone, order, migratetype);
+
+               page = NULL;
+               if (alloc_flags & ALLOC_HARDER) {
+                       page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                       if (page)
+                               trace_mm_page_alloc_zone_locked(page, order, migratetype);
+               }
+               if (!page)
+                       page = __rmqueue(zone, order, migratetype, gfp_flags);
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
@@ -2226,15 +2329,24 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
                         unsigned long mark, int classzone_idx, int alloc_flags,
                         long free_pages)
  {
-       /* free_pages may go negative - that's OK */
         long min = mark;
         int o;
         long free_cma = 0;
  
+       /* free_pages may go negative - that's OK */
         free_pages -= (1 << order) - 1;
+
         if (alloc_flags & ALLOC_HIGH)
                 min -= min / 2;
-       if (alloc_flags & ALLOC_HARDER)
+
+       /*
+        * If the caller does not have rights to ALLOC_HARDER then subtract
+        * the high-atomic reserves. This will over-estimate the size of the
+        * atomic reserve but it avoids a search.
+        */
+       if (likely(!(alloc_flags & ALLOC_HARDER)))
+               free_pages -= z->nr_reserved_highatomic;
+       else
                 min -= min / 4;
  
  #ifdef CONFIG_CMA
@@ -2419,10 +2531,18 @@ zonelist_scan:
  
  try_this_zone:
                 page = buffered_rmqueue(ac->preferred_zone, zone, order,
-                                               gfp_mask, ac->migratetype);
+                               gfp_mask, alloc_flags, ac->migratetype);
                 if (page) {
                         if (prep_new_page(page, order, gfp_mask, alloc_flags))
                                 goto try_this_zone;
+
+                       /*
+                        * If this is a high-order atomic allocation then check
+                        * if the pageblock should be reserved for the future
+                        */
+                       if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
+                               reserve_highatomic_pageblock(page, zone, order);
+
                         return page;
                 }
         }
@@ -2695,9 +2815,11 @@ retry:
  
         /*
          * If an allocation failed after direct reclaim, it could be because
-        * pages are pinned on the per-cpu lists. Drain them and try again
+        * pages are pinned on the per-cpu lists or in high alloc reserves.
+        * Shrink them and try again
          */
         if (!page && !drained) {
+               unreserve_highatomic_pageblock(ac);
                 drain_all_pages(NULL);
                 drained = true;
                 goto retry;
diff --git a/mm/vmstat.c b/mm/vmstat.c

index 5b289dcdcccf34e29ab3c39bd6fbc9c82118f305..879a2be23325ce73158e097eb381628a5fd5f548 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,6 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
         "Unmovable",
         "Reclaimable",
         "Movable",
+       "HighAtomic",
  #ifdef CONFIG_CMA
         "CMA",
  #endif
author	Mel Gorman <mgorman@techsingularity.net>
	Sat, 7 Nov 2015 00:28:37 +0000 (16:28 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 7 Nov 2015 01:50:42 +0000 (17:50 -0800)
include/linux/mmzone.h		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/vmstat.c		patch \| blob \| history