mm: fix do_pages_move status handling

[sfrench/cifs-2.6.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 0b97b8ece4a9fb43b40303dc1fe1cf4dba7c19b5..905db9d7962fcb1776c0e7ffb1618fb6e4084a75 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,7 +46,6 @@
  #include <linux/stop_machine.h>
  #include <linux/sort.h>
  #include <linux/pfn.h>
-#include <xen/xen.h>
  #include <linux/backing-dev.h>
  #include <linux/fault-inject.h>
  #include <linux/page-isolation.h>
@@ -205,17 +204,18 @@ static void __free_pages_ok(struct page *page, unsigned int order);
   * TBD: should special case ZONE_DMA32 machines here - in those we normally
   * don't need any ZONE_NORMAL reservation
   */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
  #ifdef CONFIG_ZONE_DMA
-        256,
+       [ZONE_DMA] = 256,
  #endif
  #ifdef CONFIG_ZONE_DMA32
-        256,
+       [ZONE_DMA32] = 256,
  #endif
+       [ZONE_NORMAL] = 32,
  #ifdef CONFIG_HIGHMEM
-        32,
+       [ZONE_HIGHMEM] = 0,
  #endif
-        32,
+       [ZONE_MOVABLE] = 0,
  };
  
  EXPORT_SYMBOL(totalram_pages);
@@ -316,9 +316,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
         /* Always populate low zones for address-constrained allocations */
         if (zone_end < pgdat_end_pfn(pgdat))
                 return true;
-       /* Xen PV domains need page structures early */
-       if (xen_pv_domain())
-               return true;
         (*nr_initialised)++;
         if ((*nr_initialised > pgdat->static_init_pgcnt) &&
             (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1746,16 +1743,38 @@ void __init page_alloc_init_late(void)
  }
  
  #ifdef CONFIG_CMA
+static void __init adjust_present_page_count(struct page *page, long count)
+{
+       struct zone *zone = page_zone(page);
+
+       /* No lock is needed because this runs only during boot-up */
+       zone->present_pages += count;
+}
+
  /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
  void __init init_cma_reserved_pageblock(struct page *page)
  {
         unsigned i = pageblock_nr_pages;
+       unsigned long pfn = page_to_pfn(page);
         struct page *p = page;
+       int nid = page_to_nid(page);
+
+       /*
+        * ZONE_MOVABLE steals present pages from other zones by
+        * rewriting page links, which changes what page_zone() returns.
+        * So adjust the previous zone's page count before doing that.
+        */
+       adjust_present_page_count(page, -pageblock_nr_pages);
  
         do {
                 __ClearPageReserved(p);
                 set_page_count(p, 0);
-       } while (++p, --i);
+
+               /* Steal pages from other zones */
+               set_page_links(p, ZONE_MOVABLE, nid, pfn);
+       } while (++p, ++pfn, --i);
+
+       adjust_present_page_count(page, pageblock_nr_pages);
  
         set_pageblock_migratetype(page, MIGRATE_CMA);
  
@@ -2870,7 +2889,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                  * exists.
                  */
                 watermark = min_wmark_pages(zone) + (1UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
                         return 0;
  
                 __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3146,12 +3165,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
         }
  
  
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
         /*
          * Check watermarks for an order-0 allocation request. If these
          * are not met, then a high-order request also cannot go ahead
@@ -3178,10 +3191,8 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                 }
  
  #ifdef CONFIG_CMA
-               if ((alloc_flags & ALLOC_CMA) &&
-                   !list_empty(&area->free_list[MIGRATE_CMA])) {
+               if (!list_empty(&area->free_list[MIGRATE_CMA]))
                         return true;
-               }
  #endif
                 if (alloc_harder &&
                         !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3201,13 +3212,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
                 unsigned long mark, int classzone_idx, unsigned int alloc_flags)
  {
         long free_pages = zone_page_state(z, NR_FREE_PAGES);
-       long cma_pages = 0;
-
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
  
         /*
          * Fast check for order-0 only. If this fails then the reserves
@@ -3216,7 +3220,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
          * the caller is !atomic then it'll uselessly search the free
          * list. That corner case is then slower but it is harmless.
          */
-       if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+       if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
                 return true;
  
         return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3852,10 +3856,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         } else if (unlikely(rt_task(current)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
  
-#ifdef CONFIG_CMA
-       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-               alloc_flags |= ALLOC_CMA;
-#endif
         return alloc_flags;
  }
  
@@ -4322,9 +4322,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
         if (should_fail_alloc_page(gfp_mask, order))
                 return false;
  
-       if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-               *alloc_flags |= ALLOC_CMA;
-
         return true;
  }
  
@@ -4734,6 +4731,13 @@ long si_mem_available(void)
                      min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
                          wmark_low);
  
+       /*
+        * Part of the kernel memory, which can be released under memory
+        * pressure.
+        */
+       available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+               PAGE_SHIFT;
+
         if (available < 0)
                 available = 0;
         return available;
@@ -6200,6 +6204,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
  {
         enum zone_type j;
         int nid = pgdat->node_id;
+       unsigned long node_end_pfn = 0;
  
         pgdat_resize_init(pgdat);
  #ifdef CONFIG_NUMA_BALANCING
@@ -6227,9 +6232,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize, freesize, memmap_pages;
                 unsigned long zone_start_pfn = zone->zone_start_pfn;
+               unsigned long movable_size = 0;
  
                 size = zone->spanned_pages;
                 realsize = freesize = zone->present_pages;
+               if (zone_end_pfn(zone) > node_end_pfn)
+                       node_end_pfn = zone_end_pfn(zone);
+
  
                 /*
                  * Adjust freesize so that it accounts for how much memory
@@ -6278,12 +6287,30 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 zone_seqlock_init(zone);
                 zone_pcp_init(zone);
  
-               if (!size)
+               /*
+                * The size of the CMA area is not known yet, so size the
+                * usemap for the largest span ZONE_MOVABLE could cover.
+                */
+               if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
+                       pgdat->node_spanned_pages) {
+                       movable_size = node_end_pfn - pgdat->node_start_pfn;
+               }
+
+               if (!size && !movable_size)
                         continue;
  
                 set_pageblock_order();
-               setup_usemap(pgdat, zone, zone_start_pfn, size);
-               init_currently_empty_zone(zone, zone_start_pfn, size);
+               if (movable_size) {
+                       zone->zone_start_pfn = pgdat->node_start_pfn;
+                       zone->spanned_pages = movable_size;
+                       setup_usemap(pgdat, zone,
+                               pgdat->node_start_pfn, movable_size);
+                       init_currently_empty_zone(zone,
+                               pgdat->node_start_pfn, movable_size);
+               } else {
+                       setup_usemap(pgdat, zone, zone_start_pfn, size);
+                       init_currently_empty_zone(zone, zone_start_pfn, size);
+               }
                 memmap_init(size, nid, j, zone_start_pfn);
         }
  }
@@ -7125,13 +7152,15 @@ static void setup_per_zone_lowmem_reserve(void)
                                 struct zone *lower_zone;
  
                                 idx--;
-
-                               if (sysctl_lowmem_reserve_ratio[idx] < 1)
-                                       sysctl_lowmem_reserve_ratio[idx] = 1;
-
                                 lower_zone = pgdat->node_zones + idx;
-                               lower_zone->lowmem_reserve[j] = managed_pages /
-                                       sysctl_lowmem_reserve_ratio[idx];
+
+                               if (sysctl_lowmem_reserve_ratio[idx] < 1) {
+                                       sysctl_lowmem_reserve_ratio[idx] = 0;
+                                       lower_zone->lowmem_reserve[j] = 0;
+                               } else {
+                                       lower_zone->lowmem_reserve[j] =
+                                               managed_pages / sysctl_lowmem_reserve_ratio[idx];
+                               }
                                 managed_pages += lower_zone->managed_pages;
                         }
                 }
@@ -7922,7 +7951,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
  }
  #endif
  
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
  /*
   * The zone indicated has a new number of managed_pages; batch sizes and percpu
   * page high values need to be recalculated.