mm: fix do_pages_move status handling
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0b97b8ece4a9fb43b40303dc1fe1cf4dba7c19b5..905db9d7962fcb1776c0e7ffb1618fb6e4084a75 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,7 +46,6 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
-#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -205,17 +204,18 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
 #ifdef CONFIG_ZONE_DMA
-        256,
+       [ZONE_DMA] = 256,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-        256,
+       [ZONE_DMA32] = 256,
 #endif
+       [ZONE_NORMAL] = 32,
 #ifdef CONFIG_HIGHMEM
-        32,
+       [ZONE_HIGHMEM] = 0,
 #endif
-        32,
+       [ZONE_MOVABLE] = 0,
 };
 
 EXPORT_SYMBOL(totalram_pages);
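
Sizing the array by MAX_NR_ZONES and switching to designated initializers ties each ratio to its zone index explicitly, instead of relying on the positional order of the #ifdef blocks, and lets a value of 0 stand for "no reserve" for ZONE_HIGHMEM and ZONE_MOVABLE. A minimal user-space sketch of the same pattern (the enum and values below are illustrative, not the kernel's definitions):

#include <stdio.h>

/* Illustrative stand-ins for the kernel's zone indices. */
enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

/*
 * Designated initializers: each ratio lands on its own zone index even
 * when some zones are compiled out, and 0 explicitly means "no reserve".
 */
static const int lowmem_reserve_ratio[MAX_NR_ZONES] = {
	[ZONE_DMA]     = 256,
	[ZONE_DMA32]   = 256,
	[ZONE_NORMAL]  = 32,
	[ZONE_MOVABLE] = 0,
};

int main(void)
{
	for (int i = 0; i < MAX_NR_ZONES; i++)
		printf("zone %d: ratio %d\n", i, lowmem_reserve_ratio[i]);
	return 0;
}
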
@@ -316,9 +316,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
        /* Always populate low zones for address-constrained allocations */
        if (zone_end < pgdat_end_pfn(pgdat))
                return true;
-       /* Xen PV domains need page structures early */
-       if (xen_pv_domain())
-               return true;
        (*nr_initialised)++;
        if ((*nr_initialised > pgdat->static_init_pgcnt) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1746,16 +1743,38 @@ void __init page_alloc_init_late(void)
 }
 
 #ifdef CONFIG_CMA
+static void __init adjust_present_page_count(struct page *page, long count)
+{
+       struct zone *zone = page_zone(page);
+
+       /* We don't need to hold a lock since it is boot-up process */
+       zone->present_pages += count;
+}
+
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
 {
        unsigned i = pageblock_nr_pages;
+       unsigned long pfn = page_to_pfn(page);
        struct page *p = page;
+       int nid = page_to_nid(page);
+
+       /*
+        * ZONE_MOVABLE will steal present pages from other zones by
+        * changing page links so page_zone() is changed. Before that,
+        * we need to adjust previous zone's page count first.
+        */
+       adjust_present_page_count(page, -pageblock_nr_pages);
 
        do {
                __ClearPageReserved(p);
                set_page_count(p, 0);
-       } while (++p, --i);
+
+               /* Steal pages from other zones */
+               set_page_links(p, ZONE_MOVABLE, nid, pfn);
+       } while (++p, ++pfn, --i);
+
+       adjust_present_page_count(page, pageblock_nr_pages);
 
        set_pageblock_migratetype(page, MIGRATE_CMA);
 
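
The comment about stealing present pages explains the ordering here: page_zone() is derived from zone bits stored in page->flags, so once set_page_links() re-points a page at ZONE_MOVABLE, any accounting keyed off page_zone() sees the new zone. Hence the present count is dropped from the original zone before the loop and added to ZONE_MOVABLE afterwards. A toy user-space model of that move (plain structs, not the kernel's page-flags encoding):

#include <stdio.h>

/* Toy model: a page records its owning zone; zones count present pages. */
struct zone { const char *name; long present_pages; };
struct page { struct zone *zone; };

static void move_page(struct page *p, struct zone *dst)
{
	p->zone->present_pages--;	/* drop from the old zone first */
	p->zone = dst;			/* re-link, like set_page_links() */
	p->zone->present_pages++;	/* then account it in the new zone */
}

int main(void)
{
	struct zone normal  = { "Normal",  1 };
	struct zone movable = { "Movable", 0 };
	struct page pg = { &normal };

	move_page(&pg, &movable);
	printf("%s=%ld %s=%ld\n", normal.name, normal.present_pages,
	       movable.name, movable.present_pages);
	return 0;
}
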
@@ -2870,7 +2889,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                 * exists.
                 */
                watermark = min_wmark_pages(zone) + (1UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
                        return 0;
 
                __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3146,12 +3165,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
        }
 
 
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
        /*
         * Check watermarks for an order-0 allocation request. If these
         * are not met, then a high-order request also cannot go ahead
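
Dropping the CMA discount follows from the CMA-in-ZONE_MOVABLE change above: unmovable allocations can no longer reach CMA pageblocks through the zonelist, so the watermark check does not have to subtract free CMA pages for callers without ALLOC_CMA. A rough user-space model of the order-0 check before and after (function and parameter names are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Old behaviour: callers without ALLOC_CMA had free CMA pages subtracted. */
static bool watermark_ok_old(long free, long free_cma, long mark,
			     long reserve, bool alloc_cma)
{
	if (!alloc_cma)
		free -= free_cma;
	return free > mark + reserve;
}

/* New behaviour: free pages are compared against the mark directly,
 * because CMA pages now sit in ZONE_MOVABLE and are filtered earlier. */
static bool watermark_ok_new(long free, long mark, long reserve)
{
	return free > mark + reserve;
}

int main(void)
{
	/* 100 free pages, 40 of them CMA, mark 70: the old check fails for
	 * an unmovable allocation, the new one passes. */
	printf("old=%d new=%d\n",
	       watermark_ok_old(100, 40, 70, 0, false),
	       watermark_ok_new(100, 70, 0));
	return 0;
}
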
@@ -3178,10 +3191,8 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                }
 
 #ifdef CONFIG_CMA
-               if ((alloc_flags & ALLOC_CMA) &&
-                   !list_empty(&area->free_list[MIGRATE_CMA])) {
+               if (!list_empty(&area->free_list[MIGRATE_CMA]))
                        return true;
-               }
 #endif
                if (alloc_harder &&
                        !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3201,13 +3212,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
                unsigned long mark, int classzone_idx, unsigned int alloc_flags)
 {
        long free_pages = zone_page_state(z, NR_FREE_PAGES);
-       long cma_pages = 0;
-
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
        /*
         * Fast check for order-0 only. If this fails then the reserves
@@ -3216,7 +3220,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
         * the caller is !atomic then it'll uselessly search the free
         * list. That corner case is then slower but it is harmless.
         */
-       if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+       if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
                return true;
 
        return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
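
zone_watermark_fast() keeps its two-tier shape: the common order-0 case is decided inline against the mark plus the lowmem reserve, and anything else (or a miss) falls through to the full __zone_watermark_ok() walk; only the cma_pages adjustment is gone. A condensed sketch of that structure (the full_check() below is a placeholder, not the real routine):

#include <stdbool.h>

/* Placeholder for the exhaustive check; the real __zone_watermark_ok()
 * also walks the per-migratetype free lists. */
static bool full_check(long free, unsigned int order, long mark, long reserve)
{
	return free - ((1L << order) - 1) > mark + reserve;
}

/* Fast path: decide the common order-0 case inline, else fall back. */
bool watermark_fast(long free, unsigned int order, long mark, long reserve)
{
	if (!order && free > mark + reserve)
		return true;
	return full_check(free, order, mark, reserve);
}
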
@@ -3852,10 +3856,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
        } else if (unlikely(rt_task(current)) && !in_interrupt())
                alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-               alloc_flags |= ALLOC_CMA;
-#endif
        return alloc_flags;
 }
 
@@ -4322,9 +4322,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
        if (should_fail_alloc_page(gfp_mask, order))
                return false;
 
-       if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-               *alloc_flags |= ALLOC_CMA;
-
        return true;
 }
 
@@ -4734,6 +4731,13 @@ long si_mem_available(void)
                     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
                         wmark_low);
 
+       /*
+        * Part of the kernel memory, which can be released under memory
+        * pressure.
+        */
+       available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+               PAGE_SHIFT;
+
        if (available < 0)
                available = 0;
        return available;
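
NR_INDIRECTLY_RECLAIMABLE_BYTES is a byte counter, so it is scaled down with >> PAGE_SHIFT before being folded into the page-based availability estimate. A trivial illustration of that conversion (4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages for this example */

int main(void)
{
	unsigned long reclaimable_bytes = 3 * 4096 + 100;	/* byte counter */
	unsigned long available = 1000;				/* in pages */

	/* Scale bytes down to whole pages, as >> PAGE_SHIFT does in the patch. */
	available += reclaimable_bytes >> PAGE_SHIFT;
	printf("available: %lu pages\n", available);		/* prints 1003 */
	return 0;
}
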
@@ -6200,6 +6204,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
        enum zone_type j;
        int nid = pgdat->node_id;
+       unsigned long node_end_pfn = 0;
 
        pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
@@ -6227,9 +6232,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, freesize, memmap_pages;
                unsigned long zone_start_pfn = zone->zone_start_pfn;
+               unsigned long movable_size = 0;
 
                size = zone->spanned_pages;
                realsize = freesize = zone->present_pages;
+               if (zone_end_pfn(zone) > node_end_pfn)
+                       node_end_pfn = zone_end_pfn(zone);
+
 
                /*
                 * Adjust freesize so that it accounts for how much memory
@@ -6278,12 +6287,30 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                zone_seqlock_init(zone);
                zone_pcp_init(zone);
 
-               if (!size)
+               /*
+                * The size of the CMA area is unknown now so we need to
+                * prepare the memory for the usemap at maximum.
+                */
+               if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
+                       pgdat->node_spanned_pages) {
+                       movable_size = node_end_pfn - pgdat->node_start_pfn;
+               }
+
+               if (!size && !movable_size)
                        continue;
 
                set_pageblock_order();
-               setup_usemap(pgdat, zone, zone_start_pfn, size);
-               init_currently_empty_zone(zone, zone_start_pfn, size);
+               if (movable_size) {
+                       zone->zone_start_pfn = pgdat->node_start_pfn;
+                       zone->spanned_pages = movable_size;
+                       setup_usemap(pgdat, zone,
+                               pgdat->node_start_pfn, movable_size);
+                       init_currently_empty_zone(zone,
+                               pgdat->node_start_pfn, movable_size);
+               } else {
+                       setup_usemap(pgdat, zone, zone_start_pfn, size);
+                       init_currently_empty_zone(zone, zone_start_pfn, size);
+               }
                memmap_init(size, nid, j, zone_start_pfn);
        }
 }
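
Because the eventual size of the CMA area is not known at this point, ZONE_MOVABLE is provisionally set up to span the whole node, which is why the loop now tracks the highest zone_end_pfn() it has seen as node_end_pfn. A small stand-alone sketch of that bookkeeping (simplified types, not the kernel's pglist_data):

/* Track the highest end pfn across a node's zones, as the patched loop
 * does, so ZONE_MOVABLE can provisionally cover the whole node. */
struct zone_range { unsigned long start_pfn, end_pfn; };

unsigned long movable_span(const struct zone_range *zones, int nr,
			   unsigned long node_start_pfn)
{
	unsigned long node_end_pfn = 0;

	for (int i = 0; i < nr; i++)
		if (zones[i].end_pfn > node_end_pfn)
			node_end_pfn = zones[i].end_pfn;

	return node_end_pfn - node_start_pfn;	/* candidate movable_size */
}
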
@@ -7125,13 +7152,15 @@ static void setup_per_zone_lowmem_reserve(void)
                                struct zone *lower_zone;
 
                                idx--;
-
-                               if (sysctl_lowmem_reserve_ratio[idx] < 1)
-                                       sysctl_lowmem_reserve_ratio[idx] = 1;
-
                                lower_zone = pgdat->node_zones + idx;
-                               lower_zone->lowmem_reserve[j] = managed_pages /
-                                       sysctl_lowmem_reserve_ratio[idx];
+
+                               if (sysctl_lowmem_reserve_ratio[idx] < 1) {
+                                       sysctl_lowmem_reserve_ratio[idx] = 0;
+                                       lower_zone->lowmem_reserve[j] = 0;
+                               } else {
+                                       lower_zone->lowmem_reserve[j] =
+                                               managed_pages / sysctl_lowmem_reserve_ratio[idx];
+                               }
                                managed_pages += lower_zone->managed_pages;
                        }
                }
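
The reworked loop changes what a ratio below 1 means: previously it was clamped to 1, making the reserve equal to the full managed_pages of the higher zones; now both the sysctl value and the reserve are zeroed, i.e. the protection is disabled for that zone. A compact model of the per-zone computation (illustrative only, not the kernel function):

/* Per-zone reserve: higher zones' managed pages divided by the ratio;
 * a ratio below 1 now disables the reserve instead of maximizing it. */
long lowmem_reserve(long higher_zones_managed_pages, int ratio)
{
	if (ratio < 1)
		return 0;
	return higher_zones_managed_pages / ratio;
}
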
@@ -7922,7 +7951,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 }
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
 * page high values need to be recalculated.