Be more aggressive about stealing when MIGRATE_RECLAIMABLE allocations fall back
[sfrench/cifs-2.6.git] mm/page_alloc.c
index d575a3ee8dd8e40618de0760566fd36569d64c14..b864584c92b467d384b21ab93685052328542cab 100644
@@ -159,8 +159,13 @@ EXPORT_SYMBOL(nr_node_ids);
 #endif
 
 #ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
+int page_group_by_mobility_disabled __read_mostly;
+
 static inline int get_pageblock_migratetype(struct page *page)
 {
+       if (unlikely(page_group_by_mobility_disabled))
+               return MIGRATE_UNMOVABLE;
+
        return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
 }
 
@@ -170,9 +175,21 @@ static void set_pageblock_migratetype(struct page *page, int migratetype)
                                        PB_migrate, PB_migrate_end);
 }
 
-static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
+static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order)
 {
-       return ((gfp_flags & __GFP_MOVABLE) != 0);
+       WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+
+       if (unlikely(page_group_by_mobility_disabled))
+               return MIGRATE_UNMOVABLE;
+
+       /* Cluster high-order atomic allocations together */
+       if (unlikely(order > 0) &&
+                       (!(gfp_flags & __GFP_WAIT) || in_interrupt()))
+               return MIGRATE_HIGHATOMIC;
+
+       /* Cluster based on mobility */
+       return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
+               ((gfp_flags & __GFP_RECLAIMABLE) != 0);
 }
 
 #else
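
The bit arithmetic in allocflags_to_migratetype() above relies on the migrate
types being numbered so that the reclaimable bit is the low bit and the movable
bit the next one (unmovable = 0, reclaimable = 1, movable = 2); the WARN_ON
flags the nonsensical case of both mobility bits being set. A minimal userspace
sketch of that mapping, with stand-in flag values and type numbers chosen
purely for illustration:

	#include <assert.h>

	/* stand-ins for __GFP_RECLAIMABLE and __GFP_MOVABLE; values are illustrative */
	#define SKETCH_GFP_RECLAIMABLE	0x1u
	#define SKETCH_GFP_MOVABLE	0x2u

	enum { SKETCH_UNMOVABLE = 0, SKETCH_RECLAIMABLE = 1, SKETCH_MOVABLE = 2 };

	static int sketch_migratetype(unsigned int flags)
	{
		return (((flags & SKETCH_GFP_MOVABLE) != 0) << 1) |
			((flags & SKETCH_GFP_RECLAIMABLE) != 0);
	}

	int main(void)
	{
		assert(sketch_migratetype(0) == SKETCH_UNMOVABLE);
		assert(sketch_migratetype(SKETCH_GFP_RECLAIMABLE) == SKETCH_RECLAIMABLE);
		assert(sketch_migratetype(SKETCH_GFP_MOVABLE) == SKETCH_MOVABLE);
		return 0;
	}
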
@@ -185,7 +202,7 @@ static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 }
 
-static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
+static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order)
 {
        return MIGRATE_UNMOVABLE;
 }
@@ -676,8 +693,10 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
  * the free lists for the desirable migrate type are depleted
  */
 static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
-       [MIGRATE_UNMOVABLE] = { MIGRATE_MOVABLE   },
-       [MIGRATE_MOVABLE]   = { MIGRATE_UNMOVABLE },
+       [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
+       [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
+       [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_HIGHATOMIC },
+       [MIGRATE_HIGHATOMIC]  = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
 };
 
 /*
@@ -746,6 +765,23 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
        return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+/* Return the page with the lowest PFN in the list */
+static struct page *min_page(struct list_head *list)
+{
+       unsigned long min_pfn = -1UL;
+       struct page *min_page = NULL, *page;
+
+       list_for_each_entry(page, list, lru) {
+               unsigned long pfn = page_to_pfn(page);
+               if (pfn < min_pfn) {
+                       min_pfn = pfn;
+                       min_page = page;
+               }
+       }
+
+       return min_page;
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static struct page *__rmqueue_fallback(struct zone *zone, int order,
                                                int start_migratetype)
@@ -754,28 +790,54 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
        int current_order;
        struct page *page;
        int migratetype, i;
+       int nonatomic_fallback_atomic = 0;
 
+retry:
        /* Find the largest possible block of pages in the other list */
        for (current_order = MAX_ORDER-1; current_order >= order;
                                                --current_order) {
                for (i = 0; i < MIGRATE_TYPES - 1; i++) {
                        migratetype = fallbacks[start_migratetype][i];
 
+                       /*
+                        * Make it hard to fallback to blocks used for
+                        * high-order atomic allocations
+                        */
+                       if (migratetype == MIGRATE_HIGHATOMIC &&
+                               start_migratetype != MIGRATE_UNMOVABLE &&
+                               !nonatomic_fallback_atomic)
+                               continue;
+
                        area = &(zone->free_area[current_order]);
                        if (list_empty(&area->free_list[migratetype]))
                                continue;
 
+                       /* Bias kernel allocations towards low pfns */
                        page = list_entry(area->free_list[migratetype].next,
                                        struct page, lru);
+                       if (unlikely(start_migratetype != MIGRATE_MOVABLE))
+                               page = min_page(&area->free_list[migratetype]);
                        area->nr_free--;
 
                        /*
                         * If breaking a large block of pages, move all free
-                        * pages to the preferred allocation list
+                        * pages to the preferred allocation list. If falling
+                        * back for a reclaimable kernel allocation, be more
+                        * aggressive about taking ownership of free pages
                         */
-                       if (unlikely(current_order >= MAX_ORDER / 2)) {
+                       if (unlikely(current_order >= MAX_ORDER / 2) ||
+                                       start_migratetype == MIGRATE_RECLAIMABLE) {
+                               unsigned long pages;
+                               pages = move_freepages_block(zone, page,
+                                                               start_migratetype);
+
+                               /* Claim the whole block if over half of it is free */
+                               if ((pages << current_order) >= (1 << (MAX_ORDER-2)) &&
+                                               migratetype != MIGRATE_HIGHATOMIC)
+                                       set_pageblock_migratetype(page,
+                                                               start_migratetype);
+
                                migratetype = start_migratetype;
-                               move_freepages_block(zone, page, migratetype);
                        }
 
                        /* Remove the page from the freelists */
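
As a worked example of the "claim the whole block" test above, assuming the
common configuration where MAX_ORDER is 11: the threshold 1 << (MAX_ORDER-2)
is 512 pages, i.e. half of a 1024-page MAX_ORDER-1 area, while
pages << current_order scales the count returned by move_freepages_block()
into a rough estimate of how much of the block was free. For instance, if it
returned 64 while allocating at current_order 3, then 64 << 3 = 512 >= 512 and
the pageblock's migratetype is rewritten to the stealing type, provided the
donor list is not MIGRATE_HIGHATOMIC.
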
@@ -793,6 +855,12 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
                }
        }
 
+       /* Allow fallback to high-order atomic blocks if memory is that low */
+       if (!nonatomic_fallback_atomic) {
+               nonatomic_fallback_atomic = 1;
+               goto retry;
+       }
+
        return NULL;
 }
 #else
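
Taken together, the MIGRATE_HIGHATOMIC guard near the top of the fallback loop
and the retry above give __rmqueue_fallback() two effective passes (a sketch of
the flow, not literal code):

	/*
	 * pass 1 (nonatomic_fallback_atomic == 0):
	 *	MIGRATE_HIGHATOMIC free lists are skipped unless the request
	 *	itself is MIGRATE_UNMOVABLE
	 * pass 2 (nonatomic_fallback_atomic == 1, reached only if pass 1
	 *	found nothing at any order):
	 *	MIGRATE_HIGHATOMIC free lists may be raided as a last resort
	 */
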
@@ -1054,7 +1122,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
        struct page *page;
        int cold = !!(gfp_flags & __GFP_COLD);
        int cpu;
-       int migratetype = gfpflags_to_migratetype(gfp_flags);
+       int migratetype = allocflags_to_migratetype(gfp_flags, order);
 
 again:
        cpu  = get_cpu();
@@ -2347,9 +2415,23 @@ void build_all_zonelists(void)
                /* cpuset refresh routine should be here */
        }
        vm_total_pages = nr_free_pagecache_pages();
-       printk("Built %i zonelists in %s order.  Total pages: %ld\n",
+       /*
+        * Disable grouping by mobility if the number of pages in the
+        * system is too low to allow the mechanism to work. It would be
+        * more accurate, but expensive, to check this per-zone. The check is
+        * repeated on memory hot-add so a system can start with mobility
+        * grouping disabled and enable it later.
+        */
+       if (vm_total_pages < (MAX_ORDER_NR_PAGES * MIGRATE_TYPES))
+               page_group_by_mobility_disabled = 1;
+       else
+               page_group_by_mobility_disabled = 0;
+
+       printk("Built %i zonelists in %s order, mobility grouping %s.  "
+               "Total pages: %ld\n",
                        num_online_nodes(),
                        zonelist_order_name[current_zonelist_order],
+                       page_group_by_mobility_disabled ? "off" : "on",
                        vm_total_pages);
 #ifdef CONFIG_NUMA
        printk("Policy zone: %s\n", zone_names[policy_zone]);