Do not group pages by mobility type on low memory systems
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aa7e5d2f28a52693d3bfa023c306e498bbdbe925..676aec93d699b5f0edf60dda360d9bd7e2d9bd7f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -159,8 +159,13 @@ EXPORT_SYMBOL(nr_node_ids);
 #endif
 
 #ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
+int page_group_by_mobility_disabled __read_mostly;
+
 static inline int get_pageblock_migratetype(struct page *page)
 {
+       if (unlikely(page_group_by_mobility_disabled))
+               return MIGRATE_UNMOVABLE;
+
        return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
 }
 
@@ -170,9 +175,21 @@ static void set_pageblock_migratetype(struct page *page, int migratetype)
                                        PB_migrate, PB_migrate_end);
 }
 
-static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
+static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order)
 {
-       return ((gfp_flags & __GFP_MOVABLE) != 0);
+       WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+
+       if (unlikely(page_group_by_mobility_disabled))
+               return MIGRATE_UNMOVABLE;
+
+       /* Cluster high-order atomic allocations together */
+       if (unlikely(order > 0) &&
+                       (!(gfp_flags & __GFP_WAIT) || in_interrupt()))
+               return MIGRATE_HIGHATOMIC;
+
+       /* Cluster based on mobility */
+       return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
+               ((gfp_flags & __GFP_RECLAIMABLE) != 0);
 }
 
 #else
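
For reference, the flag-to-type mapping introduced here can be modelled outside the kernel. In the sketch below the __GFP_* bit values are stand-ins rather than the real kernel flags, and the MIGRATE_* ordering is the one the shift-and-or arithmetic above implies (unmovable = 0, reclaimable = 1, movable = 2); only the arithmetic mirrors allocflags_to_migratetype().

#include <stdio.h>

/* Stand-in flag bits; the real __GFP_* values differ. */
#define __GFP_MOVABLE		0x1
#define __GFP_RECLAIMABLE	0x2

/* Ordering implied by the (movable << 1) | reclaimable arithmetic above. */
enum {
	MIGRATE_UNMOVABLE,	/* 0: neither hint set */
	MIGRATE_RECLAIMABLE,	/* 1: __GFP_RECLAIMABLE */
	MIGRATE_MOVABLE,	/* 2: __GFP_MOVABLE */
};

static int to_migratetype(unsigned int gfp_flags)
{
	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
		((gfp_flags & __GFP_RECLAIMABLE) != 0);
}

int main(void)
{
	printf("%d %d %d\n",
		to_migratetype(0),			/* MIGRATE_UNMOVABLE   */
		to_migratetype(__GFP_RECLAIMABLE),	/* MIGRATE_RECLAIMABLE */
		to_migratetype(__GFP_MOVABLE));		/* MIGRATE_MOVABLE     */
	return 0;
}
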
@@ -185,7 +202,7 @@ static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 }
 
-static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
+static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order)
 {
        return MIGRATE_UNMOVABLE;
 }
@@ -676,10 +693,78 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
  * the free lists for the desirable migrate type are depleted
  */
 static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
-       [MIGRATE_UNMOVABLE] = { MIGRATE_MOVABLE   },
-       [MIGRATE_MOVABLE]   = { MIGRATE_UNMOVABLE },
+       [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
+       [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
+       [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_HIGHATOMIC },
+       [MIGRATE_HIGHATOMIC]  = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
 };
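
As an aside, the fallback order encoded in this table is what __rmqueue_fallback() (below) walks when the preferred free list is empty. A minimal user-space sketch of how the table is indexed, with the enum order assumed to match the one the patch relies on:

#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_HIGHATOMIC, MIGRATE_TYPES };

/* Same table as above: for each allocation type, the order in which the
 * other types' free lists are raided when its own list is empty. */
static const int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES - 1] = {
	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_HIGHATOMIC },
	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_HIGHATOMIC },
	[MIGRATE_HIGHATOMIC]  = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
};

static const char *names[MIGRATE_TYPES] = {
	"unmovable", "reclaimable", "movable", "highatomic"
};

int main(void)
{
	for (int type = 0; type < MIGRATE_TYPES; type++) {
		printf("%-11s falls back to:", names[type]);
		for (int i = 0; i < MIGRATE_TYPES - 1; i++)
			printf(" %s", names[fallbacks[type][i]]);
		printf("\n");
	}
	return 0;
}
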
 
+/*
+ * Move the free pages in a range to the free lists of the requested type.
+ * Note that start_page and end_page are not required to be aligned to a
+ * MAX_ORDER_NR_PAGES boundary. If alignment is required, use move_freepages_block().
+ */
+int move_freepages(struct zone *zone,
+                       struct page *start_page, struct page *end_page,
+                       int migratetype)
+{
+       struct page *page;
+       unsigned long order;
+       int blocks_moved = 0;
+
+#ifndef CONFIG_HOLES_IN_ZONE
+       /*
+        * page_zone is not safe to call in this context when
+        * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
+        * anyway as we check zone boundaries in move_freepages_block().
+        * Remove at a later date when no bug reports exist related to
+        * CONFIG_PAGE_GROUP_BY_MOBILITY
+        */
+       BUG_ON(page_zone(start_page) != page_zone(end_page));
+#endif
+
+       for (page = start_page; page <= end_page;) {
+               if (!pfn_valid_within(page_to_pfn(page))) {
+                       page++;
+                       continue;
+               }
+
+               if (!PageBuddy(page)) {
+                       page++;
+                       continue;
+               }
+
+               order = page_order(page);
+               list_del(&page->lru);
+               list_add(&page->lru,
+                       &zone->free_area[order].free_list[migratetype]);
+               page += 1 << order;
+               blocks_moved++;
+       }
+
+       return blocks_moved;
+}
+
+int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
+{
+       unsigned long start_pfn, end_pfn;
+       struct page *start_page, *end_page;
+
+       start_pfn = page_to_pfn(page);
+       start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1);
+       start_page = pfn_to_page(start_pfn);
+       end_page = start_page + MAX_ORDER_NR_PAGES - 1;
+       end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1;
+
+       /* Do not cross zone boundaries */
+       if (start_pfn < zone->zone_start_pfn)
+               start_page = page;
+       if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+               return 0;
+
+       return move_freepages(zone, start_page, end_page, migratetype);
+}
+
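
The pageblock rounding in move_freepages_block() is plain mask arithmetic and can be checked in isolation. A small sketch, assuming the common MAX_ORDER of 11 (so MAX_ORDER_NR_PAGES is 1024) purely to make the numbers concrete:

#include <stdio.h>

#define MAX_ORDER		11			/* assumed default */
#define MAX_ORDER_NR_PAGES	(1UL << (MAX_ORDER - 1))	/* 1024 pages */

int main(void)
{
	unsigned long pfn = 5000;	/* arbitrary page inside a block */
	unsigned long start_pfn = pfn & ~(MAX_ORDER_NR_PAGES - 1);
	unsigned long end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1;

	/* 5000 rounds down to 4096; the block moved spans pfns 4096..5119 */
	printf("block for pfn %lu: %lu..%lu\n", pfn, start_pfn, end_pfn);
	return 0;
}
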
 /* Remove an element from the buddy allocator from the fallback list */
 static struct page *__rmqueue_fallback(struct zone *zone, int order,
                                                int start_migratetype)
@@ -688,13 +773,24 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
        int current_order;
        struct page *page;
        int migratetype, i;
+       int nonatomic_fallback_atomic = 0;
 
+retry:
        /* Find the largest possible block of pages in the other list */
        for (current_order = MAX_ORDER-1; current_order >= order;
                                                --current_order) {
                for (i = 0; i < MIGRATE_TYPES - 1; i++) {
                        migratetype = fallbacks[start_migratetype][i];
 
+                       /*
+                        * Make it hard to fall back to blocks used for
+                        * high-order atomic allocations
+                        */
+                       if (migratetype == MIGRATE_HIGHATOMIC &&
+                               start_migratetype != MIGRATE_UNMOVABLE &&
+                               !nonatomic_fallback_atomic)
+                               continue;
+
                        area = &(zone->free_area[current_order]);
                        if (list_empty(&area->free_list[migratetype]))
                                continue;
@@ -704,11 +800,13 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
                        area->nr_free--;
 
                        /*
-                        * If breaking a large block of pages, place the buddies
-                        * on the preferred allocation list
+                        * If breaking a large block of pages, move all free
+                        * pages to the preferred allocation list
                         */
-                       if (unlikely(current_order >= MAX_ORDER / 2))
+                       if (unlikely(current_order >= MAX_ORDER / 2)) {
                                migratetype = start_migratetype;
+                               move_freepages_block(zone, page, migratetype);
+                       }
 
                        /* Remove the page from the freelists */
                        list_del(&page->lru);
@@ -725,6 +823,12 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
                }
        }
 
+       /* Allow fallback to high-order atomic blocks if memory is that low */
+       if (!nonatomic_fallback_atomic) {
+               nonatomic_fallback_atomic = 1;
+               goto retry;
+       }
+
        return NULL;
 }
 #else
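
The nonatomic_fallback_atomic retry above means a non-unmovable allocation skips the MIGRATE_HIGHATOMIC free lists on the first scan and only raids them after a full scan has failed. A stripped-down user-space sketch of just that gate (the free-list walk and page splitting are omitted):

#include <stdbool.h>
#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_HIGHATOMIC };

/* Mirror of the guard in __rmqueue_fallback(): may this fallback type be
 * raided by an allocation of start_migratetype on the given pass? */
static bool may_raid(int fallback_type, int start_migratetype,
		     bool second_pass)
{
	if (fallback_type == MIGRATE_HIGHATOMIC &&
	    start_migratetype != MIGRATE_UNMOVABLE &&
	    !second_pass)
		return false;
	return true;
}

int main(void)
{
	/* A movable allocation touches the atomic reserve only on retry. */
	printf("movable, first pass:   %d\n",
	       may_raid(MIGRATE_HIGHATOMIC, MIGRATE_MOVABLE, false));
	printf("movable, second pass:  %d\n",
	       may_raid(MIGRATE_HIGHATOMIC, MIGRATE_MOVABLE, true));
	/* An unmovable allocation is allowed straight away. */
	printf("unmovable, first pass: %d\n",
	       may_raid(MIGRATE_HIGHATOMIC, MIGRATE_UNMOVABLE, false));
	return 0;
}
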
@@ -986,7 +1090,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
        struct page *page;
        int cold = !!(gfp_flags & __GFP_COLD);
        int cpu;
-       int migratetype = gfpflags_to_migratetype(gfp_flags);
+       int migratetype = allocflags_to_migratetype(gfp_flags, order);
 
 again:
        cpu  = get_cpu();
@@ -2279,9 +2383,23 @@ void build_all_zonelists(void)
                /* cpuset refresh routine should be here */
        }
        vm_total_pages = nr_free_pagecache_pages();
-       printk("Built %i zonelists in %s order.  Total pages: %ld\n",
+       /*
+        * Disable grouping by mobility if the number of pages in the
+        * system is too low for the mechanism to work. It would be more
+        * accurate, but expensive, to check this per-zone. The check is
+        * also made on memory hot-add, so a system can start with
+        * mobility grouping disabled and enable it later.
+        */
+       if (vm_total_pages < (MAX_ORDER_NR_PAGES * MIGRATE_TYPES))
+               page_group_by_mobility_disabled = 1;
+       else
+               page_group_by_mobility_disabled = 0;
+
+       printk("Built %i zonelists in %s order, mobility grouping %s.  "
+               "Total pages: %ld\n",
                        num_online_nodes(),
                        zonelist_order_name[current_zonelist_order],
+                       page_group_by_mobility_disabled ? "off" : "on",
                        vm_total_pages);
 #ifdef CONFIG_NUMA
        printk("Policy zone: %s\n", zone_names[policy_zone]);