memcg, vmscan: add memcg reclaim tracepoint
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7f25f336551a8d049cedd868c9ef2d0efb9fc6f2..154b37a3373110944b583666f57bc99679438839 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -622,6 +622,24 @@ static enum page_references page_check_references(struct page *page,
        return PAGEREF_RECLAIM;
 }
 
+static noinline_for_stack void free_page_list(struct list_head *free_pages)
+{
+       struct pagevec freed_pvec;
+       struct page *page, *tmp;
+
+       pagevec_init(&freed_pvec, 1);
+
+       list_for_each_entry_safe(page, tmp, free_pages, lru) {
+               list_del(&page->lru);
+               if (!pagevec_add(&freed_pvec, page)) {
+                       __pagevec_free(&freed_pvec);
+                       pagevec_reinit(&freed_pvec);
+               }
+       }
+
+       pagevec_free(&freed_pvec);
+}
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
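
The new free_page_list() helper drains the caller's list of reclaimed pages
through an on-stack pagevec, so pages are handed back in PAGEVEC_SIZE batches
rather than one at a time. A minimal userspace sketch of the same
drain-in-batches pattern, assuming a 14-entry batch (PAGEVEC_SIZE in kernels
of this era) and with every name below invented for illustration:

    #include <stdlib.h>

    struct node {
            struct node *next;
    };

    #define BATCH 14                        /* stands in for PAGEVEC_SIZE */

    static void free_batch(struct node **batch, int n)
    {
            /* one bulk pass per batch, like __pagevec_free() */
            while (n--)
                    free(batch[n]);
    }

    static void free_node_list(struct node *head)
    {
            struct node *batch[BATCH];
            int n = 0;

            while (head) {
                    struct node *next = head->next;

                    batch[n++] = head;      /* like pagevec_add() */
                    if (n == BATCH) {       /* batch full: flush it */
                            free_batch(batch, n);
                            n = 0;          /* like pagevec_reinit() */
                    }
                    head = next;
            }
            free_batch(batch, n);           /* final partial batch */
    }

    int main(void)
    {
            struct node *head = NULL;

            for (int i = 0; i < 40; i++) {
                    struct node *nd = malloc(sizeof(*nd));

                    nd->next = head;
                    head = nd;
            }
            free_node_list(head);
            return 0;
    }
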
@@ -630,13 +648,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                        enum pageout_io sync_writeback)
 {
        LIST_HEAD(ret_pages);
-       struct pagevec freed_pvec;
+       LIST_HEAD(free_pages);
        int pgactivate = 0;
        unsigned long nr_reclaimed = 0;
 
        cond_resched();
 
-       pagevec_init(&freed_pvec, 1);
        while (!list_empty(page_list)) {
                enum page_references references;
                struct address_space *mapping;
@@ -811,10 +828,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                __clear_page_locked(page);
 free_it:
                nr_reclaimed++;
-               if (!pagevec_add(&freed_pvec, page)) {
-                       __pagevec_free(&freed_pvec);
-                       pagevec_reinit(&freed_pvec);
-               }
+
+               /*
+                * Is there a need to call free_page_list() periodically?
+                * It would appear not, as the counts should be low.
+                */
+               list_add(&page->lru, &free_pages);
                continue;
 
 cull_mlocked:
@@ -837,9 +856,10 @@ keep:
                list_add(&page->lru, &ret_pages);
                VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
        }
+
+       free_page_list(&free_pages);
+
        list_splice(&ret_pages, page_list);
-       if (pagevec_count(&freed_pvec))
-               __pagevec_free(&freed_pvec);
        count_vm_events(PGACTIVATE, pgactivate);
        return nr_reclaimed;
 }
@@ -1056,7 +1076,8 @@ static unsigned long clear_active_flags(struct list_head *page_list,
                        ClearPageActive(page);
                        nr_active++;
                }
-               count[lru]++;
+               if (count)
+                       count[lru]++;
        }
 
        return nr_active;
@@ -1132,23 +1153,99 @@ static int too_many_isolated(struct zone *zone, int file,
        return isolated > inactive;
 }
 
+/*
+ * TODO: Try merging with migration's version of putback_lru_pages().
+ */
+static noinline_for_stack void
+putback_lru_pages(struct zone *zone, struct scan_control *sc,
+                               unsigned long nr_anon, unsigned long nr_file,
+                               struct list_head *page_list)
+{
+       struct page *page;
+       struct pagevec pvec;
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+
+       pagevec_init(&pvec, 1);
+
+       /*
+        * Put back any unfreeable pages. Note that the caller arrives
+        * here with interrupts already disabled, hence the plain
+        * spin_lock() below paired with the final spin_unlock_irq().
+        */
+       spin_lock(&zone->lru_lock);
+       while (!list_empty(page_list)) {
+               int lru;
+               page = lru_to_page(page_list);
+               VM_BUG_ON(PageLRU(page));
+               list_del(&page->lru);
+               if (unlikely(!page_evictable(page, NULL))) {
+                       spin_unlock_irq(&zone->lru_lock);
+                       putback_lru_page(page);
+                       spin_lock_irq(&zone->lru_lock);
+                       continue;
+               }
+               SetPageLRU(page);
+               lru = page_lru(page);
+               add_page_to_lru_list(zone, page, lru);
+               if (is_active_lru(lru)) {
+                       int file = is_file_lru(lru);
+                       reclaim_stat->recent_rotated[file]++;
+               }
+               if (!pagevec_add(&pvec, page)) {
+                       spin_unlock_irq(&zone->lru_lock);
+                       __pagevec_release(&pvec);
+                       spin_lock_irq(&zone->lru_lock);
+               }
+       }
+       __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+       __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+       spin_unlock_irq(&zone->lru_lock);
+       pagevec_release(&pvec);
+}
+
+static noinline_for_stack void update_isolated_counts(struct zone *zone,
+                                       struct scan_control *sc,
+                                       unsigned long *nr_anon,
+                                       unsigned long *nr_file,
+                                       struct list_head *isolated_list)
+{
+       unsigned long nr_active;
+       unsigned int count[NR_LRU_LISTS] = { 0, };
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+
+       nr_active = clear_active_flags(isolated_list, count);
+       __count_vm_events(PGDEACTIVATE, nr_active);
+
+       __mod_zone_page_state(zone, NR_ACTIVE_FILE,
+                             -count[LRU_ACTIVE_FILE]);
+       __mod_zone_page_state(zone, NR_INACTIVE_FILE,
+                             -count[LRU_INACTIVE_FILE]);
+       __mod_zone_page_state(zone, NR_ACTIVE_ANON,
+                             -count[LRU_ACTIVE_ANON]);
+       __mod_zone_page_state(zone, NR_INACTIVE_ANON,
+                             -count[LRU_INACTIVE_ANON]);
+
+       *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
+       *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
+       __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
+       __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
+
+       reclaim_stat->recent_scanned[0] += *nr_anon;
+       reclaim_stat->recent_scanned[1] += *nr_file;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
-static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
-                       struct zone *zone, struct scan_control *sc,
-                       int priority, int file)
+static noinline_for_stack unsigned long
+shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
+                       struct scan_control *sc, int priority, int file)
 {
        LIST_HEAD(page_list);
-       struct pagevec pvec;
        unsigned long nr_scanned;
        unsigned long nr_reclaimed = 0;
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
-       struct page *page;
        unsigned long nr_taken;
        unsigned long nr_active;
-       unsigned int count[NR_LRU_LISTS] = { 0, };
        unsigned long nr_anon;
        unsigned long nr_file;
 
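
putback_lru_pages() keeps zone->lru_lock held while it rebuilds the LRU
lists, but drops it around putback_lru_page() and each pagevec flush, since
those paths can end up taking the same lock themselves. A self-contained
sketch of that drop-flush-retake pattern, with a pthread mutex standing in
for the zone lock and every other name invented for illustration:

    #include <pthread.h>
    #include <stdio.h>

    #define BATCH 14                        /* stands in for PAGEVEC_SIZE */

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void flush_batch(int *batch, int n)
    {
            /* slow work done with the lock dropped, like __pagevec_release() */
            while (n--)
                    printf("put back item %d\n", batch[n]);
    }

    int main(void)
    {
            int batch[BATCH], n = 0;

            pthread_mutex_lock(&list_lock);
            for (int item = 0; item < 40; item++) {
                    batch[n++] = item;              /* like pagevec_add() */
                    if (n == BATCH) {
                            /* batch full: drop the lock for the flush */
                            pthread_mutex_unlock(&list_lock);
                            flush_batch(batch, n);
                            n = 0;
                            pthread_mutex_lock(&list_lock);
                    }
            }
            pthread_mutex_unlock(&list_lock);
            flush_batch(batch, n);                  /* final partial batch */
            return 0;
    }
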
@@ -1161,8 +1258,6 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
        }
 
 
-       pagevec_init(&pvec, 1);
-
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
 
@@ -1192,28 +1287,12 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
                 */
        }
 
-       if (nr_taken == 0)
-               goto done;
-
-       nr_active = clear_active_flags(&page_list, count);
-       __count_vm_events(PGDEACTIVATE, nr_active);
-
-       __mod_zone_page_state(zone, NR_ACTIVE_FILE,
-                                       -count[LRU_ACTIVE_FILE]);
-       __mod_zone_page_state(zone, NR_INACTIVE_FILE,
-                                       -count[LRU_INACTIVE_FILE]);
-       __mod_zone_page_state(zone, NR_ACTIVE_ANON,
-                                       -count[LRU_ACTIVE_ANON]);
-       __mod_zone_page_state(zone, NR_INACTIVE_ANON,
-                                       -count[LRU_INACTIVE_ANON]);
-
-       nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-       nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-       __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
+       if (nr_taken == 0) {
+               spin_unlock_irq(&zone->lru_lock);
+               return 0;
+       }
 
-       reclaim_stat->recent_scanned[0] += nr_anon;
-       reclaim_stat->recent_scanned[1] += nr_file;
+       update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
 
        spin_unlock_irq(&zone->lru_lock);
 
@@ -1233,7 +1312,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
                 * The attempt at page out may have made some
                 * of the pages active, mark them inactive again.
                 */
-               nr_active = clear_active_flags(&page_list, count);
+               nr_active = clear_active_flags(&page_list, NULL);
                count_vm_events(PGDEACTIVATE, nr_active);
 
                nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
@@ -1244,40 +1323,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
                __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
        __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-       spin_lock(&zone->lru_lock);
-       /*
-        * Put back any unfreeable pages.
-        */
-       while (!list_empty(&page_list)) {
-               int lru;
-               page = lru_to_page(&page_list);
-               VM_BUG_ON(PageLRU(page));
-               list_del(&page->lru);
-               if (unlikely(!page_evictable(page, NULL))) {
-                       spin_unlock_irq(&zone->lru_lock);
-                       putback_lru_page(page);
-                       spin_lock_irq(&zone->lru_lock);
-                       continue;
-               }
-               SetPageLRU(page);
-               lru = page_lru(page);
-               add_page_to_lru_list(zone, page, lru);
-               if (is_active_lru(lru)) {
-                       int file = is_file_lru(lru);
-                       reclaim_stat->recent_rotated[file]++;
-               }
-               if (!pagevec_add(&pvec, page)) {
-                       spin_unlock_irq(&zone->lru_lock);
-                       __pagevec_release(&pvec);
-                       spin_lock_irq(&zone->lru_lock);
-               }
-       }
-       __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
-
-done:
-       spin_unlock_irq(&zone->lru_lock);
-       pagevec_release(&pvec);
+       putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
        return nr_reclaimed;
 }
 
@@ -1581,6 +1627,13 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
                }
        }
 
+       /*
+        * With swappiness at 100, anonymous and file have the same priority.
+        * This scanning priority is essentially the inverse of IO cost.
+        */
+       anon_prio = sc->swappiness;
+       file_prio = 200 - sc->swappiness;
+
        /*
         * OK, so we have swap space and a fair amount of page cache
         * pages.  We use the recently rotated / recently scanned
@@ -1592,27 +1645,17 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
         *
         * anon in [0], file in [1]
         */
+       spin_lock_irq(&zone->lru_lock);
        if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
-               spin_lock_irq(&zone->lru_lock);
                reclaim_stat->recent_scanned[0] /= 2;
                reclaim_stat->recent_rotated[0] /= 2;
-               spin_unlock_irq(&zone->lru_lock);
        }
 
        if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
-               spin_lock_irq(&zone->lru_lock);
                reclaim_stat->recent_scanned[1] /= 2;
                reclaim_stat->recent_rotated[1] /= 2;
-               spin_unlock_irq(&zone->lru_lock);
        }
 
-       /*
-        * With swappiness at 100, anonymous and file have the same priority.
-        * This scanning priority is essentially the inverse of IO cost.
-        */
-       anon_prio = sc->swappiness;
-       file_prio = 200 - sc->swappiness;
-
        /*
         * The amount of pressure on anon vs file pages is inversely
         * proportional to the fraction of recently scanned pages on
@@ -1623,6 +1666,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 
        fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
        fp /= reclaim_stat->recent_rotated[1] + 1;
+       spin_unlock_irq(&zone->lru_lock);
 
        fraction[0] = ap;
        fraction[1] = fp;
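
The effect of the ap/fp arithmetic is easiest to see with concrete numbers.
A standalone worked example; the swappiness value and the reclaim_stat
snapshot below are invented for illustration:

    #include <stdio.h>

    int main(void)
    {
            unsigned long swappiness = 60;                  /* common default */
            unsigned long anon_prio = swappiness;           /* 60 */
            unsigned long file_prio = 200 - swappiness;     /* 140 */

            /*
             * Hypothetical recent_scanned/recent_rotated snapshot: anon
             * pages keep getting referenced and rotated back, file pages
             * mostly do not.
             */
            unsigned long anon_scanned = 1000, anon_rotated = 900;
            unsigned long file_scanned = 1000, file_rotated = 100;

            unsigned long ap = (anon_prio + 1) * (anon_scanned + 1)
                                    / (anon_rotated + 1);
            unsigned long fp = (file_prio + 1) * (file_scanned + 1)
                                    / (file_rotated + 1);

            /*
             * Prints ap=67 fp=1397: with fraction[0]=ap, fraction[1]=fp,
             * the file LRU takes roughly 95% of the scanning pressure.
             */
            printf("ap=%lu fp=%lu\n", ap, fp);
            return 0;
    }
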
@@ -1907,6 +1951,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
        sc.nodemask = &nm;
        sc.nr_reclaimed = 0;
        sc.nr_scanned = 0;
+
+       trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+                                                     sc.may_writepage,
+                                                     sc.gfp_mask);
+
        /*
         * NOTE: Although we can get the priority field, using it
         * here is not a good idea, since it limits the pages we can scan.
@@ -1915,6 +1964,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
         * the priority and make it zero.
         */
        shrink_zone(0, zone, &sc);
+
+       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+
        return sc.nr_reclaimed;
 }
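
This diff shows only the callers; the events themselves live in
include/trace/events/vmscan.h, which is not part of this hunk. A sketch of
what the begin/end pair for the soft-limit path would look like, assuming it
reuses the direct-reclaim event templates already present in that header
(the template names here are an assumption, not taken from this diff):

    DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template,
            mm_vmscan_memcg_softlimit_reclaim_begin,

            TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),

            TP_ARGS(order, may_writepage, gfp_flags)
    );

    DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template,
            mm_vmscan_memcg_softlimit_reclaim_end,

            TP_PROTO(unsigned long nr_reclaimed),

            TP_ARGS(nr_reclaimed)
    );
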
 
@@ -1924,6 +1976,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                           unsigned int swappiness)
 {
        struct zonelist *zonelist;
+       unsigned long nr_reclaimed;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
@@ -1938,7 +1991,16 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
        zonelist = NODE_DATA(numa_node_id())->node_zonelists;
-       return do_try_to_free_pages(zonelist, &sc);
+
+       trace_mm_vmscan_memcg_reclaim_begin(0,
+                                           sc.may_writepage,
+                                           sc.gfp_mask);
+
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+
+       return nr_reclaimed;
 }
 #endif
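
Once the kernel side is in place, the new events can be enabled like any
other tracepoint. A minimal userspace sketch, assuming debugfs is mounted at
/sys/kernel/debug as was typical for this era (adjust the path if your
system differs):

    #include <stdio.h>

    int main(void)
    {
            const char *enable =
                    "/sys/kernel/debug/tracing/events/vmscan/"
                    "mm_vmscan_memcg_reclaim_begin/enable";
            FILE *f = fopen(enable, "w");

            if (!f) {
                    perror(enable);
                    return 1;
            }
            fputs("1\n", f);        /* turn the tracepoint on */
            fclose(f);

            /* fired events then show up in .../tracing/trace_pipe */
            return 0;
    }
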
 
@@ -2554,7 +2616,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                .swappiness = vm_swappiness,
                .order = order,
        };
-       unsigned long slab_reclaimable;
+       unsigned long nr_slab_pages0, nr_slab_pages1;
 
        cond_resched();
        /*
@@ -2579,8 +2641,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
        }
 
-       slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-       if (slab_reclaimable > zone->min_slab_pages) {
+       nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+       if (nr_slab_pages0 > zone->min_slab_pages) {
                /*
                 * shrink_slab() does not currently allow us to determine how
                 * many pages were freed in this zone. So we take the current
@@ -2591,17 +2653,27 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 * Note that shrink_slab will free memory on all zones and may
                 * take a long time.
                 */
-               while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
-                       zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
-                               slab_reclaimable - nr_pages)
-                       ;
+               for (;;) {
+                       unsigned long lru_pages = zone_reclaimable_pages(zone);
+
+                       /* No reclaimable slab or very low memory pressure */
+                       if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+                               break;
+
+                       /* Freed enough memory */
+                       nr_slab_pages1 = zone_page_state(zone,
+                                                       NR_SLAB_RECLAIMABLE);
+                       if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
+                               break;
+               }
 
                /*
                 * Update nr_reclaimed by the number of slab pages we
                 * reclaimed from this zone.
                 */
-               sc.nr_reclaimed += slab_reclaimable -
-                       zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+               nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+               if (nr_slab_pages1 < nr_slab_pages0)
+                       sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
        }
 
        p->reclaim_state = NULL;
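
The open-coded while loop becomes a for (;;) with two named exit conditions:
shrink_slab() reporting no progress, or the zone's reclaimable-slab count
having dropped by at least nr_pages. A toy standalone model of that
termination logic; the stub and its numbers are invented for illustration:

    #include <stdio.h>

    static unsigned long slab_pages = 1000; /* plays NR_SLAB_RECLAIMABLE */

    /* stub: each pass frees 100 slab pages until the slab runs dry */
    static unsigned long shrink_slab_stub(void)
    {
            if (slab_pages < 100)
                    return 0;       /* no reclaimable slab left */
            slab_pages -= 100;
            return 100;
    }

    int main(void)
    {
            unsigned long nr_pages = 256;   /* the reclaim target */
            unsigned long nr_slab_pages0 = slab_pages;
            unsigned long nr_slab_pages1;

            for (;;) {
                    /* no reclaimable slab or very low memory pressure */
                    if (!shrink_slab_stub())
                            break;

                    /* freed enough memory */
                    nr_slab_pages1 = slab_pages;
                    if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
                            break;
            }
            printf("freed %lu slab pages\n", nr_slab_pages0 - slab_pages);
            return 0;
    }
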