mm: memcontrol: account kernel stack per node
Author:     Shakeel Butt <shakeelb@google.com>
AuthorDate: Fri, 7 Aug 2020 06:21:37 +0000 (23:21 -0700)
Committer:  Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Fri, 7 Aug 2020 18:33:25 +0000 (11:33 -0700)
Currently the kernel stack is being accounted per-zone.  There is no need
to do that.  In addition due to being per-zone, memcg has to keep a
separate MEMCG_KERNEL_STACK_KB.  Make the stat per-node and deprecate
MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
node_stat_item.  In addition localize the kernel stack stats updates to
account_kernel_stack().

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Link: http://lkml.kernel.org/r/20200630161539.1759185-1-shakeelb@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/base/node.c
fs/proc/meminfo.c
include/linux/memcontrol.h
include/linux/mmzone.h
kernel/fork.c
kernel/scs.c
mm/memcontrol.c
mm/page_alloc.c
mm/vmstat.c

index 0cf13e31603c9dcf71b6ce23b8da144595252459..508b80f6329b4e06f1fbd4958ea006f146334c91 100644 (file)
@@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
                       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
                       nid, K(i.sharedram),
-                      nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+                      nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
 #ifdef CONFIG_SHADOW_CALL_STACK
-                      nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+                      nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
                       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
                       nid, 0UL,
index 38ea95fd919a5d65f3d7be2074ecb92fb2ecbf61..2a4c58f70fb91635529e524821d32feee91399b8 100644 (file)
@@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
        show_val_kb(m, "SReclaimable:   ", sreclaimable);
        show_val_kb(m, "SUnreclaim:     ", sunreclaim);
        seq_printf(m, "KernelStack:    %8lu kB\n",
-                  global_zone_page_state(NR_KERNEL_STACK_KB));
+                  global_node_page_state(NR_KERNEL_STACK_KB));
 #ifdef CONFIG_SHADOW_CALL_STACK
        seq_printf(m, "ShadowCallStack:%8lu kB\n",
-                  global_zone_page_state(NR_KERNEL_SCS_KB));
+                  global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
        show_val_kb(m, "PageTables:     ",
                    global_zone_page_state(NR_PAGETABLE));
index 5a8b62d075e60f980af589195b5b32f18579356a..624400c27eba61e8c995c372bddb0bf646679997 100644 (file)
@@ -32,8 +32,6 @@ struct kmem_cache;
 enum memcg_stat_item {
        MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
        MEMCG_SOCK,
-       /* XXX: why are these zone and not node counters? */
-       MEMCG_KERNEL_STACK_KB,
        MEMCG_NR_STAT,
 };
 
@@ -729,8 +727,19 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
                        int val);
 void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+
 void mod_memcg_obj_state(void *p, int idx, int val);
 
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+                                        int val)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __mod_lruvec_slab_state(p, idx, val);
+       local_irq_restore(flags);
+}
+
 static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
                                          enum node_stat_item idx, int val)
 {
@@ -1151,6 +1160,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
        __mod_node_page_state(page_pgdat(page), idx, val);
 }
 
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+                                        int val)
+{
+       struct page *page = virt_to_head_page(p);
+
+       mod_node_page_state(page_pgdat(page), idx, val);
+}
+
 static inline void mod_memcg_obj_state(void *p, int idx, int val)
 {
 }
index b79100edd2285e1211c1c5bfc280e0e17240d874..a3bd54139a30ed5fad2f4f091959beb4286af408 100644 (file)
@@ -155,10 +155,6 @@ enum zone_stat_item {
        NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
        NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
        NR_PAGETABLE,           /* used for pagetables */
-       NR_KERNEL_STACK_KB,     /* measured in KiB */
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-       NR_KERNEL_SCS_KB,       /* measured in KiB */
-#endif
        /* Second 128 byte cacheline */
        NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -203,6 +199,10 @@ enum node_stat_item {
        NR_KERNEL_MISC_RECLAIMABLE,     /* reclaimable non-slab kernel pages */
        NR_FOLL_PIN_ACQUIRED,   /* via: pin_user_page(), gup flag: FOLL_PIN */
        NR_FOLL_PIN_RELEASED,   /* pages returned via unpin_user_page() */
+       NR_KERNEL_STACK_KB,     /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+       NR_KERNEL_SCS_KB,       /* measured in KiB */
+#endif
        NR_VM_NODE_STAT_ITEMS
 };
 
index 76d3f3387554b030d0bbaec914f77a0230b7b07c..c7b4ce9d2647cbc6445c7950c24d2d2828db61fe 100644 (file)
@@ -276,13 +276,8 @@ static inline void free_thread_stack(struct task_struct *tsk)
        if (vm) {
                int i;
 
-               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-                       mod_memcg_page_state(vm->pages[i],
-                                            MEMCG_KERNEL_STACK_KB,
-                                            -(int)(PAGE_SIZE / 1024));
-
+               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
                        memcg_kmem_uncharge_page(vm->pages[i], 0);
-               }
 
                for (i = 0; i < NR_CACHED_STACKS; i++) {
                        if (this_cpu_cmpxchg(cached_stacks[i],
@@ -382,31 +377,14 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
        void *stack = task_stack_page(tsk);
        struct vm_struct *vm = task_stack_vm_area(tsk);
 
-       BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
-       if (vm) {
-               int i;
-
-               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
 
-               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-                       mod_zone_page_state(page_zone(vm->pages[i]),
-                                           NR_KERNEL_STACK_KB,
-                                           PAGE_SIZE / 1024 * account);
-               }
-       } else {
-               /*
-                * All stack pages are in the same zone and belong to the
-                * same memcg.
-                */
-               struct page *first_page = virt_to_page(stack);
-
-               mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-                                   THREAD_SIZE / 1024 * account);
-
-               mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
-                                   account * (THREAD_SIZE / 1024));
-       }
+       /* All stack pages are in the same node. */
+       if (vm)
+               mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
+                                     account * (THREAD_SIZE / 1024));
+       else
+               mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
+                                     account * (THREAD_SIZE / 1024));
 }
 
 static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -415,24 +393,23 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
        struct vm_struct *vm = task_stack_vm_area(tsk);
        int ret;
 
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+
        if (vm) {
                int i;
 
+               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
                for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
                        /*
                         * If memcg_kmem_charge_page() fails, page->mem_cgroup
-                        * pointer is NULL, and both memcg_kmem_uncharge_page()
-                        * and mod_memcg_page_state() in free_thread_stack()
-                        * will ignore this page. So it's safe.
+                        * pointer is NULL, and memcg_kmem_uncharge_page() in
+                        * free_thread_stack() will ignore this page.
                         */
                        ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
                                                     0);
                        if (ret)
                                return ret;
-
-                       mod_memcg_page_state(vm->pages[i],
-                                            MEMCG_KERNEL_STACK_KB,
-                                            PAGE_SIZE / 1024);
                }
        }
 #endif
index 5d4d9bbdec36c9f8f3dd5450ff3f5704b3ee40b7..4ff4a7ba0094f737e7730e1fcfc02b00fbcfb4fb 100644 (file)
@@ -17,7 +17,7 @@ static void __scs_account(void *s, int account)
 {
        struct page *scs_page = virt_to_page(s);
 
-       mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
+       mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
                            account * (SCS_SIZE / SZ_1K));
 }
 
index 473f9b91d51f86f6b6d5f89245e8d3e20097e048..a3e96336676943d30833c9e84ecdc0677126e738 100644 (file)
@@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
                       (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
                       PAGE_SIZE);
        seq_buf_printf(&s, "kernel_stack %llu\n",
-                      (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
+                      (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
                       1024);
        seq_buf_printf(&s, "slab %llu\n",
                       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
index f9ad093814d2560a57db05ec526c34c96d937405..8d5d8526c2f336d0a62e3a5f59b468a2b290f353 100644 (file)
@@ -5396,6 +5396,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " anon_thp: %lukB"
 #endif
                        " writeback_tmp:%lukB"
+                       " kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       " shadow_call_stack:%lukB"
+#endif
                        " all_unreclaimable? %s"
                        "\n",
                        pgdat->node_id,
@@ -5417,6 +5421,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
                        K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+                       node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                "yes" : "no");
        }
@@ -5448,10 +5456,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " present:%lukB"
                        " managed:%lukB"
                        " mlocked:%lukB"
-                       " kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
-                       " shadow_call_stack:%lukB"
-#endif
                        " pagetables:%lukB"
                        " bounce:%lukB"
                        " free_pcp:%lukB"
@@ -5473,10 +5477,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(zone->present_pages),
                        K(zone_managed_pages(zone)),
                        K(zone_page_state(zone, NR_MLOCK)),
-                       zone_page_state(zone, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
-                       zone_page_state(zone, NR_KERNEL_SCS_KB),
-#endif
                        K(zone_page_state(zone, NR_PAGETABLE)),
                        K(zone_page_state(zone, NR_BOUNCE)),
                        K(free_pcp),
index b171a76bfe83327b8bb30e8d4645adee0f37e08c..2b866cbab11db75f982dee0c37b278becd24ac24 100644 (file)
@@ -1140,10 +1140,6 @@ const char * const vmstat_text[] = {
        "nr_zone_write_pending",
        "nr_mlock",
        "nr_page_table_pages",
-       "nr_kernel_stack",
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-       "nr_shadow_call_stack",
-#endif
        "nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
        "nr_zspages",
@@ -1194,6 +1190,10 @@ const char * const vmstat_text[] = {
        "nr_kernel_misc_reclaimable",
        "nr_foll_pin_acquired",
        "nr_foll_pin_released",
+       "nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+       "nr_shadow_call_stack",
+#endif
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",