mm, page_alloc: enable pcpu_drain with zone capability
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4115d7f20223fe1cd6c3971e1e104d7ac4f723ea..75865e1325b5d590c09cbfd6a186fc6ace325e95 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -97,8 +97,12 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 /* work_structs for global per-cpu drains */
+struct pcpu_drain {
+       struct zone *zone;
+       struct work_struct work;
+};
 DEFINE_MUTEX(pcpu_drain_mutex);
-DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
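
Why a bare per-CPU work_struct is no longer enough: the workqueue core invokes handlers through work_func_t, which carries no extra argument, so the target zone has to be reachable from the work_struct pointer itself. From include/linux/workqueue.h:

	typedef void (*work_func_t)(struct work_struct *work);

Embedding the work item in struct pcpu_drain next to the zone pointer makes that possible; the handler recovers the enclosing struct with container_of(), as the drain_local_pages_wq() hunk below shows.
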
@@ -265,18 +269,18 @@ int user_min_free_kbytes = -1;
 int watermark_boost_factor __read_mostly = 15000;
 int watermark_scale_factor = 10;
 
-static unsigned long nr_kernel_pages __meminitdata;
-static unsigned long nr_all_pages __meminitdata;
-static unsigned long dma_reserve __meminitdata;
+static unsigned long nr_kernel_pages __initdata;
+static unsigned long nr_all_pages __initdata;
+static unsigned long dma_reserve __initdata;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata;
-static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata;
+static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
+static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
 static unsigned long required_kernelcore __initdata;
 static unsigned long required_kernelcore_percent __initdata;
 static unsigned long required_movablecore __initdata;
 static unsigned long required_movablecore_percent __initdata;
-static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;
 static bool mirrored_kernelcore __meminitdata;
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
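
For reference, the attributes swapped above only choose a linker section: .init.data is always freed once boot finishes, while .meminit.data is kept when CONFIG_MEMORY_HOTPLUG needs it after boot. A simplified sketch of the definitions (see include/linux/init.h; the keep-or-discard decision itself lives in the linker script):

	#define __initdata	__section(.init.data)	 /* always freed after boot */
	#define __meminitdata	__section(.meminit.data) /* discarded only if !MEMORY_HOTPLUG */

These PFN arrays are consumed only while the zone layout is computed at boot, so demoting them to __initdata reclaims the memory even on hotplug-enabled kernels.
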
@@ -294,6 +298,32 @@ EXPORT_SYMBOL(nr_online_nodes);
 int page_group_by_mobility_disabled __read_mostly;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
+ */
+static DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * Calling kasan_free_pages() only after deferred memory initialization
+ * has completed. Poisoning pages during deferred memory init will greatly
+ * lengthen the process and cause problem in large memory systems as the
+ * deferred pages initialization is done with interrupt disabled.
+ *
+ * Assuming that there will be no reference to those newly initialized
+ * pages before they are ever allocated, this should have no effect on
+ * KASAN memory tracking as the poison will be properly inserted at page
+ * allocation time. The only corner case is when pages are allocated by
+ * on-demand allocation and then freed again before the deferred pages
+ * initialization is done, but this is not likely to happen.
+ */
+static inline void kasan_free_nondeferred_pages(struct page *page, int order)
+{
+       if (!static_branch_unlikely(&deferred_pages))
+               kasan_free_pages(page, order);
+}
+
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
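
The deferred_pages static key (relocated from later in the file; see the removal hunk below) keeps this check off the post-boot fast path: static_branch_unlikely() compiles to a patchable branch rather than a load-and-test. A hedged sketch of the key's lifecycle; the disabling call sits in page_alloc_init_late(), outside the hunks shown here:

	/* Boot: the key is true, so frees skip KASAN poisoning (see above). */
	if (!static_branch_unlikely(&deferred_pages))
		kasan_free_pages(page, order);

	/* In page_alloc_init_late(), once every deferred page is initialized: */
	static_branch_disable(&deferred_pages);
	/* From here on the branch is patched in and poisoning happens. */
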
@@ -326,8 +356,13 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
        /* Always populate low zones for address-constrained allocations */
        if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
                return false;
+
+       /*
+        * We start with only one section of pages; more are added as needed
+        * until the rest of the deferred pages have been initialized.
+        */
        nr_initialised++;
-       if ((nr_initialised > NODE_DATA(nid)->static_init_pgcnt) &&
+       if ((nr_initialised > PAGES_PER_SECTION) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
                NODE_DATA(nid)->first_deferred_pfn = pfn;
                return true;
@@ -335,6 +370,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
        return false;
 }
 #else
+#define kasan_free_nondeferred_pages(p, o)     kasan_free_pages(p, o)
+
 static inline bool early_page_uninitialised(unsigned long pfn)
 {
        return false;
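
The threshold rewrite above replaces the per-node static_init_pgcnt counter (dropped from pgdat_set_deferred_range() in a later hunk) with the constant it was derived from: exactly one sparsemem section per node is initialized eagerly. To put a number on that, for a typical x86_64 configuration:

	/* PAGES_PER_SECTION = 1UL << (SECTION_SIZE_BITS - PAGE_SHIFT)
	 * x86_64: SECTION_SIZE_BITS = 27, PAGE_SHIFT = 12
	 *	=> 1UL << 15 = 32768 pages = 128 MiB initialized up front
	 */
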
@@ -426,6 +463,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
        unsigned long old_word, word;
 
        BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+       BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
        bitmap = get_pageblock_bitmap(page, pfn);
        bitidx = pfn_to_bitidx(page, pfn);
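
The new assertion guards against adding a migratetype that no longer fits in the pageblock bits reserved for it (PB_migratetype_bits is 3, so at most eight types). BUILD_BUG_ON() costs nothing at run time; a userspace sketch of the classic idiom behind it (the kernel's real macro is built on compiletime_assert()):

	/* A true condition makes the array size negative, which the
	 * compiler rejects, so the bug is caught at build time. */
	#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

	int main(void)
	{
		BUILD_BUG_ON(6 > (1 << 3));	/* fine: 6 types fit in 3 bits */
		/* BUILD_BUG_ON(9 > (1 << 3));	would fail to compile */
		return 0;
	}
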
@@ -1037,7 +1075,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
        arch_free_page(page, order);
        kernel_poison_pages(page, 1 << order, 0);
        kernel_map_pages(page, 1 << order, 0);
-       kasan_free_pages(page, order);
+       kasan_free_nondeferred_pages(page, order);
 
        return true;
 }
@@ -1606,13 +1644,6 @@ static int __init deferred_init_memmap(void *data)
        return 0;
 }
 
-/*
- * During boot we initialize deferred pages on-demand, as needed, but once
- * page_alloc_init_late() has finished, the deferred pages are all initialized,
- * and we can permanently disable that path.
- */
-static DEFINE_STATIC_KEY_TRUE(deferred_pages);
-
 /*
  * If this zone has deferred pages, try to grow it by initializing enough
  * deferred pages to satisfy the allocation specified by order, rounded up to
@@ -2631,6 +2662,10 @@ void drain_local_pages(struct zone *zone)
 
 static void drain_local_pages_wq(struct work_struct *work)
 {
+       struct pcpu_drain *drain;
+
+       drain = container_of(work, struct pcpu_drain, work);
+
        /*
         * drain_all_pages doesn't use proper cpu hotplug protection so
         * we can race with cpu offline when the WQ can move this from
@@ -2639,7 +2674,7 @@ static void drain_local_pages_wq(struct work_struct *work)
         * a different one.
         */
        preempt_disable();
-       drain_local_pages(NULL);
+       drain_local_pages(drain->zone);
        preempt_enable();
 }
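
container_of() is what lets the zone ride along: the workqueue hands the handler only &drain->work, and the macro walks back to the enclosing struct pcpu_drain. A self-contained userspace model of the same recovery (the struct and handler names mirror the diff; everything else is illustrative):

	#include <stddef.h>
	#include <stdio.h>

	/* Same arithmetic as the kernel macro: subtract the member offset. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work_struct { int pending; };
	struct pcpu_drain {
		void *zone;
		struct work_struct work;
	};

	static void drain_local_pages_wq(struct work_struct *work)
	{
		struct pcpu_drain *drain = container_of(work, struct pcpu_drain, work);
		printf("draining zone %p\n", drain->zone);
	}

	int main(void)
	{
		struct pcpu_drain d = { .zone = (void *)0xbeef };
		drain_local_pages_wq(&d.work);	/* handler sees only the work member */
		return 0;
	}
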
 
@@ -2710,12 +2745,14 @@ void drain_all_pages(struct zone *zone)
        }
 
        for_each_cpu(cpu, &cpus_with_pcps) {
-               struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
-               INIT_WORK(work, drain_local_pages_wq);
-               queue_work_on(cpu, mm_percpu_wq, work);
+               struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
+
+               drain->zone = zone;
+               INIT_WORK(&drain->work, drain_local_pages_wq);
+               queue_work_on(cpu, mm_percpu_wq, &drain->work);
        }
        for_each_cpu(cpu, &cpus_with_pcps)
-               flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+               flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
 
        mutex_unlock(&pcpu_drain_mutex);
 }
@@ -3517,13 +3554,13 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
-       pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n",
+       pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl",
                        current->comm, &vaf, gfp_mask, &gfp_mask,
                        nodemask_pr_args(nodemask));
        va_end(args);
 
        cpuset_print_current_mems_allowed();
-
+       pr_cont("\n");
        dump_stack();
        warn_alloc_show_mem(gfp_mask, nodemask);
 }
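
Net effect of the two hunks above: the allocation-failure banner and the cpuset information now land on a single log line, with the terminating newline emitted only after cpuset_print_current_mems_allowed() has appended its part via pr_cont(). Illustrative output, with the order, mode, and mask values invented for the example:

	fio: page allocation failure: order:5, mode:0x40cc0(GFP_KERNEL|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0
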
@@ -6029,7 +6066,7 @@ void __init sparse_memory_present_with_active_regions(int nid)
  * with no available memory, a warning is printed and the start and end
  * PFNs will be 0.
  */
-void __meminit get_pfn_range_for_nid(unsigned int nid,
+void __init get_pfn_range_for_nid(unsigned int nid,
                        unsigned long *start_pfn, unsigned long *end_pfn)
 {
        unsigned long this_start_pfn, this_end_pfn;
@@ -6078,7 +6115,7 @@ static void __init find_usable_zone_for_movable(void)
  * highest usable zone for ZONE_MOVABLE. This preserves the assumption that
 * zones within a node are in order of monotonically increasing memory addresses
  */
-static void __meminit adjust_zone_range_for_zone_movable(int nid,
+static void __init adjust_zone_range_for_zone_movable(int nid,
                                        unsigned long zone_type,
                                        unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
@@ -6109,7 +6146,7 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
  * Return the number of pages a zone spans in a node, including holes:
  * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
  */
-static unsigned long __meminit zone_spanned_pages_in_node(int nid,
+static unsigned long __init zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
                                        unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
@@ -6144,7 +6181,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __init __absent_pages_in_range(int nid,
                                unsigned long range_start_pfn,
                                unsigned long range_end_pfn)
 {
@@ -6174,7 +6211,7 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 }
 
 /* Return the number of page frames in holes in a zone on a node */
-static unsigned long __meminit zone_absent_pages_in_node(int nid,
+static unsigned long __init zone_absent_pages_in_node(int nid,
                                        unsigned long zone_type,
                                        unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
@@ -6226,7 +6263,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 }
 
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
+static inline unsigned long __init zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
                                        unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
@@ -6245,7 +6282,7 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
        return zones_size[zone_type];
 }
 
-static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
+static inline unsigned long __init zone_absent_pages_in_node(int nid,
                                                unsigned long zone_type,
                                                unsigned long node_start_pfn,
                                                unsigned long node_end_pfn,
@@ -6259,7 +6296,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
                                                unsigned long node_start_pfn,
                                                unsigned long node_end_pfn,
                                                unsigned long *zones_size,
@@ -6585,12 +6622,6 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 {
-       /*
-        * We start only with one section of pages, more pages are added as
-        * needed until the rest of deferred pages are initialized.
-        */
-       pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
-                                               pgdat->node_spanned_pages);
        pgdat->first_deferred_pfn = ULONG_MAX;
 }
 #else
@@ -7193,7 +7224,7 @@ void adjust_managed_page_count(struct page *page, long count)
 }
 EXPORT_SYMBOL(adjust_managed_page_count);
 
-unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
+unsigned long free_reserved_area(void *start, void *end, int poison, const char *s)
 {
        void *pos;
        unsigned long pages = 0;
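
Const-qualifying @s matches how callers actually use the parameter: the label is a string literal naming the region in the "Freeing ... memory" printout. A usage sketch along the lines of free_initmem_default() in include/linux/mm.h (a negative poison value skips the poisoning memset):

	extern char __init_begin[], __init_end[];

	/* Release the kernel's own .init sections back to the page allocator. */
	unsigned long freed = free_reserved_area(&__init_begin, &__init_end,
						 -1, "unused kernel");
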