x86-64, NUMA: Revert NUMA affine page table allocation
author Tejun Heo <tj@kernel.org>
Fri, 4 Mar 2011 09:26:36 +0000 (10:26 +0100)
committer Tejun Heo <tj@kernel.org>
Fri, 4 Mar 2011 09:26:36 +0000 (10:26 +0100)
This patch reverts NUMA affine page table allocation added by commit
1411e0ec31 (x86-64, numa: Put pgtable to local node memory).

The commit made an undocumented change where the kernel linear mapping
strictly follows the intersection of the e820 memory map and the NUMA
configuration.  If the physical memory configuration has holes or NUMA
nodes are not properly aligned, this leads to using unnecessarily
small mapping sizes, which in turn increases TLB pressure.  For
details, see:

  http://thread.gmane.org/gmane.linux.kernel/1104672

Patches to fix the problem have been proposed, but the underlying code
needs more cleanup and the approach itself seems a bit heavy-handed,
so it has been decided to revert the feature for now and come back
to it in the next development cycle.

  http://thread.gmane.org/gmane.linux.kernel/1105959

As init_memory_mapping_high() callsites have been consolidated since
the commit, reverting is done manually.  Also, the RED-PEN comment in
arch/x86/mm/init.c is not restored as the problem no longer exists
with memblock based top-down early memory allocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
arch/x86/include/asm/page_types.h
arch/x86/kernel/setup.c
arch/x86/mm/init_64.c
arch/x86/mm/numa_64.c

index 97e6007e4eddf487fabe7b65815308477e4f6063..bce688d54c12383e29b360bb7688b3fc9d385830 100644 (file)
@@ -54,8 +54,6 @@ static inline phys_addr_t get_max_mapped(void)
 extern unsigned long init_memory_mapping(unsigned long start,
                                         unsigned long end);
 
-void init_memory_mapping_high(void);
-
 extern void initmem_init(void);
 extern void free_initmem(void);
 
index 46e684f85b36bd98568b1296aeeb6a79d4a55768..c3a606c41ce0ef81eaf70606f70bcfa85c6b83df 100644 (file)
@@ -963,6 +963,14 @@ void __init setup_arch(char **cmdline_p)
        max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
        max_pfn_mapped = max_low_pfn_mapped;
 
+#ifdef CONFIG_X86_64
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping(1UL<<32,
+                                                    max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn ? */
+               max_low_pfn = max_pfn;
+       }
+#endif
        memblock.current_limit = get_max_mapped();
 
        /*
index 470cc4704a9aa8bfda1c655506b1deb87f393c2d..c8813aa39740c22c8d3c9c716b81a82a571de1d5 100644 (file)
@@ -606,63 +606,9 @@ kernel_physical_mapping_init(unsigned long start,
 void __init initmem_init(void)
 {
        memblock_x86_register_active_regions(0, 0, max_pfn);
-       init_memory_mapping_high();
 }
 #endif
 
-struct mapping_work_data {
-       unsigned long start;
-       unsigned long end;
-       unsigned long pfn_mapped;
-};
-
-static int __init_refok
-mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
-{
-       struct mapping_work_data *data = datax;
-       unsigned long pfn_mapped;
-       unsigned long final_start, final_end;
-
-       final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
-       final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
-
-       if (final_end <= final_start)
-               return 0;
-
-       pfn_mapped = init_memory_mapping(final_start, final_end);
-
-       if (pfn_mapped > data->pfn_mapped)
-               data->pfn_mapped = pfn_mapped;
-
-       return 0;
-}
-
-static unsigned long __init_refok
-init_memory_mapping_active_regions(unsigned long start, unsigned long end)
-{
-       struct mapping_work_data data;
-
-       data.start = start;
-       data.end = end;
-       data.pfn_mapped = 0;
-
-       work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
-
-       return data.pfn_mapped;
-}
-
-void __init_refok init_memory_mapping_high(void)
-{
-       if (max_pfn > max_low_pfn) {
-               max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
-                                                        max_pfn<<PAGE_SHIFT);
-               /* can we preserve max_low_pfn ? */
-               max_low_pfn = max_pfn;
-
-               memblock.current_limit = get_max_mapped();
-       }
-}
-
 void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
index 74064e8ae79fd17b1d5253859b6433d1f722c8e0..86491ba568d92e480903a06d6a4ec9061bfc9c1f 100644 (file)
@@ -543,8 +543,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
        if (!numa_meminfo_cover_memory(mi))
                return -EINVAL;
 
-       init_memory_mapping_high();
-
        /* Finally register nodes. */
        for_each_node_mask(nid, node_possible_map) {
                u64 start = (u64)max_pfn << PAGE_SHIFT;