x86: init memory debugging
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 730a5b177b1fa2237a08d3c465a009bbef4e4734..8d7f723cfc28181786f78c63c894198a6dcc0cb3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -27,7 +27,6 @@
 #include <linux/bootmem.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
-#include <linux/efi.h>
 #include <linux/memory_hotplug.h>
 #include <linux/initrd.h>
 #include <linux/cpumask.h>
@@ -42,6 +41,7 @@
 #include <asm/apic.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
 
@@ -66,7 +66,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
                pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-               paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+               paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                pud = pud_offset(pgd, 0);
                if (pmd_table != pmd_offset(pud, 0))
@@ -85,13 +85,20 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
-               pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+               pte_t *page_table = NULL;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+               page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+#endif
+               if (!page_table)
+                       page_table =
+                               (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 
                paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                BUG_ON(page_table != pte_offset_kernel(pmd, 0));
        }
-       
+
        return pte_offset_kernel(pmd, 0);
 }
 
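The new allocation order above presumably exists because CONFIG_DEBUG_PAGEALLOC forces 4k mappings and therefore needs many more page tables, which should not all come out of the scarce low-memory bootmem pool. A minimal userspace sketch of the same prefer-then-fall-back shape; the two pool allocators are hypothetical stand-ins for alloc_bootmem_pages()/alloc_bootmem_low_pages(), not the bootmem API:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-ins for the two bootmem pools; only the
     * fallback shape matters here, not the allocator internals. */
    void *alloc_normal_pool(size_t size) { return malloc(size); }
    void *alloc_low_pool(size_t size)    { return malloc(size); }

    static void *alloc_page_table(size_t size)
    {
        void *pt = NULL;

    #ifdef CONFIG_DEBUG_PAGEALLOC
        /* prefer the roomier pool when debugging needs many tables */
        pt = alloc_normal_pool(size);
    #endif
        if (!pt)    /* always fall back to the scarce low pool */
            pt = alloc_low_pool(size);
        return pt;
    }

    int main(void)
    {
        void *pt = alloc_page_table(4096);

        printf("page table at %p\n", pt);
        free(pt);
        return 0;
    }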
@@ -158,16 +165,25 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                pmd = one_md_table_init(pgd);
                if (pfn >= max_low_pfn)
                        continue;
-               for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+               for (pmd_idx = 0;
+                    pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+                    pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
 
-                       /* Map with big pages if possible, otherwise create normal page tables. */
+                       /* Map with big pages if possible, otherwise
+                          create normal page tables. */
                        if (cpu_has_pse) {
-                               unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-                               if (is_kernel_text(address) || is_kernel_text(address2))
-                                       set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
-                               else
-                                       set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+                               unsigned int address2;
+                               pgprot_t prot = PAGE_KERNEL_LARGE;
+
+                               address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE +
+                                       PAGE_OFFSET + PAGE_SIZE-1;
+
+                               if (is_kernel_text(address) ||
+                                   is_kernel_text(address2))
+                                       prot = PAGE_KERNEL_LARGE_EXEC;
+
+                               set_pmd(pmd, pfn_pmd(pfn, prot));
 
                                pfn += PTRS_PER_PTE;
                        } else {
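In the PSE path above, address and address2 are the first and last byte that a single large page would map; if either boundary falls inside kernel text, the whole pmd gets the executable protection. A standalone C sketch of that span computation, assuming the i386 defaults (4k pages, PAGE_OFFSET at 0xC0000000, 1024 ptes per non-PAE pmd):

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PAGE_SIZE    (1UL << PAGE_SHIFT)
    #define PAGE_OFFSET  0xC0000000UL  /* i386 default, an assumption */
    #define PTRS_PER_PTE 1024          /* non-PAE: one pmd maps 4MB */

    int main(void)
    {
        unsigned long pfn = 1024;      /* example: second large page */
        unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET;
        unsigned long address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE +
                                 PAGE_OFFSET + PAGE_SIZE - 1;

        /* first and last byte covered by one large page */
        printf("pmd covers %#010lx..%#010lx (%lu KiB)\n",
               address, address2, (address2 - address + 1) >> 10);
        return 0;
    }

With pfn = 1024 this prints 0xc0400000..0xc07fffff, i.e. exactly the 4MB window whose two ends the is_kernel_text() checks probe.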
@@ -176,10 +192,12 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                                for (pte_ofs = 0;
                                     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
                                     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+                                       pgprot_t prot = PAGE_KERNEL;
+
                                        if (is_kernel_text(address))
-                                               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-                                       else
-                                               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                                               prot = PAGE_KERNEL_EXEC;
+
+                                       set_pte(pte, pfn_pte(pfn, prot));
                                }
                        }
                }
@@ -193,45 +211,6 @@ static inline int page_kills_ppro(unsigned long pagenr)
        return 0;
 }
 
-int page_is_ram(unsigned long pagenr)
-{
-       int i;
-       unsigned long addr, end;
-
-       if (efi_enabled) {
-               efi_memory_desc_t *md;
-               void *p;
-
-               for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-                       md = p;
-                       if (!is_available_memory(md))
-                               continue;
-                       addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
-                       end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
-
-                       if ((pagenr >= addr) && (pagenr < end))
-                               return 1;
-               }
-               return 0;
-       }
-
-       for (i = 0; i < e820.nr_map; i++) {
-
-               if (e820.map[i].type != E820_RAM)       /* not usable memory */
-                       continue;
-               /*
-                *      !!!FIXME!!! Some BIOSen report areas as RAM that
-                *      are not. Notably the 640->1Mb area. We need a sanity
-                *      check here.
-                */
-               addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
-               end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
-               if  ((pagenr >= addr) && (pagenr < end))
-                       return 1;
-       }
-       return 0;
-}
-
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
@@ -314,8 +293,13 @@ extern void set_highmem_pages_init(int);
 static void __init set_highmem_pages_init(int bad_ppro)
 {
        int pfn;
-       for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-               add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+       for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
+               /*
+                * Holes under sparsemem might not have mem_map[]:
+                */
+               if (pfn_valid(pfn))
+                       add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+       }
        totalram_pages += totalhigh_pages;
 }
 #endif /* CONFIG_FLATMEM */
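The pfn_valid() guard matters because sparsemem only instantiates mem_map[] per memory section; a pfn inside a hole has no struct page behind it, so calling pfn_to_page() on it dereferences nothing valid. A toy, self-contained model of the pattern (the section table, sizes, and helpers are illustrative, not the kernel's real sparsemem layout):

    #include <stdio.h>

    #define PAGES_PER_SECTION 4   /* toy value; real sections are larger */
    #define NR_SECTIONS       4

    struct page { int flags; };

    static struct page section0[PAGES_PER_SECTION];
    static struct page section2[PAGES_PER_SECTION];

    /* sparse "mem_map": sections 1 and 3 are holes with no struct pages */
    static struct page *mem_section[NR_SECTIONS] = {
        section0, NULL, section2, NULL
    };

    static int pfn_valid(unsigned long pfn)
    {
        unsigned long nr = pfn / PAGES_PER_SECTION;

        return nr < NR_SECTIONS && mem_section[nr] != NULL;
    }

    static struct page *pfn_to_page(unsigned long pfn)
    {
        return &mem_section[pfn / PAGES_PER_SECTION]
                           [pfn % PAGES_PER_SECTION];
    }

    int main(void)
    {
        unsigned long pfn;
        int touched = 0;

        for (pfn = 0; pfn < NR_SECTIONS * PAGES_PER_SECTION; pfn++) {
            if (!pfn_valid(pfn))   /* the guard added in the hunk above */
                continue;
            pfn_to_page(pfn)->flags = 1;
            touched++;
        }
        printf("touched %d of %d pfns\n",
               touched, NR_SECTIONS * PAGES_PER_SECTION);
        return 0;
    }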
@@ -326,9 +310,9 @@ static void __init set_highmem_pages_init(int bad_ppro)
 #define set_highmem_pages_init(bad_ppro) do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
-unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
 EXPORT_SYMBOL(__PAGE_KERNEL);
-unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
+pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
 #ifdef CONFIG_NUMA
 extern void __init remap_numa_kva(void);
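These globals move to pteval_t, a type wide enough for PAE ptes, where NX lives in bit 63. Together with __supported_pte_mask (made unconditional further down), pte values can be filtered against the bits the CPU actually supports. A standalone model of that masking idea; the types and helper here are illustrative, not the kernel's actual pfn_pte() path:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t pteval_t;  /* wide enough for PAE ptes incl. bit 63 */

    #define _PAGE_PRESENT (1ULL << 0)
    #define _PAGE_RW      (1ULL << 1)
    #define _PAGE_NX      (1ULL << 63)

    /* NX stays masked off unless feature detection (cf. set_nx() below)
     * turns it on. */
    static pteval_t __supported_pte_mask = ~_PAGE_NX;

    static pteval_t mk_pteval(uint64_t phys, pteval_t prot)
    {
        /* strip bits the CPU cannot honour */
        return (phys | prot) & __supported_pte_mask;
    }

    int main(void)
    {
        pteval_t pte = mk_pteval(0x1000, _PAGE_PRESENT | _PAGE_RW | _PAGE_NX);

        printf("pte = %#llx (NX filtered out)\n", (unsigned long long)pte);

        __supported_pte_mask |= _PAGE_NX;   /* as if the CPU had NX */
        pte = mk_pteval(0x1000, _PAGE_PRESENT | _PAGE_RW | _PAGE_NX);
        printf("pte = %#llx (NX kept)\n", (unsigned long long)pte);
        return 0;
    }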
@@ -360,7 +344,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
        memset(&base[USER_PTRS_PER_PGD], 0,
               KERNEL_PGD_PTRS * sizeof(pgd_t));
 #else
-       paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
+       paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
 #endif
 }
 
@@ -423,9 +407,11 @@ static void __init pagetable_init (void)
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
+       early_ioremap_clear();
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
        page_table_range_init(vaddr, end, pgd_base);
+       early_ioremap_reset();
 
        permanent_kmaps_init(pgd_base);
 
@@ -473,11 +459,12 @@ void zap_low_mappings (void)
 
 int nx_enabled = 0;
 
+pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
+
 #ifdef CONFIG_X86_PAE
 
 static int disable_nx __initdata = 0;
-u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
-EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /*
  * noexec = on|off
@@ -520,34 +507,6 @@ static void __init set_nx(void)
        }
 }
 
-/*
- * Enables/disables executability of a given kernel page and
- * returns the previous setting.
- */
-int __init set_kernel_exec(unsigned long vaddr, int enable)
-{
-       pte_t *pte;
-       int ret = 1;
-
-       if (!nx_enabled)
-               goto out;
-
-       pte = lookup_address(vaddr);
-       BUG_ON(!pte);
-
-       if (!pte_exec_kernel(*pte))
-               ret = 0;
-
-       if (enable)
-               pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
-       else
-               pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
-       pte_update_defer(&init_mm, vaddr, pte);
-       __flush_tlb_all();
-out:
-       return ret;
-}
-
 #endif
 
 /*
@@ -735,35 +694,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
        return __add_pages(zone, start_pfn, nr_pages);
 }
 
-int remove_memory(u64 start, u64 size)
-{
-       return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
 #endif
 
 struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
-       size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
-
-       if (PTRS_PER_PMD > 1) {
+       if (PTRS_PER_PMD > 1)
                pmd_cache = kmem_cache_create("pmd",
-                                       PTRS_PER_PMD*sizeof(pmd_t),
-                                       PTRS_PER_PMD*sizeof(pmd_t),
-                                       SLAB_PANIC,
-                                       pmd_ctor);
-               if (!SHARED_KERNEL_PMD) {
-                       /* If we're in PAE mode and have a non-shared
-                          kernel pmd, then the pgd size must be a
-                          page size.  This is because the pgd_list
-                          links through the page structure, so there
-                          can only be one pgd per page for this to
-                          work. */
-                       pgd_size = PAGE_SIZE;
-               }
-       }
+                                             PTRS_PER_PMD*sizeof(pmd_t),
+                                             PTRS_PER_PMD*sizeof(pmd_t),
+                                             SLAB_PANIC,
+                                             pmd_ctor);
 }
 
 /*
@@ -794,6 +736,8 @@ static int noinline do_test_wp_bit(void)
 }
 
 #ifdef CONFIG_DEBUG_RODATA
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
 
 void mark_rodata_ro(void)
 {
@@ -806,25 +750,32 @@ void mark_rodata_ro(void)
        if (num_possible_cpus() <= 1)
 #endif
        {
-               change_page_attr(virt_to_page(start),
-                                size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+               set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
                printk("Write protecting the kernel text: %luk\n", size >> 10);
+
+#ifdef CONFIG_CPA_DEBUG
+               printk("Testing CPA: Reverting %lx-%lx\n", start, start+size);
+               set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+
+               printk("Testing CPA: write protecting again\n");
+               set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+#endif
        }
 #endif
        start += size;
        size = (unsigned long)__end_rodata - start;
-       change_page_attr(virt_to_page(start),
-                        size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+       set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk("Write protecting the kernel read-only data: %luk\n",
               size >> 10);
+       rodata_test();
 
-       /*
-        * change_page_attr() requires a global_flush_tlb() call after it.
-        * We do this after the printk so that if something went wrong in the
-        * change, the printk gets out at least to give a better debug hint
-        * of who is the culprit.
-        */
-       global_flush_tlb();
+#ifdef CONFIG_CPA_DEBUG
+       printk("Testing CPA: undo %lx-%lx\n", start, start + size);
+       set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
+
+       printk("Testing CPA: write protecting again\n");
+       set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+#endif
 }
 #endif
 
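The CONFIG_CPA_DEBUG blocks flip the protections back and forth on purpose, exercising the new set_pages_ro()/set_pages_rw() path at boot, with the printks bracketing each step. The same self-test idea expressed as a runnable userspace analogue over POSIX mprotect():

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long psz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
            return 1;
        strcpy(p, "rodata");

        mprotect(p, psz, PROT_READ);              /* "set_pages_ro" */
        mprotect(p, psz, PROT_READ | PROT_WRITE); /* test: revert to rw */
        mprotect(p, psz, PROT_READ);              /* write protect again */

        printf("still readable after the round trip: %s\n", p);
        return 0;
    }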
@@ -832,6 +783,23 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
        unsigned long addr;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /*
+        * If debugging page accesses, then do not free this memory but
+        * mark it not present - any buggy init-section access will
+        * create a kernel page fault:
+        */
+       printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+               begin, PAGE_ALIGN(end));
+       set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
+       /*
+        * We just marked the kernel text read only above, now that
+        * we are going to free part of that, we need to make that
+        * writeable first.
+        */
+       set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
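With CONFIG_DEBUG_PAGEALLOC the init pages are not returned to the allocator at all: set_memory_np() unmaps them, so any late reference into freed init code or data faults immediately instead of silently hitting recycled memory. A userspace analogue of unmap-instead-of-free, using mprotect(PROT_NONE):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long psz = sysconf(_SC_PAGESIZE);
        char *init_mem = mmap(NULL, psz, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (init_mem == MAP_FAILED)
            return 1;
        strcpy(init_mem, "only needed during init");

        /* "Free" by revoking access instead of recycling the page: a
         * buggy late access now faults loudly rather than reading
         * whatever the allocator reused the page for. */
        mprotect(init_mem, psz, PROT_NONE);

        /* init_mem[0] would SIGSEGV here */
        puts("init memory unmapped, not freed");
        return 0;
    }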
@@ -840,6 +808,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
                totalram_pages++;
        }
        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+#endif
 }
 
 void free_initmem(void)