x86: Convert the rest of the code to support p4d_t
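
With 5-level paging, a p4d_t level sits between pgd_t and pud_t, and
pud_offset()/p4d_offset() take the entry one level up. On kernels where
p4d is folded, p4d_offset(pgd, addr) simply returns the pgd entry, so
the extra step compiles away. Every pgd -> pud walk in init_64.c
therefore becomes a pgd -> p4d -> pud walk.

As a minimal sketch of the resulting walk (not part of this patch;
walk_kernel_pte() is a made-up helper used only for illustration, but
the accessors are the ones used in the hunks below):

	/* Illustrative only: walk_kernel_pte() is not a kernel helper. */
	static pte_t *walk_kernel_pte(unsigned long vaddr)
	{
		pgd_t *pgd = pgd_offset_k(vaddr);	/* top level */
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			return NULL;

		/* New level; a no-op when p4d is folded into pgd. */
		p4d = p4d_offset(pgd, vaddr);
		if (p4d_none(*p4d))
			return NULL;

		/* pud_offset() now takes a p4d_t *, not a pgd_t *. */
		pud = pud_offset(p4d, vaddr);
		if (pud_none(*pud))
			return NULL;

		pmd = pmd_offset(pud, vaddr);
		if (pmd_none(*pmd))
			return NULL;

		return pte_offset_kernel(pmd, vaddr);
	}
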
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 15173d37f399610caf8969fa942b17b175dd466b..7bdda6f1d13511bf58372bc1b9ac317c461a1f5b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
        unsigned long address;
 
        for (address = start; address <= end; address += PGDIR_SIZE) {
-               const pgd_t *pgd_ref = pgd_offset_k(address);
+               pgd_t *pgd_ref = pgd_offset_k(address);
+               const p4d_t *p4d_ref;
                struct page *page;
 
-               if (pgd_none(*pgd_ref))
+               /*
+                * With folded p4d, pgd_none() is always false; we need to
+                * handle synchronization at the p4d level.
+                */
+               BUILD_BUG_ON(pgd_none(*pgd_ref));
+               p4d_ref = p4d_offset(pgd_ref, address);
+
+               if (p4d_none(*p4d_ref))
                        continue;
 
                spin_lock(&pgd_lock);
                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd;
+                       p4d_t *p4d;
                        spinlock_t *pgt_lock;
 
                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
+                       p4d = p4d_offset(pgd, address);
                        /* the pgt_lock only for Xen */
                        pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
                        spin_lock(pgt_lock);
 
-                       if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-                               BUG_ON(pgd_page_vaddr(*pgd)
-                                      != pgd_page_vaddr(*pgd_ref));
+                       if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
+                               BUG_ON(p4d_page_vaddr(*p4d)
+                                      != p4d_page_vaddr(*p4d_ref));
 
-                       if (pgd_none(*pgd))
-                               set_pgd(pgd, *pgd_ref);
+                       if (p4d_none(*p4d))
+                               set_p4d(p4d, *p4d_ref);
 
                        spin_unlock(pgt_lock);
                }
@@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
        return ptr;
 }
 
-static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
        if (pgd_none(*pgd)) {
-               pud_t *pud = (pud_t *)spp_getpage();
-               pgd_populate(&init_mm, pgd, pud);
-               if (pud != pud_offset(pgd, 0))
+               p4d_t *p4d = (p4d_t *)spp_getpage();
+               pgd_populate(&init_mm, pgd, p4d);
+               if (p4d != p4d_offset(pgd, 0))
                        printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
-                              pud, pud_offset(pgd, 0));
+                              p4d, p4d_offset(pgd, 0));
+       }
+       return p4d_offset(pgd, vaddr);
+}
+
+static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
+{
+       if (p4d_none(*p4d)) {
+               pud_t *pud = (pud_t *)spp_getpage();
+               p4d_populate(&init_mm, p4d, pud);
+               if (pud != pud_offset(p4d, 0))
+                       printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+                              pud, pud_offset(p4d, 0));
        }
-       return pud_offset(pgd, vaddr);
+       return pud_offset(p4d, vaddr);
 }
 
 static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
@@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
                pmd_t *pmd = (pmd_t *) spp_getpage();
                pud_populate(&init_mm, pud, pmd);
                if (pmd != pmd_offset(pud, 0))
-                       printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+                       printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
                               pmd, pmd_offset(pud, 0));
        }
        return pmd_offset(pud, vaddr);
@@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
                pte_t *pte = (pte_t *) spp_getpage();
                pmd_populate_kernel(&init_mm, pmd, pte);
                if (pte != pte_offset_kernel(pmd, 0))
-                       printk(KERN_ERR "PAGETABLE BUG #02!\n");
+                       printk(KERN_ERR "PAGETABLE BUG #03!\n");
        }
        return pte_offset_kernel(pmd, vaddr);
 }
 
-void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
 {
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       pud = pud_page + pud_index(vaddr);
-       pmd = fill_pmd(pud, vaddr);
-       pte = fill_pte(pmd, vaddr);
+       pmd_t *pmd = fill_pmd(pud, vaddr);
+       pte_t *pte = fill_pte(pmd, vaddr);
 
        set_pte(pte, new_pte);
 
@@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
        __flush_tlb_one(vaddr);
 }
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
+{
+       p4d_t *p4d = p4d_page + p4d_index(vaddr);
+       pud_t *pud = fill_pud(p4d, vaddr);
+
+       __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+       pud_t *pud = pud_page + pud_index(vaddr);
+
+       __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
        pgd_t *pgd;
-       pud_t *pud_page;
+       p4d_t *p4d_page;
 
        pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
 
@@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
                        "PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
-       pud_page = (pud_t*)pgd_page_vaddr(*pgd);
-       set_pte_vaddr_pud(pud_page, vaddr, pteval);
+
+       p4d_page = p4d_offset(pgd, 0);
+       set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
 }
 
 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
 
        pgd = pgd_offset_k(vaddr);
-       pud = fill_pud(pgd, vaddr);
+       p4d = fill_p4d(pgd, vaddr);
+       pud = fill_pud(p4d, vaddr);
        return fill_pmd(pud, vaddr);
 }
 
@@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
                                        enum page_cache_mode cache)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pgprot_t prot;
@@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
        for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
                pgd = pgd_offset_k((unsigned long)__va(phys));
                if (pgd_none(*pgd)) {
+                       p4d = (p4d_t *) spp_getpage();
+                       set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
+                                               _PAGE_USER));
+               }
+               p4d = p4d_offset(pgd, (unsigned long)__va(phys));
+               if (p4d_none(*p4d)) {
                        pud = (pud_t *) spp_getpage();
-                       set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+                       set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
                                                _PAGE_USER));
                }
-               pud = pud_offset(pgd, (unsigned long)__va(phys));
+               pud = pud_offset(p4d, (unsigned long)__va(phys));
                if (pud_none(*pud)) {
                        pmd = (pmd_t *) spp_getpage();
                        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
@@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 
        for (; vaddr < vaddr_end; vaddr = vaddr_next) {
                pgd_t *pgd = pgd_offset_k(vaddr);
+               p4d_t *p4d;
                pud_t *pud;
 
                vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
-               if (pgd_val(*pgd)) {
-                       pud = (pud_t *)pgd_page_vaddr(*pgd);
+               BUILD_BUG_ON(pgd_none(*pgd));
+               p4d = p4d_offset(pgd, vaddr);
+               if (p4d_val(*p4d)) {
+                       pud = (pud_t *)p4d_page_vaddr(*p4d);
                        paddr_last = phys_pud_init(pud, __pa(vaddr),
                                                   __pa(vaddr_end),
                                                   page_size_mask);
@@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
                                           page_size_mask);
 
                spin_lock(&init_mm.page_table_lock);
-               pgd_populate(&init_mm, pgd, pud);
+               p4d_populate(&init_mm, p4d, pud);
                spin_unlock(&init_mm.page_table_lock);
                pgd_changed = true;
        }
@@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        spin_unlock(&init_mm.page_table_lock);
 }
 
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+       pud_t *pud;
+       int i;
+
+       for (i = 0; i < PTRS_PER_PUD; i++) {
+               pud = pud_start + i;
+               if (!pud_none(*pud))
+                       return;
+       }
+
+       /* free a pud table */
+       free_pagetable(p4d_page(*p4d), 0);
+       spin_lock(&init_mm.page_table_lock);
+       p4d_clear(p4d);
+       spin_unlock(&init_mm.page_table_lock);
+}
+
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                 bool direct)
@@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                update_page_count(PG_LEVEL_1G, -pages);
 }
 
+static void __meminit
+remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
+                bool direct)
+{
+       unsigned long next, pages = 0;
+       pud_t *pud_base;
+       p4d_t *p4d;
+
+       p4d = p4d_start + p4d_index(addr);
+       for (; addr < end; addr = next, p4d++) {
+               next = p4d_addr_end(addr, end);
+
+               if (!p4d_present(*p4d))
+                       continue;
+
+               BUILD_BUG_ON(p4d_large(*p4d));
+
+               pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+               remove_pud_table(pud_base, addr, next, direct);
+               free_pud_table(pud_base, p4d);
+       }
+
+       if (direct)
+               update_page_count(PG_LEVEL_512G, -pages);
+}
+
 /* start and end are both virtual address. */
 static void __meminit
 remove_pagetable(unsigned long start, unsigned long end, bool direct)
@@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
        unsigned long next;
        unsigned long addr;
        pgd_t *pgd;
-       pud_t *pud;
+       p4d_t *p4d;
 
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
@@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
                if (!pgd_present(*pgd))
                        continue;
 
-               pud = (pud_t *)pgd_page_vaddr(*pgd);
-               remove_pud_table(pud, addr, next, direct);
+               p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+               remove_p4d_table(p4d, addr, next, direct);
        }
 
        flush_tlb_all();
@@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
 {
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
        if (pgd_none(*pgd))
                return 0;
 
-       pud = pud_offset(pgd, addr);
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return 0;
+
+       pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return 0;
 
@@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
        unsigned long addr;
        unsigned long next;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
 
@@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
                if (!pgd)
                        return -ENOMEM;
 
-               pud = vmemmap_pud_populate(pgd, addr, node);
+               p4d = vmemmap_p4d_populate(pgd, addr, node);
+               if (!p4d)
+                       return -ENOMEM;
+
+               pud = vmemmap_pud_populate(p4d, addr, node);
                if (!pud)
                        return -ENOMEM;
 
@@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        unsigned int nr_pages;
@@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
                }
                get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
 
-               pud = pud_offset(pgd, addr);
+               p4d = p4d_offset(pgd, addr);
+               if (p4d_none(*p4d)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       continue;
+               }
+               get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
+
+               pud = pud_offset(p4d, addr);
                if (pud_none(*pud)) {
                        next = (addr + PAGE_SIZE) & PAGE_MASK;
                        continue;