Merge branches 'work.misc' and 'work.dcache' of git://git.kernel.org/pub/scm/linux...
[sfrench/cifs-2.6.git] / arch / x86 / mm / pti.c
index a536ecc91847f8e724598b612cbed22816d39bca..d58b4aba9510f3d2a5f89f2df942d966b35cbe19 100644 (file)
 #define __GFP_NOTRACK  0
 #endif
 
+/*
+ * Define the page-table levels we clone for user-space on 32
+ * and 64 bit.
+ */
+#ifdef CONFIG_X86_64
+#define        PTI_LEVEL_KERNEL_IMAGE  PTI_CLONE_PMD
+#else
+#define        PTI_LEVEL_KERNEL_IMAGE  PTI_CLONE_PTE
+#endif
+
 static void __init pti_print_if_insecure(const char *reason)
 {
        if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
@@ -176,7 +186,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 
        if (pgd_none(*pgd)) {
                unsigned long new_p4d_page = __get_free_page(gfp);
-               if (!new_p4d_page)
+               if (WARN_ON_ONCE(!new_p4d_page))
                        return NULL;
 
                set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
@@ -195,13 +205,17 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+       p4d_t *p4d;
        pud_t *pud;
 
+       p4d = pti_user_pagetable_walk_p4d(address);
+       if (!p4d)
+               return NULL;
+
        BUILD_BUG_ON(p4d_large(*p4d) != 0);
        if (p4d_none(*p4d)) {
                unsigned long new_pud_page = __get_free_page(gfp);
-               if (!new_pud_page)
+               if (WARN_ON_ONCE(!new_pud_page))
                        return NULL;
 
                set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
@@ -215,7 +229,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
        }
        if (pud_none(*pud)) {
                unsigned long new_pmd_page = __get_free_page(gfp);
-               if (!new_pmd_page)
+               if (WARN_ON_ONCE(!new_pmd_page))
                        return NULL;
 
                set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
@@ -224,7 +238,6 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
        return pmd_offset(pud, address);
 }
 
-#ifdef CONFIG_X86_VSYSCALL_EMULATION
 /*
  * Walk the shadow copy of the page tables (optionally) trying to allocate
  * page table pages on the way down.  Does not support large pages.
@@ -237,9 +250,13 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 {
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-       pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+       pmd_t *pmd;
        pte_t *pte;
 
+       pmd = pti_user_pagetable_walk_pmd(address);
+       if (!pmd)
+               return NULL;
+
        /* We can't do anything sensible if we hit a large mapping. */
        if (pmd_large(*pmd)) {
                WARN_ON(1);
@@ -262,6 +279,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
        return pte;
 }
 
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 static void __init pti_setup_vsyscall(void)
 {
        pte_t *pte, *target_pte;
@@ -282,8 +300,14 @@ static void __init pti_setup_vsyscall(void)
 static void __init pti_setup_vsyscall(void) { }
 #endif
 
+enum pti_clone_level {
+       PTI_CLONE_PMD,
+       PTI_CLONE_PTE,
+};
+
 static void
-pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+pti_clone_pgtable(unsigned long start, unsigned long end,
+                 enum pti_clone_level level)
 {
        unsigned long addr;
 
@@ -291,7 +315,8 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
         * Clone the populated PMDs which cover start to end. These PMD areas
         * can have holes.
         */
-       for (addr = start; addr < end; addr += PMD_SIZE) {
+       for (addr = start; addr < end;) {
+               pte_t *pte, *target_pte;
                pmd_t *pmd, *target_pmd;
                pgd_t *pgd;
                p4d_t *p4d;
@@ -307,44 +332,84 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
                p4d = p4d_offset(pgd, addr);
                if (WARN_ON(p4d_none(*p4d)))
                        return;
+
                pud = pud_offset(p4d, addr);
-               if (pud_none(*pud))
+               if (pud_none(*pud)) {
+                       addr += PUD_SIZE;
                        continue;
+               }
+
                pmd = pmd_offset(pud, addr);
-               if (pmd_none(*pmd))
+               if (pmd_none(*pmd)) {
+                       addr += PMD_SIZE;
                        continue;
+               }
 
-               target_pmd = pti_user_pagetable_walk_pmd(addr);
-               if (WARN_ON(!target_pmd))
-                       return;
-
-               /*
-                * Only clone present PMDs.  This ensures only setting
-                * _PAGE_GLOBAL on present PMDs.  This should only be
-                * called on well-known addresses anyway, so a non-
-                * present PMD would be a surprise.
-                */
-               if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
-                       return;
-
-               /*
-                * Setting 'target_pmd' below creates a mapping in both
-                * the user and kernel page tables.  It is effectively
-                * global, so set it as global in both copies.  Note:
-                * the X86_FEATURE_PGE check is not _required_ because
-                * the CPU ignores _PAGE_GLOBAL when PGE is not
-                * supported.  The check keeps consistentency with
-                * code that only set this bit when supported.
-                */
-               if (boot_cpu_has(X86_FEATURE_PGE))
-                       *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
-
-               /*
-                * Copy the PMD.  That is, the kernelmode and usermode
-                * tables will share the last-level page tables of this
-                * address range
-                */
-               *target_pmd = pmd_clear_flags(*pmd, clear);
+               if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
+                       target_pmd = pti_user_pagetable_walk_pmd(addr);
+                       if (WARN_ON(!target_pmd))
+                               return;
+
+                       /*
+                        * Only clone present PMDs.  This ensures only setting
+                        * _PAGE_GLOBAL on present PMDs.  This should only be
+                        * called on well-known addresses anyway, so a non-
+                        * present PMD would be a surprise.
+                        */
+                       if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
+                               return;
+
+                       /*
+                        * Setting 'target_pmd' below creates a mapping in both
+                        * the user and kernel page tables.  It is effectively
+                        * global, so set it as global in both copies.  Note:
+                        * the X86_FEATURE_PGE check is not _required_ because
+                        * the CPU ignores _PAGE_GLOBAL when PGE is not
+                        * supported.  The check keeps consistency with
+                        * code that only set this bit when supported.
+                        */
+                       if (boot_cpu_has(X86_FEATURE_PGE))
+                               *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
+
+                       /*
+                        * Copy the PMD.  That is, the kernelmode and usermode
+                        * tables will share the last-level page tables of this
+                        * address range
+                        */
+                       *target_pmd = *pmd;
+
+                       addr += PMD_SIZE;
+
+               } else if (level == PTI_CLONE_PTE) {
+
+                       /* Walk the page-table down to the pte level */
+                       pte = pte_offset_kernel(pmd, addr);
+                       if (pte_none(*pte)) {
+                               addr += PAGE_SIZE;
+                               continue;
+                       }
+
+                       /* Only clone present PTEs */
+                       if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
+                               return;
+
+                       /* Allocate PTE in the user page-table */
+                       target_pte = pti_user_pagetable_walk_pte(addr);
+                       if (WARN_ON(!target_pte))
+                               return;
+
+                       /* Set GLOBAL bit in both PTEs */
+                       if (boot_cpu_has(X86_FEATURE_PGE))
+                               *pte = pte_set_flags(*pte, _PAGE_GLOBAL);
+
+                       /* Clone the PTE */
+                       *target_pte = *pte;
+
+                       addr += PAGE_SIZE;
+
+               } else {
+                       BUG();
+               }
        }
 }
 
@@ -359,6 +424,9 @@ static void __init pti_clone_p4d(unsigned long addr)
        pgd_t *kernel_pgd;
 
        user_p4d = pti_user_pagetable_walk_p4d(addr);
+       if (!user_p4d)
+               return;
+
        kernel_pgd = pgd_offset_k(addr);
        kernel_p4d = p4d_offset(kernel_pgd, addr);
        *user_p4d = *kernel_p4d;
@@ -387,7 +455,7 @@ static void __init pti_clone_user_shared(void)
        start = CPU_ENTRY_AREA_BASE;
        end   = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
 
-       pti_clone_pmds(start, end, 0);
+       pti_clone_pgtable(start, end, PTI_CLONE_PMD);
 }
 #endif /* CONFIG_X86_64 */
 
@@ -406,9 +474,9 @@ static void __init pti_setup_espfix64(void)
  */
 static void pti_clone_entry_text(void)
 {
-       pti_clone_pmds((unsigned long) __entry_text_start,
-                       (unsigned long) __irqentry_text_end,
-                      _PAGE_RW);
+       pti_clone_pgtable((unsigned long) __entry_text_start,
+                         (unsigned long) __irqentry_text_end,
+                         PTI_CLONE_PMD);
 }
 
 /*
@@ -458,6 +526,13 @@ static inline bool pti_kernel_image_global_ok(void)
        return true;
 }
 
+/*
+ * This is the only user for these and it is not arch-generic
+ * like the other set_memory.h functions.  Just extern them.
+ */
+extern int set_memory_nonglobal(unsigned long addr, int numpages);
+extern int set_memory_global(unsigned long addr, int numpages);
+
 /*
  * For some configurations, map all of kernel text into the user page
  * tables.  This reduces TLB misses, especially on non-PCID systems.
@@ -470,7 +545,8 @@ static void pti_clone_kernel_text(void)
         * clone the areas past rodata, they might contain secrets.
         */
        unsigned long start = PFN_ALIGN(_text);
-       unsigned long end = (unsigned long)__end_rodata_aligned;
+       unsigned long end_clone  = (unsigned long)__end_rodata_aligned;
+       unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);
 
        if (!pti_kernel_image_global_ok())
                return;
@@ -482,15 +558,19 @@ static void pti_clone_kernel_text(void)
         * pti_set_kernel_image_nonglobal() did to clear the
         * global bit.
         */
-       pti_clone_pmds(start, end, 0);
+       pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
+
+       /*
+        * pti_clone_pgtable() will set the global bit in any PMDs
+        * that it clones, but we also need to get any PTEs in
+        * the last level for areas that are not huge-page-aligned.
+        */
+
+       /* Set the global bit for normal non-__init kernel text: */
+       set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
 }
 
-/*
- * This is the only user for it and it is not arch-generic like
- * the other set_memory.h functions.  Just extern it.
- */
-extern int set_memory_nonglobal(unsigned long addr, int numpages);
-static void pti_set_kernel_image_nonglobal(void)
+void pti_set_kernel_image_nonglobal(void)
 {
        /*
         * The identity map is created with PMDs, regardless of the
@@ -501,9 +581,11 @@ static void pti_set_kernel_image_nonglobal(void)
        unsigned long start = PFN_ALIGN(_text);
        unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
 
-       if (pti_kernel_image_global_ok())
-               return;
-
+       /*
+        * This clears _PAGE_GLOBAL from the entire kernel image.
+        * pti_clone_kernel_text() may put _PAGE_GLOBAL back for
+        * areas that are mapped to userspace.
+        */
        set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
@@ -517,6 +599,28 @@ void __init pti_init(void)
 
        pr_info("enabled\n");
 
+#ifdef CONFIG_X86_32
+       /*
+        * We check for X86_FEATURE_PCID here. But the init-code will
+        * clear the feature flag on 32 bit because the feature is not
+        * supported on 32 bit anyway. To print the warning we need to
+        * check with cpuid directly again.
+        */
+       if (cpuid_ecx(0x1) & BIT(17)) {
+               /* Use printk to work around pr_fmt() */
+               printk(KERN_WARNING "\n");
+               printk(KERN_WARNING "************************************************************\n");
+               printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!  **\n");
+               printk(KERN_WARNING "**                                                        **\n");
+               printk(KERN_WARNING "** You are using 32-bit PTI on a 64-bit PCID-capable CPU. **\n");
+               printk(KERN_WARNING "** Your performance will increase dramatically if you     **\n");
+               printk(KERN_WARNING "** switch to a 64-bit kernel!                             **\n");
+               printk(KERN_WARNING "**                                                        **\n");
+               printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!  **\n");
+               printk(KERN_WARNING "************************************************************\n");
+       }
+#endif
+
        pti_clone_user_shared();
 
        /* Undo all global bits from the init pagetables in head_64.S: */
@@ -542,4 +646,6 @@ void pti_finalize(void)
         */
        pti_clone_entry_text();
        pti_clone_kernel_text();
+
+       debug_checkwx_user();
 }