x86/mm/pat: Emulate PAT when it is disabled
[sfrench/cifs-2.6.git] / arch / x86 / mm / pat.c
index 35af6771a95ad6c126cca1f352b896998116876e..6dc7826e4797fd656ea9c19765c6fac8788ed848 100644 (file)
 #include "pat_internal.h"
 #include "mm_internal.h"
 
-#ifdef CONFIG_X86_PAT
-int __read_mostly pat_enabled = 1;
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static bool boot_cpu_done;
+
+static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
 
 static inline void pat_disable(const char *reason)
 {
-       pat_enabled = 0;
-       printk(KERN_INFO "%s\n", reason);
+       __pat_enabled = 0;
+       pr_info("x86/PAT: %s\n", reason);
 }
 
 static int __init nopat(char *str)
@@ -48,13 +52,12 @@ static int __init nopat(char *str)
        return 0;
 }
 early_param("nopat", nopat);
-#else
-static inline void pat_disable(const char *reason)
+
+bool pat_enabled(void)
 {
-       (void)reason;
+       return !!__pat_enabled;
 }
-#endif
-
+EXPORT_SYMBOL_GPL(pat_enabled);
 
 int pat_debug_enable;
 
@@ -65,8 +68,6 @@ static int __init pat_debug_setup(char *str)
 }
 __setup("debugpat", pat_debug_setup);
 
-static u64 __read_mostly boot_pat_state;
-
 #ifdef CONFIG_X86_PAT
 /*
  * X86 PAT uses page flags WC and Uncached together to keep track of
@@ -174,78 +175,113 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
  * configuration.
  * Using lower indices is preferred, so we start with highest index.
  */
-void pat_init_cache_modes(void)
+void pat_init_cache_modes(u64 pat)
 {
-       int i;
        enum page_cache_mode cache;
        char pat_msg[33];
-       u64 pat;
+       int i;
 
-       rdmsrl(MSR_IA32_CR_PAT, pat);
        pat_msg[32] = 0;
        for (i = 7; i >= 0; i--) {
                cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
                                           pat_msg + 4 * i);
                update_cache_mode_entry(i, cache);
        }
-       pr_info("PAT configuration [0-7]: %s\n", pat_msg);
+       pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
 }
 
 #define PAT(x, y)      ((u64)PAT_ ## y << ((x)*8))
 
-void pat_init(void)
+static void pat_bsp_init(u64 pat)
 {
-       u64 pat;
-       bool boot_cpu = !boot_pat_state;
+       u64 tmp_pat;
 
-       if (!pat_enabled)
+       if (!cpu_has_pat) {
+               pat_disable("PAT not supported by CPU.");
                return;
+       }
 
-       if (!cpu_has_pat) {
-               if (!boot_pat_state) {
-                       pat_disable("PAT not supported by CPU.");
-                       return;
-               } else {
-                       /*
-                        * If this happens we are on a secondary CPU, but
-                        * switched to PAT on the boot CPU. We have no way to
-                        * undo PAT.
-                        */
-                       printk(KERN_ERR "PAT enabled, "
-                              "but not supported by secondary CPU\n");
-                       BUG();
-               }
+       if (!pat_enabled())
+               goto done;
+
+       rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+       if (!tmp_pat) {
+               pat_disable("PAT MSR is 0, disabled.");
+               return;
        }
 
-       /* Set PWT to Write-Combining. All other bits stay the same */
-       /*
-        * PTE encoding used in Linux:
-        *      PAT
-        *      |PCD
-        *      ||PWT
-        *      |||
-        *      000 WB          _PAGE_CACHE_WB
-        *      001 WC          _PAGE_CACHE_WC
-        *      010 UC-         _PAGE_CACHE_UC_MINUS
-        *      011 UC          _PAGE_CACHE_UC
-        * PAT bit unused
-        */
-       pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
-             PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
-
-       /* Boot CPU check */
-       if (!boot_pat_state) {
-               rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
-               if (!boot_pat_state) {
-                       pat_disable("PAT read returns always zero, disabled.");
-                       return;
-               }
+       wrmsrl(MSR_IA32_CR_PAT, pat);
+
+done:
+       pat_init_cache_modes(pat);
+}
+
+static void pat_ap_init(u64 pat)
+{
+       if (!pat_enabled())
+               return;
+
+       if (!cpu_has_pat) {
+               /*
+                * If this happens we are on a secondary CPU, but switched to
+                * PAT on the boot CPU. We have no way to undo PAT.
+                */
+               panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
        }
 
        wrmsrl(MSR_IA32_CR_PAT, pat);
+}
 
-       if (boot_cpu)
-               pat_init_cache_modes();
+void pat_init(void)
+{
+       u64 pat;
+
+       if (!pat_enabled()) {
+               /*
+                * No PAT. Emulate the PAT table that corresponds to the two
+                * cache bits, PWT (Write Through) and PCD (Cache Disable). This
+                * setup is the same as the BIOS default setup when the system
+                * has PAT but the "nopat" boot option has been specified. This
+                * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+                *
+                * PTE encoding used:
+                *
+                *       PCD
+                *       |PWT  PAT
+                *       ||    slot
+                *       00    0    WB : _PAGE_CACHE_MODE_WB
+                *       01    1    WT : _PAGE_CACHE_MODE_WT
+                *       10    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
+                *       11    3    UC : _PAGE_CACHE_MODE_UC
+                *
+                * NOTE: When WC or WP is used, it is redirected to UC- per
+                * the default setup in __cachemode2pte_tbl[].
+                */
+               pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+                     PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+       } else {
+               /*
+                * PTE encoding used in Linux:
+                *      PAT
+                *      |PCD
+                *      ||PWT
+                *      |||
+                *      000 WB          _PAGE_CACHE_WB
+                *      001 WC          _PAGE_CACHE_WC
+                *      010 UC-         _PAGE_CACHE_UC_MINUS
+                *      011 UC          _PAGE_CACHE_UC
+                * PAT bit unused
+                */
+               pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+                     PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+       }
+
+       if (!boot_cpu_done) {
+               pat_bsp_init(pat);
+               boot_cpu_done = true;
+       } else {
+               pat_ap_init(pat);
+       }
 }
 
 #undef PAT
@@ -267,9 +303,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
         * request is for WB.
         */
        if (req_type == _PAGE_CACHE_MODE_WB) {
-               u8 mtrr_type;
+               u8 mtrr_type, uniform;
 
-               mtrr_type = mtrr_type_lookup(start, end);
+               mtrr_type = mtrr_type_lookup(start, end, &uniform);
                if (mtrr_type != MTRR_TYPE_WRBACK)
                        return _PAGE_CACHE_MODE_UC_MINUS;
 
@@ -347,7 +383,7 @@ static int reserve_ram_pages_type(u64 start, u64 end,
                page = pfn_to_page(pfn);
                type = get_page_memtype(page);
                if (type != -1) {
-                       pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
+                       pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
                                start, end - 1, type, req_type);
                        if (new_type)
                                *new_type = type;
@@ -400,7 +436,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
 
        BUG_ON(start >= end); /* end is exclusive */
 
-       if (!pat_enabled) {
+       if (!pat_enabled()) {
                /* This is identical to page table setting without PAT */
                if (new_type) {
                        if (req_type == _PAGE_CACHE_MODE_WC)
@@ -451,9 +487,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
 
        err = rbt_memtype_check_insert(new, new_type);
        if (err) {
-               printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
-                      start, end - 1,
-                      cattr_name(new->type), cattr_name(req_type));
+               pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+                       start, end - 1,
+                       cattr_name(new->type), cattr_name(req_type));
                kfree(new);
                spin_unlock(&memtype_lock);
 
@@ -475,7 +511,7 @@ int free_memtype(u64 start, u64 end)
        int is_range_ram;
        struct memtype *entry;
 
-       if (!pat_enabled)
+       if (!pat_enabled())
                return 0;
 
        /* Low ISA region is always mapped WB. No need to track */
@@ -497,8 +533,8 @@ int free_memtype(u64 start, u64 end)
        spin_unlock(&memtype_lock);
 
        if (!entry) {
-               printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
-                      current->comm, current->pid, start, end - 1);
+               pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+                       current->comm, current->pid, start, end - 1);
                return -EINVAL;
        }
 
@@ -623,13 +659,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
        u64 to = from + size;
        u64 cursor = from;
 
-       if (!pat_enabled)
+       if (!pat_enabled())
                return 1;
 
        while (cursor < to) {
                if (!devmem_is_allowed(pfn)) {
-                       printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
-                              current->comm, from, to - 1);
+                       pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+                               current->comm, from, to - 1);
                        return 0;
                }
                cursor += PAGE_SIZE;
@@ -659,7 +695,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
         * caching for the high addresses through the KEN pin, but
         * we maintain the tradition of paranoia in this code.
         */
-       if (!pat_enabled &&
+       if (!pat_enabled() &&
            !(boot_cpu_has(X86_FEATURE_MTRR) ||
              boot_cpu_has(X86_FEATURE_K6_MTRR) ||
              boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
@@ -698,8 +734,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
                                size;
 
        if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
-               printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
-                       "for [mem %#010Lx-%#010Lx]\n",
+               pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
                        current->comm, current->pid,
                        cattr_name(pcm),
                        base, (unsigned long long)(base + size-1));
@@ -729,12 +764,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
         * the type requested matches the type of first page in the range.
         */
        if (is_ram) {
-               if (!pat_enabled)
+               if (!pat_enabled())
                        return 0;
 
                pcm = lookup_memtype(paddr);
                if (want_pcm != pcm) {
-                       printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
+                       pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
                                current->comm, current->pid,
                                cattr_name(want_pcm),
                                (unsigned long long)paddr,
@@ -755,13 +790,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
                if (strict_prot ||
                    !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
                        free_memtype(paddr, paddr + size);
-                       printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
-                               " for [mem %#010Lx-%#010Lx], got %s\n",
-                               current->comm, current->pid,
-                               cattr_name(want_pcm),
-                               (unsigned long long)paddr,
-                               (unsigned long long)(paddr + size - 1),
-                               cattr_name(pcm));
+                       pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
+                              current->comm, current->pid,
+                              cattr_name(want_pcm),
+                              (unsigned long long)paddr,
+                              (unsigned long long)(paddr + size - 1),
+                              cattr_name(pcm));
                        return -EINVAL;
                }
                /*
@@ -844,7 +878,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                return ret;
        }
 
-       if (!pat_enabled)
+       if (!pat_enabled())
                return 0;
 
        /*
@@ -872,7 +906,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 {
        enum page_cache_mode pcm;
 
-       if (!pat_enabled)
+       if (!pat_enabled())
                return 0;
 
        /* Set prot based on lookup */
@@ -913,7 +947,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
 {
-       if (pat_enabled)
+       if (pat_enabled())
                return __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_WC));
        else
@@ -996,7 +1030,7 @@ static const struct file_operations memtype_fops = {
 
 static int __init pat_memtype_list_init(void)
 {
-       if (pat_enabled) {
+       if (pat_enabled()) {
                debugfs_create_file("pat_memtype_list", S_IRUSR,
                                    arch_debugfs_dir, NULL, &memtype_fops);
        }