Merge branch 'for-4.16/nfit' into libnvdimm-for-next
authorRoss Zwisler <ross.zwisler@linux.intel.com>
Sat, 3 Feb 2018 07:26:26 +0000 (00:26 -0700)
committerRoss Zwisler <ross.zwisler@linux.intel.com>
Sat, 3 Feb 2018 07:26:26 +0000 (00:26 -0700)
51 files changed:
arch/arm64/mm/mmu.c
arch/ia64/mm/discontig.c
arch/ia64/mm/init.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/mem.c
arch/powerpc/platforms/Kconfig
arch/powerpc/sysdev/axonram.c
arch/s390/mm/init.c
arch/s390/mm/vmem.c
arch/sh/mm/init.c
arch/sparc/mm/init_64.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
drivers/acpi/nfit/core.c
drivers/dax/device.c
drivers/dax/pmem.c
drivers/dax/super.c
drivers/nvdimm/btt.c
drivers/nvdimm/bus.c
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd.h
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/pmem.c
drivers/nvdimm/pmem.h
drivers/s390/block/Kconfig
drivers/s390/block/dcssblk.c
fs/Kconfig
fs/ext2/super.c
fs/ext4/super.c
include/linux/memory_hotplug.h
include/linux/memremap.h
include/linux/mm.h
include/linux/pfn_t.h
include/uapi/linux/ndctl.h
kernel/memremap.c
mm/gup.c
mm/hmm.c
mm/memory.c
mm/memory_hotplug.c
mm/page_alloc.c
mm/sparse-vmemmap.c
mm/sparse.c
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/acpi_nfit_test.c [new file with mode: 0644]
tools/testing/nvdimm/device_dax_test.c [new file with mode: 0644]
tools/testing/nvdimm/libnvdimm_test.c [new file with mode: 0644]
tools/testing/nvdimm/pmem_test.c [new file with mode: 0644]
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h
tools/testing/nvdimm/watermark.h [new file with mode: 0644]
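
The unifying change across these diffs: struct dev_pagemap now carries
the resource, percpu_ref and vmem_altmap that devm_memremap_pages()
previously took as separate arguments, so ZONE_DEVICE users fill in one
structure and pass it down. A minimal sketch of the new calling
convention, modeled on the drivers/dax/pmem.c hunk below (the
example_* names are hypothetical; percpu_ref setup and teardown are
omitted):

    #include <linux/device.h>
    #include <linux/memremap.h>
    #include <linux/string.h>

    struct example_pmem {                   /* hypothetical driver state */
            struct percpu_ref ref;          /* percpu_ref_init() done elsewhere */
            struct dev_pagemap pgmap;       /* embedded; replaces struct page_map */
    };

    static void *example_map_pages(struct device *dev,
                    struct example_pmem *drv, struct resource *res)
    {
            /* the range, refcount and (absent) altmap all ride in the pgmap */
            memcpy(&drv->pgmap.res, res, sizeof(*res));
            drv->pgmap.ref = &drv->ref;
            drv->pgmap.altmap_valid = false;    /* no memmap reservation */

            return devm_memremap_pages(dev, &drv->pgmap);
    }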

index 267d2b79d52d6e3918a18a2c590d797bdec6d3cb..0b1f13e0b4b36d98d0b21e4a5eede0754f2c5d21 100644 (file)
@@ -654,12 +654,14 @@ int kern_addr_valid(unsigned long addr)
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
        return vmemmap_populate_basepages(start, end, node);
 }
 #else  /* !ARM64_SWAPPER_USES_SECTION_MAPS */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
        unsigned long addr = start;
        unsigned long next;
@@ -694,7 +696,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
        return 0;
 }
 #endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
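
The arm64 hunk above is the template for every architecture that
follows: vmemmap_populate(), vmemmap_free(), arch_add_memory() and
arch_remove_memory() each gain an explicit struct vmem_altmap *
argument, replacing the per-section to_vmem_altmap() radix lookup.
Where an arch allocated its memmap through the old combined helper,
the split now looks roughly like this (cf. the powerpc and x86 hunks
further down):

    /* before: the altmap lookup hid inside the helper */
    p = __vmemmap_alloc_block_buf(page_size, node,
                    to_vmem_altmap(SECTION_ALIGN_DOWN(start)));

    /* after: the caller-supplied altmap picks the allocator */
    if (altmap)
            p = altmap_alloc_block_buf(page_size, altmap);
    else
            p = vmemmap_alloc_block_buf(page_size, node);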
index 9b2d994cddf68cfe4d8417148df393aff76b7f8c..5ea0d8d0968bc58d89a08d9e654a1d03ed3c7f14 100644 (file)
@@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
        return vmemmap_populate_basepages(start, end, node);
 }
 
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
 }
 #endif
index 7af4e05bb61e785989dac50c236b2cef64713f8f..18278b448530d3ac9302754cf170e261401fd008 100644 (file)
@@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
        if (map_start < map_end)
                memmap_init_zone((unsigned long)(map_end - map_start),
                                 args->nid, args->zone, page_to_pfn(map_start),
-                                MEMMAP_EARLY);
+                                MEMMAP_EARLY, NULL);
        return 0;
 }
 
@@ -509,9 +509,10 @@ void __meminit
 memmap_init (unsigned long size, int nid, unsigned long zone,
             unsigned long start_pfn)
 {
-       if (!vmem_map)
-               memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
-       else {
+       if (!vmem_map) {
+               memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
+                               NULL);
+       } else {
                struct page *start;
                struct memmap_init_callback_data args;
 
@@ -647,13 +648,14 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
 
-       ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
        if (ret)
                printk("%s: Problem encountered in __add_pages() as ret=%d\n",
                       __func__,  ret);
@@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size)
        int ret;
 
        zone = page_zone(pfn_to_page(start_pfn));
-       ret = __remove_pages(zone, start_pfn, nr_pages);
+       ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
        if (ret)
                pr_warn("%s: Problem encountered in __remove_pages() as"
                        " ret=%d\n", __func__,  ret);
index a07722531b32e3dfb329e38460085ef789b0128e..7a2251d99ed3dadcf6c104666755e55d5c3896d2 100644 (file)
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
        vmemmap_list = vmem_back;
 }
 
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
        unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
@@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
        pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
 
        for (; start < end; start += page_size) {
-               struct vmem_altmap *altmap;
                void *p;
                int rc;
 
                if (vmemmap_populated(start, page_size))
                        continue;
 
-               /* altmap lookups only work at section boundaries */
-               altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
-               p =  __vmemmap_alloc_block_buf(page_size, node, altmap);
+               if (altmap)
+                       p = altmap_alloc_block_buf(page_size, altmap);
+               else
+                       p = vmemmap_alloc_block_buf(page_size, node);
                if (!p)
                        return -ENOMEM;
 
@@ -257,7 +257,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
        return vmem_back->phys;
 }
 
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
        unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
        unsigned long page_order = get_order(page_size);
@@ -268,7 +269,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
 
        for (; start < end; start += page_size) {
                unsigned long nr_pages, addr;
-               struct vmem_altmap *altmap;
                struct page *section_base;
                struct page *page;
 
@@ -288,7 +288,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
                section_base = pfn_to_page(vmemmap_section_start(start));
                nr_pages = 1 << page_order;
 
-               altmap = to_vmem_altmap((unsigned long) section_base);
                if (altmap) {
                        vmem_altmap_free(altmap, nr_pages);
                } else if (PageReserved(page)) {
index 4362b86ef84c5daa404e806cf3802128b77b8532..22aa528b78a2444546ca4b28f116abe960ff483d 100644 (file)
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
        return -ENODEV;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -144,15 +145,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
                return -EFAULT;
        }
 
-       return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct vmem_altmap *altmap;
        struct page *page;
        int ret;
 
@@ -161,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size)
         * when querying the zone.
         */
        page = pfn_to_page(start_pfn);
-       altmap = to_vmem_altmap((unsigned long) page);
        if (altmap)
                page += vmem_altmap_offset(altmap);
 
-       ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
+       ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
        if (ret)
                return ret;
 
index 5a96a2763e4ae176cc5c6215db5da793f2fb81d5..2ce89b42a9f463298c70107f6c83d8c6d396aae7 100644 (file)
@@ -297,6 +297,7 @@ config AXON_RAM
        tristate "Axon DDR2 memory device driver"
        depends on PPC_IBM_CELL_BLADE && BLOCK
        select DAX
+       select FS_DAX_LIMITED
        default m
        help
          It registers one block device per Axon's DDR2 memory bank found
index 1b307c80b401083f50637daf1597f43ccabe70b9..cdbb0e59b3d31d287d9666369f2044d7b75cb3d5 100644 (file)
@@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page
        resource_size_t offset = pgoff * PAGE_SIZE;
 
        *kaddr = (void *) bank->io_addr + offset;
-       *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
+       *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL);
        return (bank->size - offset) / PAGE_SIZE;
 }
 
index 671535e64abab615afca53c4b57ce89863981f9a..3fa3e532361227ad134f32b46c6c0db58d9d1240 100644 (file)
@@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init);
 
 #endif /* CONFIG_CMA */
 
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = PFN_DOWN(start);
        unsigned long size_pages = PFN_DOWN(size);
@@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
        if (rc)
                return rc;
 
-       rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
+       rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
        if (rc)
                vmem_remove_mapping(start, size);
        return rc;
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        /*
         * There is no hardware or firmware interface which could trigger a
index 3316d463fc2917f984d985f395cd85541cbd9650..db55561c598130475fc3828ce54c9ad394788dd0 100644 (file)
@@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
        unsigned long pgt_prot, sgt_prot;
        unsigned long address = start;
@@ -296,7 +297,8 @@ out:
        return ret;
 }
 
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
 }
 
index bf726af5f1a5419707d288b5258d37b0051ab3dc..ce0bbaa7e40403b37d0d6dbdf107693dd24a2613 100644 (file)
@@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = PFN_DOWN(start);
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
 
        /* We only have ZONE_NORMAL, so this is easy.. */
-       ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
        if (unlikely(ret))
                printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(arch_add_memory);
 
 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 addr)
@@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = PFN_DOWN(start);
        unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size)
        int ret;
 
        zone = page_zone(pfn_to_page(start_pfn));
-       ret = __remove_pages(zone, start_pfn, nr_pages);
+       ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
        if (unlikely(ret))
                pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
                        ret);
index 55ba62957e644116b2e30359f803081ddcd41313..995f9490334dd0001ceb0788d15b93e9f80821a4 100644 (file)
@@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE);
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
-                              int node)
+                              int node, struct vmem_altmap *altmap)
 {
        unsigned long pte_base;
 
@@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
        return 0;
 }
 
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
index 135c9a7898c7da908f1340f9750774b4327e63b3..79cb066f40c0d4a4607a7b7aa4e5524a203b5e10 100644 (file)
@@ -829,23 +829,24 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
 
-       return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct zone *zone;
 
        zone = page_zone(pfn_to_page(start_pfn));
-       return __remove_pages(zone, start_pfn, nr_pages);
+       return __remove_pages(zone, start_pfn, nr_pages, altmap);
 }
 #endif
 #endif
index 4a837289f2add8ac3548e68639f38f845ceee6a9..1ab42c8520693c9999e4b19d04eb565528404ed7 100644 (file)
@@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
        }
 }
 
-int add_pages(int nid, unsigned long start_pfn,
-             unsigned long nr_pages, bool want_memblock)
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+               struct vmem_altmap *altmap, bool want_memblock)
 {
        int ret;
 
-       ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
        WARN_ON_ONCE(ret);
 
        /* update max_pfn, max_low_pfn and high_memory */
@@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn,
        return ret;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
 
        init_memory_mapping(start, start + size);
 
-       return add_pages(nid, start_pfn, nr_pages, want_memblock);
+       return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
-EXPORT_SYMBOL_GPL(arch_add_memory);
 
 #define PAGE_INUSE 0xFD
 
-static void __meminit free_pagetable(struct page *page, int order)
+static void __meminit free_pagetable(struct page *page, int order,
+               struct vmem_altmap *altmap)
 {
        unsigned long magic;
        unsigned int nr_pages = 1 << order;
-       struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
 
        if (altmap) {
                vmem_altmap_free(altmap, nr_pages);
@@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order)
                free_pages((unsigned long)page_address(page), order);
 }
 
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+               struct vmem_altmap *altmap)
 {
        pte_t *pte;
        int i;
@@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
        }
 
         /* free a pte table */
-       free_pagetable(pmd_page(*pmd), 0);
+       free_pagetable(pmd_page(*pmd), 0, altmap);
        spin_lock(&init_mm.page_table_lock);
        pmd_clear(pmd);
        spin_unlock(&init_mm.page_table_lock);
 }
 
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
+               struct vmem_altmap *altmap)
 {
        pmd_t *pmd;
        int i;
@@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        }
 
         /* free a pmd table */
-       free_pagetable(pud_page(*pud), 0);
+       free_pagetable(pud_page(*pud), 0, altmap);
        spin_lock(&init_mm.page_table_lock);
        pud_clear(pud);
        spin_unlock(&init_mm.page_table_lock);
 }
 
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
+               struct vmem_altmap *altmap)
 {
        pud_t *pud;
        int i;
@@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
        }
 
         /* free a pud table */
-       free_pagetable(p4d_page(*p4d), 0);
+       free_pagetable(p4d_page(*p4d), 0, altmap);
        spin_lock(&init_mm.page_table_lock);
        p4d_clear(p4d);
        spin_unlock(&init_mm.page_table_lock);
@@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
 
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
-                bool direct)
+                struct vmem_altmap *altmap, bool direct)
 {
        unsigned long next, pages = 0;
        pte_t *pte;
@@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                         * freed when offlining, or simply not in use.
                         */
                        if (!direct)
-                               free_pagetable(pte_page(*pte), 0);
+                               free_pagetable(pte_page(*pte), 0, altmap);
 
                        spin_lock(&init_mm.page_table_lock);
                        pte_clear(&init_mm, addr, pte);
@@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 
                        page_addr = page_address(pte_page(*pte));
                        if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
-                               free_pagetable(pte_page(*pte), 0);
+                               free_pagetable(pte_page(*pte), 0, altmap);
 
                                spin_lock(&init_mm.page_table_lock);
                                pte_clear(&init_mm, addr, pte);
@@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 
 static void __meminit
 remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
-                bool direct)
+                bool direct, struct vmem_altmap *altmap)
 {
        unsigned long next, pages = 0;
        pte_t *pte_base;
@@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                            IS_ALIGNED(next, PMD_SIZE)) {
                                if (!direct)
                                        free_pagetable(pmd_page(*pmd),
-                                                      get_order(PMD_SIZE));
+                                                      get_order(PMD_SIZE),
+                                                      altmap);
 
                                spin_lock(&init_mm.page_table_lock);
                                pmd_clear(pmd);
@@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                                if (!memchr_inv(page_addr, PAGE_INUSE,
                                                PMD_SIZE)) {
                                        free_pagetable(pmd_page(*pmd),
-                                                      get_order(PMD_SIZE));
+                                                      get_order(PMD_SIZE),
+                                                      altmap);
 
                                        spin_lock(&init_mm.page_table_lock);
                                        pmd_clear(pmd);
@@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                }
 
                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
-               remove_pte_table(pte_base, addr, next, direct);
-               free_pte_table(pte_base, pmd);
+               remove_pte_table(pte_base, addr, next, altmap, direct);
+               free_pte_table(pte_base, pmd, altmap);
        }
 
        /* Call free_pmd_table() in remove_pud_table(). */
@@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 
 static void __meminit
 remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
-                bool direct)
+                struct vmem_altmap *altmap, bool direct)
 {
        unsigned long next, pages = 0;
        pmd_t *pmd_base;
@@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                            IS_ALIGNED(next, PUD_SIZE)) {
                                if (!direct)
                                        free_pagetable(pud_page(*pud),
-                                                      get_order(PUD_SIZE));
+                                                      get_order(PUD_SIZE),
+                                                      altmap);
 
                                spin_lock(&init_mm.page_table_lock);
                                pud_clear(pud);
@@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                                if (!memchr_inv(page_addr, PAGE_INUSE,
                                                PUD_SIZE)) {
                                        free_pagetable(pud_page(*pud),
-                                                      get_order(PUD_SIZE));
+                                                      get_order(PUD_SIZE),
+                                                      altmap);
 
                                        spin_lock(&init_mm.page_table_lock);
                                        pud_clear(pud);
@@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                }
 
                pmd_base = pmd_offset(pud, 0);
-               remove_pmd_table(pmd_base, addr, next, direct);
-               free_pmd_table(pmd_base, pud);
+               remove_pmd_table(pmd_base, addr, next, direct, altmap);
+               free_pmd_table(pmd_base, pud, altmap);
        }
 
        if (direct)
@@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
 
 static void __meminit
 remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
-                bool direct)
+                struct vmem_altmap *altmap, bool direct)
 {
        unsigned long next, pages = 0;
        pud_t *pud_base;
@@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
                BUILD_BUG_ON(p4d_large(*p4d));
 
                pud_base = pud_offset(p4d, 0);
-               remove_pud_table(pud_base, addr, next, direct);
+               remove_pud_table(pud_base, addr, next, altmap, direct);
                /*
                 * For 4-level page tables we do not want to free PUDs, but in the
                 * 5-level case we should free them. This code will have to change
                 * to adapt for boot-time switching between 4 and 5 level page tables.
                 */
                if (CONFIG_PGTABLE_LEVELS == 5)
-                       free_pud_table(pud_base, p4d);
+                       free_pud_table(pud_base, p4d, altmap);
        }
 
        if (direct)
@@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
 
 /* start and end are both virtual address. */
 static void __meminit
-remove_pagetable(unsigned long start, unsigned long end, bool direct)
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+               struct vmem_altmap *altmap)
 {
        unsigned long next;
        unsigned long addr;
@@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
                        continue;
 
                p4d = p4d_offset(pgd, 0);
-               remove_p4d_table(p4d, addr, next, direct);
+               remove_p4d_table(p4d, addr, next, altmap, direct);
        }
 
        flush_tlb_all();
 }
 
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap)
 {
-       remove_pagetable(start, end, false);
+       remove_pagetable(start, end, false, altmap);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);
 
-       remove_pagetable(start, end, true);
+       remove_pagetable(start, end, true, NULL);
 }
 
-int __ref arch_remove_memory(u64 start, u64 size)
+int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct page *page = pfn_to_page(start_pfn);
-       struct vmem_altmap *altmap;
        struct zone *zone;
        int ret;
 
        /* With altmap the first mapped page is offset from @start */
-       altmap = to_vmem_altmap((unsigned long) page);
        if (altmap)
                page += vmem_altmap_offset(altmap);
        zone = page_zone(page);
-       ret = __remove_pages(zone, start_pfn, nr_pages);
+       ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
        WARN_ON_ONCE(ret);
        kernel_physical_mapping_remove(start, start + size);
 
@@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
                if (pmd_none(*pmd)) {
                        void *p;
 
-                       p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+                       if (altmap)
+                               p = altmap_alloc_block_buf(PMD_SIZE, altmap);
+                       else
+                               p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (p) {
                                pte_t entry;
 
@@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
        return 0;
 }
 
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap)
 {
-       struct vmem_altmap *altmap = to_vmem_altmap(start);
        int err;
 
        if (boot_cpu_has(X86_FEATURE_PSE))
index aa9d00db763a2383663e36f20a3a8509c08907da..bbe48ad20886c8530fe525ffe9f35725d1df1ddc 100644 (file)
@@ -1883,6 +1883,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
                struct kernfs_node *nfit_kernfs;
 
                nvdimm = nfit_mem->nvdimm;
+               if (!nvdimm)
+                       continue;
+
                nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
                if (nfit_kernfs)
                        nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
index 7b0bf825c4e73c588ff93183cf5315665d69e082..2137dbc29877db4f72c340e0b02869d974d3b7a9 100644 (file)
@@ -133,7 +133,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
        dax_region->base = addr;
        if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
                kfree(dax_region);
-               return NULL;;
+               return NULL;
        }
 
        kref_get(&dax_region->kref);
index 8d8c852ba8f209c5118987aa2dbb98cf3290d213..31b6ecce4c64205f1109532f222b39b980e75a00 100644 (file)
@@ -21,6 +21,7 @@
 struct dax_pmem {
        struct device *dev;
        struct percpu_ref ref;
+       struct dev_pagemap pgmap;
        struct completion cmp;
 };
 
@@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev)
        struct nd_namespace_common *ndns;
        struct nd_dax *nd_dax = to_nd_dax(dev);
        struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
-       struct vmem_altmap __altmap, *altmap = NULL;
 
        ndns = nvdimm_namespace_common_probe(dev);
        if (IS_ERR(ndns))
                return PTR_ERR(ndns);
        nsio = to_nd_namespace_io(&ndns->dev);
 
+       dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+       if (!dax_pmem)
+               return -ENOMEM;
+
        /* parse the 'pfn' info block via ->rw_bytes */
        rc = devm_nsio_enable(dev, nsio);
        if (rc)
                return rc;
-       altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
-       if (IS_ERR(altmap))
-               return PTR_ERR(altmap);
+       rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
+       if (rc)
+               return rc;
        devm_nsio_disable(dev, nsio);
 
        pfn_sb = nd_pfn->pfn_sb;
@@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev)
                return -EBUSY;
        }
 
-       dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
-       if (!dax_pmem)
-               return -ENOMEM;
-
        dax_pmem->dev = dev;
        init_completion(&dax_pmem->cmp);
        rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
@@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
-       addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+       dax_pmem->pgmap.ref = &dax_pmem->ref;
+       addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
 
@@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev)
                return rc;
 
        /* adjust the dax_region resource to the start of data */
+       memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
        res.start += le64_to_cpu(pfn_sb->dataoff);
 
        rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
index 3ec8046726017bad8afe69e120a0c54e0f9f5229..473af694ad1cbee97db06646a715776a5d9e7889 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/mount.h>
 #include <linux/magic.h>
 #include <linux/genhd.h>
+#include <linux/pfn_t.h>
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
@@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
                return len < 0 ? len : -EIO;
        }
 
+       if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
+                       || pfn_t_devmap(pfn))
+               /* pass */;
+       else {
+               pr_debug("VFS (%s): error: dax support not enabled\n",
+                               sb->s_id);
+               return -EOPNOTSUPP;
+       }
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
index c586bcdb5190b1c9f6447cb9a380568a03840988..2ef544f10ec8a3b2016bac8e9bc38475113eb5ef 100644 (file)
@@ -753,6 +753,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
                return NULL;
        arena->nd_btt = btt->nd_btt;
        arena->sector_size = btt->sector_size;
+       mutex_init(&arena->err_lock);
 
        if (!size)
                return arena;
@@ -891,7 +892,6 @@ static int discover_arenas(struct btt *btt)
                        goto out;
                }
 
-               mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
                        goto out;
index 0a5e6cd758fe3fcef15aa38332cd5673e4de29d3..78eabc3a1ab1337b68aef7b427c1ee59f570438c 100644 (file)
@@ -1142,9 +1142,6 @@ int __init nvdimm_bus_init(void)
 {
        int rc;
 
-       BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
-       BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
-
        rc = bus_register(&nvdimm_bus_type);
        if (rc)
                return rc;
index bb3ba8cf24d4503801f3534f01a0e1f86cb7a641..658ada497be0a43f0a9df0d4a71aeaaa8df13bdb 100644 (file)
@@ -2408,7 +2408,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 static struct device **create_namespaces(struct nd_region *nd_region)
 {
-       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       struct nd_mapping *nd_mapping;
        struct device **devs;
        int i;
 
index e958f3724c417e74807102c2fcfb2d3a5da2ef7f..8d6375ee0fda73c7ce12de259fddfd81449d9b8d 100644 (file)
@@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
                struct badblocks *bb, const struct resource *res);
 #if IS_ENABLED(CONFIG_ND_CLAIM)
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-               struct resource *res, struct vmem_altmap *altmap);
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
 int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
 void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
 #else
-static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-               struct resource *res, struct vmem_altmap *altmap)
+static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+                                  struct dev_pagemap *pgmap)
 {
-       return ERR_PTR(-ENXIO);
+       return -ENXIO;
 }
 static inline int devm_nsio_enable(struct device *dev,
                struct nd_namespace_io *nsio)
index 2adada1a58551776186d6f6928a437d462734a48..f5c4e8c6e29d49bc5d42be0cf6c5ceffd98da1f5 100644 (file)
@@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base)
        return reserve;
 }
 
-static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-               struct resource *res, struct vmem_altmap *altmap)
+static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 {
+       struct resource *res = &pgmap->res;
+       struct vmem_altmap *altmap = &pgmap->altmap;
        struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
        u64 offset = le64_to_cpu(pfn_sb->dataoff);
        u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
        res->start += start_pad;
        res->end -= end_trunc;
 
+       pgmap->type = MEMORY_DEVICE_HOST;
+
        if (nd_pfn->mode == PFN_MODE_RAM) {
                if (offset < SZ_8K)
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
                nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
-               altmap = NULL;
+               pgmap->altmap_valid = false;
        } else if (nd_pfn->mode == PFN_MODE_PMEM) {
                nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
                                        - offset) / PAGE_SIZE);
@@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
                memcpy(altmap, &__altmap, sizeof(*altmap));
                altmap->free = PHYS_PFN(offset - SZ_8K);
                altmap->alloc = 0;
+               pgmap->altmap_valid = true;
        } else
-               return ERR_PTR(-ENXIO);
+               return -ENXIO;
 
-       return altmap;
+       return 0;
 }
 
 static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
@@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
  * Determine the effective resource range and vmem_altmap from an nd_pfn
  * instance.
  */
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
-               struct resource *res, struct vmem_altmap *altmap)
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 {
        int rc;
 
        if (!nd_pfn->uuid || !nd_pfn->ndns)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        rc = nd_pfn_init(nd_pfn);
        if (rc)
-               return ERR_PTR(rc);
+               return rc;
 
-       /* we need a valid pfn_sb before we can init a vmem_altmap */
-       return __nvdimm_setup_pfn(nd_pfn, res, altmap);
+       /* we need a valid pfn_sb before we can init a dev_pagemap */
+       return __nvdimm_setup_pfn(nd_pfn, pgmap);
 }
 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
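
With the dev_pagemap conversion, nvdimm_setup_pfn() switches from
returning an ERR_PTR-encoded vmem_altmap to filling the caller's pgmap
in place and returning an errno. Both callers in this merge
(drivers/dax/pmem.c above, drivers/nvdimm/pmem.c below) reduce to the
same shape; "drv" stands in for the driver's private state:

    rc = nvdimm_setup_pfn(nd_pfn, &drv->pgmap);
    if (rc)
            return rc;
    /* pgmap.res and pgmap.altmap now reflect the on-media pfn info block */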
index 8aa542398db4799ebecf8cca6ea5378281bc9abb..10041ac4032c038db09109b8f757a8719d26f902 100644 (file)
@@ -299,34 +299,34 @@ static int pmem_attach_disk(struct device *dev,
 {
        struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
        struct nd_region *nd_region = to_nd_region(dev->parent);
-       struct vmem_altmap __altmap, *altmap = NULL;
        int nid = dev_to_node(dev), fua, wbc;
        struct resource *res = &nsio->res;
+       struct resource bb_res;
        struct nd_pfn *nd_pfn = NULL;
        struct dax_device *dax_dev;
        struct nd_pfn_sb *pfn_sb;
        struct pmem_device *pmem;
-       struct resource pfn_res;
        struct request_queue *q;
        struct device *gendev;
        struct gendisk *disk;
        void *addr;
+       int rc;
+
+       pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
+       if (!pmem)
+               return -ENOMEM;
 
        /* while nsio_rw_bytes is active, parse a pfn info block if present */
        if (is_nd_pfn(dev)) {
                nd_pfn = to_nd_pfn(dev);
-               altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
-               if (IS_ERR(altmap))
-                       return PTR_ERR(altmap);
+               rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
+               if (rc)
+                       return rc;
        }
 
        /* we're attaching a block device, disable raw namespace access */
        devm_nsio_disable(dev, nsio);
 
-       pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
-       if (!pmem)
-               return -ENOMEM;
-
        dev_set_drvdata(dev, pmem);
        pmem->phys_addr = res->start;
        pmem->size = resource_size(res);
@@ -352,19 +352,22 @@ static int pmem_attach_disk(struct device *dev,
                return -ENOMEM;
 
        pmem->pfn_flags = PFN_DEV;
+       pmem->pgmap.ref = &q->q_usage_counter;
        if (is_nd_pfn(dev)) {
-               addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
-                               altmap);
+               addr = devm_memremap_pages(dev, &pmem->pgmap);
                pfn_sb = nd_pfn->pfn_sb;
                pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
-               pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
+               pmem->pfn_pad = resource_size(res) -
+                       resource_size(&pmem->pgmap.res);
                pmem->pfn_flags |= PFN_MAP;
-               res = &pfn_res; /* for badblocks populate */
-               res->start += pmem->data_offset;
+               memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+               bb_res.start += pmem->data_offset;
        } else if (pmem_should_map_pages(dev)) {
-               addr = devm_memremap_pages(dev, &nsio->res,
-                               &q->q_usage_counter, NULL);
+               memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+               pmem->pgmap.altmap_valid = false;
+               addr = devm_memremap_pages(dev, &pmem->pgmap);
                pmem->pfn_flags |= PFN_MAP;
+               memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
        } else
                addr = devm_memremap(dev, pmem->phys_addr,
                                pmem->size, ARCH_MEMREMAP_PMEM);
@@ -403,7 +406,7 @@ static int pmem_attach_disk(struct device *dev,
                        / 512);
        if (devm_init_badblocks(dev, &pmem->bb))
                return -ENOMEM;
-       nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
+       nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
        disk->bb = &pmem->bb;
 
        dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
index 6a3cd2a10db699db2069568d8094d9f1bf8bc319..a64ebc78b5dffbac4072fc722d9961650e681f88 100644 (file)
@@ -22,6 +22,7 @@ struct pmem_device {
        struct badblocks        bb;
        struct dax_device       *dax_dev;
        struct gendisk          *disk;
+       struct dev_pagemap      pgmap;
 };
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
index bc27d716aa6b2d404863a22a4242c2ccbd9fc6e6..1444333210c74929479cdaf2b9b411b74eb74b95 100644 (file)
@@ -16,6 +16,7 @@ config BLK_DEV_XPRAM
 config DCSSBLK
        def_tristate m
        select DAX
+       select FS_DAX_LIMITED
        prompt "DCSSBLK support"
        depends on S390 && BLOCK
        help
index 6aaefb78043696e658e36b6637b4e6dde59b5c83..9cae08b36b8022b590bdb550113212d102d94043 100644 (file)
@@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
 
        dev_sz = dev_info->end - dev_info->start + 1;
        *kaddr = (void *) dev_info->start + offset;
-       *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
+       *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
+                       PFN_DEV|PFN_SPECIAL);
 
        return (dev_sz - offset) / PAGE_SIZE;
 }
index 7aee6d699fd6b38949df0563281473c3afb445c2..b40128bf6d1a98da68c1d365913f32b9f6cf3b80 100644 (file)
@@ -58,6 +58,13 @@ config FS_DAX_PMD
        depends on ZONE_DEVICE
        depends on TRANSPARENT_HUGEPAGE
 
+# Selected by DAX drivers that do not expect filesystem DAX to support
+# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
+# for fork() of processes with MAP_SHARED mappings, nor for direct-I/O
+# to a DAX mapping.
+config FS_DAX_LIMITED
+       bool
+
 endif # BLOCK
 
 # Posix ACL utility routines
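
FS_DAX_LIMITED is the opt-in taken by the axonram and dcssblk hunks
earlier in this merge: both drivers select the symbol and mark their
pfns PFN_SPECIAL, since neither backs its memory with struct pages.
Condensed, the driver side of the contract is a single line in
->direct_access() ("phys" stands in for the bank/segment physical
address):

    /* no struct page behind this pfn: advertise PFN_SPECIAL, not
     * PFN_MAP, so get_user_pages() of such a mapping fails cleanly */
    *pfn = phys_to_pfn_t(phys + offset, PFN_DEV | PFN_SPECIAL);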
index 7646818ab266ff81b86003b28f661fdbd7d9ba2f..38f9222606eeb22409c6e0b87044a8c45a66cb68 100644 (file)
@@ -959,8 +959,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
        if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
                err = bdev_dax_supported(sb, blocksize);
-               if (err)
-                       goto failed_mount;
+               if (err) {
+                       ext2_msg(sb, KERN_ERR,
+                               "DAX unsupported by block device. Turning off DAX.");
+                       sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+               }
        }
 
        /* If the blocksize doesn't match, re-read the thing.. */
index 7c46693a14d763d53b84eec9602f4a854918bb4d..18873ea89e088fe5551f8253f67ecb644b0efc99 100644 (file)
@@ -3710,11 +3710,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                if (ext4_has_feature_inline_data(sb)) {
                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
                                        " that may contain inline data");
-                       goto failed_mount;
+                       sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
                }
                err = bdev_dax_supported(sb, blocksize);
-               if (err)
-                       goto failed_mount;
+               if (err) {
+                       ext4_msg(sb, KERN_ERR,
+                               "DAX unsupported by block device. Turning off DAX.");
+                       sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+               }
        }
 
        if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
index 58e110aee7ab4d68a954ad850fd626d71ed28b55..aba5f86eb038b4d554138001fdbafdb9b43bff7c 100644 (file)
@@ -13,6 +13,7 @@ struct pglist_data;
 struct mem_section;
 struct memory_block;
 struct resource;
+struct vmem_altmap;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
@@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void)
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
-extern int arch_remove_memory(u64 start, u64 size);
+extern int arch_remove_memory(u64 start, u64 size,
+               struct vmem_altmap *altmap);
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-       unsigned long nr_pages);
+       unsigned long nr_pages, struct vmem_altmap *altmap);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 /* reasonably generic interface to expand the physical pages */
-extern int __add_pages(int nid, unsigned long start_pfn,
-       unsigned long nr_pages, bool want_memblock);
+extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+               struct vmem_altmap *altmap, bool want_memblock);
 
 #ifndef CONFIG_ARCH_HAS_ADD_PAGES
 static inline int add_pages(int nid, unsigned long start_pfn,
-                           unsigned long nr_pages, bool want_memblock)
+               unsigned long nr_pages, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
-       return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+       return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 #else /* ARCH_HAS_ADD_PAGES */
-int add_pages(int nid, unsigned long start_pfn,
-             unsigned long nr_pages, bool want_memblock);
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+               struct vmem_altmap *altmap, bool want_memblock);
 #endif /* ARCH_HAS_ADD_PAGES */
 
 #ifdef CONFIG_NUMA
@@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
                void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+               struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
-               unsigned long nr_pages);
+               unsigned long nr_pages, struct vmem_altmap *altmap);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat,
+               unsigned long start_pfn, struct vmem_altmap *altmap);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
-               unsigned long map_offset);
+               unsigned long map_offset, struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
                                          unsigned long pnum);
 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
index 10d23c3670481813c540b3e7a92f925c336b4a63..7b4899c06f49c7c727da4ae64eb83d10e7a14e83 100644 (file)
@@ -26,18 +26,6 @@ struct vmem_altmap {
        unsigned long alloc;
 };
 
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-#ifdef CONFIG_ZONE_DEVICE
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
-#else
-static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
-{
-       return NULL;
-}
-#endif
-
 /*
  * Specialize ZONE_DEVICE memory into multiple types each having differents
  * usage.
@@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
 struct dev_pagemap {
        dev_page_fault_t page_fault;
        dev_page_free_t page_free;
-       struct vmem_altmap *altmap;
-       const struct resource *res;
+       struct vmem_altmap altmap;
+       bool altmap_valid;
+       struct resource res;
        struct percpu_ref *ref;
        struct device *dev;
        void *data;
@@ -134,15 +123,17 @@ struct dev_pagemap {
 };
 
 #ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-               struct percpu_ref *ref, struct vmem_altmap *altmap);
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+               struct dev_pagemap *pgmap);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
 
 static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
-               struct resource *res, struct percpu_ref *ref,
-               struct vmem_altmap *altmap)
+               struct dev_pagemap *pgmap)
 {
        /*
         * Fail attempts to call devm_memremap_pages() without
@@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev,
        return ERR_PTR(-ENXIO);
 }
 
-static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+               struct dev_pagemap *pgmap)
 {
        return NULL;
 }
-#endif
+
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+       return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+               unsigned long nr_pfns)
+{
+}
+#endif /* CONFIG_ZONE_DEVICE */
 
 #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
 static inline bool is_device_private_page(const struct page *page)
@@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page)
 }
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
- */
-static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-               struct dev_pagemap *pgmap)
-{
-       const struct resource *res = pgmap ? pgmap->res : NULL;
-       resource_size_t phys = PFN_PHYS(pfn);
-
-       /*
-        * In the cached case we're already holding a live reference so
-        * we can simply do a blind increment
-        */
-       if (res && phys >= res->start && phys <= res->end) {
-               percpu_ref_get(pgmap->ref);
-               return pgmap;
-       }
-
-       /* fall back to slow path lookup */
-       rcu_read_lock();
-       pgmap = find_dev_pagemap(phys);
-       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
-               pgmap = NULL;
-       rcu_read_unlock();
-
-       return pgmap;
-}
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
        if (pgmap)
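
get_dev_pagemap() moves out of line here, and its lookup key changes
from a physical address (the removed find_dev_pagemap()) to a pfn,
with an optional previously-returned pgmap acting as a cache; a stale
cached reference is dropped by the lookup itself. A plausible pfn-walk
pattern under the new interface (the mm/gup.c changes in this merge
use this shape; the loop body is hypothetical):

    struct dev_pagemap *pgmap = NULL;
    unsigned long pfn;

    for (pfn = start_pfn; pfn < end_pfn; pfn++) {
            /* reuses the live reference while pfn stays in one range */
            pgmap = get_dev_pagemap(pfn, pgmap);
            if (!pgmap)
                    break;          /* not ZONE_DEVICE memory */
            /* ... operate on pfn_to_page(pfn) ... */
    }
    put_dev_pagemap(pgmap);         /* NULL-safe */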
index ea818ff739cdfbb433fc10634ed5ac77eacbc5b7..09637c353de0a785261a3918185ecf8d585c91fc 100644 (file)
@@ -2069,8 +2069,8 @@ static inline void zero_resv_unavail(void) {}
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
-                               unsigned long, enum memmap_context);
+extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+               enum memmap_context, struct vmem_altmap *);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
@@ -2538,7 +2538,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
                                   unsigned long map_count,
                                   int nodeid);
 
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
+               struct vmem_altmap *altmap);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -2546,20 +2547,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
 struct vmem_altmap;
-void *__vmemmap_alloc_block_buf(unsigned long size, int node,
-               struct vmem_altmap *altmap);
-static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
-{
-       return __vmemmap_alloc_block_buf(size, node, NULL);
-}
-
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
+void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
                               int node);
-int vmemmap_populate(unsigned long start, unsigned long end, int node);
+int vmemmap_populate(unsigned long start, unsigned long end, int node,
+               struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_free(unsigned long start, unsigned long end);
+void vmemmap_free(unsigned long start, unsigned long end,
+               struct vmem_altmap *altmap);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
                                  unsigned long nr_pages);
index 43b1d7648e826260e193cdb0a49cb038e4896958..a03c2642a87c259c23e1157dfaf21d46080eec91 100644 (file)
 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
 #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
 
 #define PFN_FLAGS_TRACE \
+       { PFN_SPECIAL,  "SPECIAL" }, \
        { PFN_SG_CHAIN, "SG_CHAIN" }, \
        { PFN_SG_LAST,  "SG_LAST" }, \
        { PFN_DEV,      "DEV" }, \
@@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud);
 #endif
 #endif /* __HAVE_ARCH_PTE_DEVMAP */
 
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+static inline bool pfn_t_special(pfn_t pfn)
+{
+       return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
+}
+#else
+static inline bool pfn_t_special(pfn_t pfn)
+{
+       return false;
+}
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
 #endif /* _LINUX_PFN_T_H_ */
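For illustration, a minimal sketch (not part of the patch) of how a driver whose pfns have no struct page behind them might combine the new PFN_SPECIAL flag with the vm_insert_mixed() relaxation added later in this series; the wrapper function and its error code are hypothetical:

#include <linux/mm.h>
#include <linux/pfn_t.h>

/* sketch: map a raw pfn that has no struct page into a userspace vma */
static int example_insert_special(struct vm_area_struct *vma,
		unsigned long addr, unsigned long raw_pfn)
{
	pfn_t pfn = __pfn_to_pfn_t(raw_pfn, PFN_SPECIAL);

	/* pfn_t_special() only reports true when the arch has pte_special() */
	if (!pfn_t_special(pfn))
		return -EOPNOTSUPP;

	return vm_insert_mixed(vma, addr, pfn);
}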
index 3f03567631cb092dc8f51aae740dd2a3472730e9..7e27070b9440374f4c8e32187e8739c39318c889 100644 (file)
 
 #include <linux/types.h>
 
-struct nd_cmd_smart {
-       __u32 status;
-       __u8 data[128];
-} __packed;
-
-#define ND_SMART_HEALTH_VALID  (1 << 0)
-#define ND_SMART_SPARES_VALID  (1 << 1)
-#define ND_SMART_USED_VALID    (1 << 2)
-#define ND_SMART_TEMP_VALID    (1 << 3)
-#define ND_SMART_CTEMP_VALID   (1 << 4)
-#define ND_SMART_ALARM_VALID   (1 << 9)
-#define ND_SMART_SHUTDOWN_VALID        (1 << 10)
-#define ND_SMART_VENDOR_VALID  (1 << 11)
-#define ND_SMART_SPARE_TRIP    (1 << 0)
-#define ND_SMART_TEMP_TRIP     (1 << 1)
-#define ND_SMART_CTEMP_TRIP    (1 << 2)
-#define ND_SMART_NON_CRITICAL_HEALTH   (1 << 0)
-#define ND_SMART_CRITICAL_HEALTH       (1 << 1)
-#define ND_SMART_FATAL_HEALTH          (1 << 2)
-
-struct nd_smart_payload {
-       __u32 flags;
-       __u8 reserved0[4];
-       __u8 health;
-       __u8 spares;
-       __u8 life_used;
-       __u8 alarm_flags;
-       __u16 temperature;
-       __u16 ctrl_temperature;
-       __u8 reserved1[15];
-       __u8 shutdown_state;
-       __u32 vendor_size;
-       __u8 vendor_data[92];
-} __packed;
-
-struct nd_cmd_smart_threshold {
-       __u32 status;
-       __u8 data[8];
-} __packed;
-
-struct nd_smart_threshold_payload {
-       __u8 alarm_control;
-       __u8 reserved0;
-       __u16 temperature;
-       __u8 spares;
-       __u8 reserved[3];
-} __packed;
-
 struct nd_cmd_dimm_flags {
        __u32 status;
        __u32 flags;
@@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 
 #define ND_IOCTL 'N'
 
-#define ND_IOCTL_SMART                 _IOWR(ND_IOCTL, ND_CMD_SMART,\
-                                       struct nd_cmd_smart)
-
-#define ND_IOCTL_SMART_THRESHOLD       _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
-                                       struct nd_cmd_smart_threshold)
-
 #define ND_IOCTL_DIMM_FLAGS            _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
                                        struct nd_cmd_dimm_flags)
 
@@ -263,7 +209,7 @@ enum nd_driver_flags {
 };
 
 enum {
-       ND_MIN_NAMESPACE_SIZE = 0x00400000,
+       ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
 };
 
 enum ars_masks {
index 403ab9cdb949a0483bd82c811a3383eed77e5246..4849be5f9b3c30120f0f964dfd32f5bbf1546a54 100644 (file)
@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-struct page_map {
-       struct resource res;
-       struct percpu_ref *ref;
-       struct dev_pagemap pgmap;
-       struct vmem_altmap altmap;
-};
-
 static unsigned long order_at(struct resource *res, unsigned long pgoff)
 {
        unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma,
 EXPORT_SYMBOL(device_private_entry_fault);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-static void pgmap_radix_release(struct resource *res)
+static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
 {
        unsigned long pgoff, order;
 
        mutex_lock(&pgmap_lock);
-       foreach_order_pgoff(res, order, pgoff)
+       foreach_order_pgoff(res, order, pgoff) {
+               if (pgoff >= end_pgoff)
+                       break;
                radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
+       }
        mutex_unlock(&pgmap_lock);
 
        synchronize_rcu();
 }
 
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-       struct dev_pagemap *pgmap = &page_map->pgmap;
-       const struct resource *res = &page_map->res;
-       struct vmem_altmap *altmap = pgmap->altmap;
+       const struct resource *res = &pgmap->res;
+       struct vmem_altmap *altmap = &pgmap->altmap;
        unsigned long pfn;
 
        pfn = res->start >> PAGE_SHIFT;
-       if (altmap)
+       if (pgmap->altmap_valid)
                pfn += vmem_altmap_offset(altmap);
        return pfn;
 }
 
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
 {
-       const struct resource *res = &page_map->res;
+       const struct resource *res = &pgmap->res;
 
        return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
@@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map)
 #define for_each_device_pfn(pfn, map) \
        for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
 
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
 {
-       struct page_map *page_map = data;
-       struct resource *res = &page_map->res;
+       struct dev_pagemap *pgmap = data;
+       struct device *dev = pgmap->dev;
+       struct resource *res = &pgmap->res;
        resource_size_t align_start, align_size;
-       struct dev_pagemap *pgmap = &page_map->pgmap;
        unsigned long pfn;
 
-       for_each_device_pfn(pfn, page_map)
+       for_each_device_pfn(pfn, pgmap)
                put_page(pfn_to_page(pfn));
 
        if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
-       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+               - align_start;
 
        mem_hotplug_begin();
-       arch_remove_memory(align_start, align_size);
+       arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
+                       &pgmap->altmap : NULL);
        mem_hotplug_done();
 
        untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
-       pgmap_radix_release(res);
-       dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
-                       "%s: failed to free all reserved pages\n", __func__);
-}
-
-/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
-       struct page_map *page_map;
-
-       WARN_ON_ONCE(!rcu_read_lock_held());
-
-       page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
-       return page_map ? &page_map->pgmap : NULL;
+       pgmap_radix_release(res, -1);
+       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+                     "%s: failed to free all reserved pages\n", __func__);
 }
 
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pagemap
  *
  * Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- *    (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function.
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ *    must be set to true.
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ *    time (or devm release event). The expected order of events is that ref has
  *    been through percpu_ref_kill() before devm_memremap_pages_release(). The
 *    wait for all references to be dropped, and the call to percpu_ref_exit(),
 *    must occur after devm_memremap_pages_release().
  *
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
  *    this is not enforced.
  */
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-               struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
        resource_size_t align_start, align_size, align_end;
+       struct vmem_altmap *altmap = pgmap->altmap_valid ?
+                       &pgmap->altmap : NULL;
        unsigned long pfn, pgoff, order;
        pgprot_t pgprot = PAGE_KERNEL;
-       struct dev_pagemap *pgmap;
-       struct page_map *page_map;
        int error, nid, is_ram, i = 0;
+       struct resource *res = &pgmap->res;
 
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (is_ram == REGION_INTERSECTS)
                return __va(res->start);
 
-       if (!ref)
+       if (!pgmap->ref)
                return ERR_PTR(-EINVAL);
 
-       page_map = devres_alloc_node(devm_memremap_pages_release,
-                       sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
-       if (!page_map)
-               return ERR_PTR(-ENOMEM);
-       pgmap = &page_map->pgmap;
-
-       memcpy(&page_map->res, res, sizeof(*res));
-
        pgmap->dev = dev;
-       if (altmap) {
-               memcpy(&page_map->altmap, altmap, sizeof(*altmap));
-               pgmap->altmap = &page_map->altmap;
-       }
-       pgmap->ref = ref;
-       pgmap->res = &page_map->res;
-       pgmap->type = MEMORY_DEVICE_HOST;
-       pgmap->page_fault = NULL;
-       pgmap->page_free = NULL;
-       pgmap->data = NULL;
 
        mutex_lock(&pgmap_lock);
        error = 0;
        align_end = align_start + align_size - 1;
 
        foreach_order_pgoff(res, order, pgoff) {
-               struct dev_pagemap *dup;
-
-               rcu_read_lock();
-               dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
-               rcu_read_unlock();
-               if (dup) {
-                       dev_err(dev, "%s: %pr collides with mapping for %s\n",
-                                       __func__, res, dev_name(dup->dev));
-                       error = -EBUSY;
-                       break;
-               }
                error = __radix_tree_insert(&pgmap_radix,
-                               PHYS_PFN(res->start) + pgoff, order, page_map);
+                               PHYS_PFN(res->start) + pgoff, order, pgmap);
                if (error) {
                        dev_err(dev, "%s: failed: %d\n", __func__, error);
                        break;
@@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
                goto err_pfn_remap;
 
        mem_hotplug_begin();
-       error = arch_add_memory(nid, align_start, align_size, false);
+       error = arch_add_memory(nid, align_start, align_size, altmap, false);
        if (!error)
                move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                                        align_start >> PAGE_SHIFT,
-                                       align_size >> PAGE_SHIFT);
+                                       align_size >> PAGE_SHIFT, altmap);
        mem_hotplug_done();
        if (error)
                goto err_add_memory;
 
-       for_each_device_pfn(pfn, page_map) {
+       for_each_device_pfn(pfn, pgmap) {
                struct page *page = pfn_to_page(pfn);
 
                /*
@@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
                 */
                list_del(&page->lru);
                page->pgmap = pgmap;
-               percpu_ref_get(ref);
+               percpu_ref_get(pgmap->ref);
                if (!(++i % 1024))
                        cond_resched();
        }
-       devres_add(dev, page_map);
+
+       devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
        return __va(res->start);
 
  err_add_memory:
        untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
-       pgmap_radix_release(res);
-       devres_free(page_map);
+       pgmap_radix_release(res, pgoff);
        return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
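For illustration, a sketch of the caller-side conversion to the reworked interface: rather than passing res/ref/altmap separately, the caller fills in a struct dev_pagemap it owns, typically embedded in (or devm-allocated alongside) its driver state, which devm_memremap_pages() neither copies nor frees; the surrounding helper is hypothetical:

/* sketch: the three required fields match note 1/ in the kernel-doc above */
static void *example_remap(struct device *dev, struct resource *res,
		struct percpu_ref *ref, struct vmem_altmap *altmap)
{
	struct dev_pagemap *pgmap;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return ERR_PTR(-ENOMEM);

	pgmap->res = *res;			/* copied by value */
	pgmap->ref = ref;			/* must already be live */
	pgmap->type = MEMORY_DEVICE_HOST;
	if (altmap) {				/* optional, see note 2/ */
		pgmap->altmap = *altmap;
		pgmap->altmap_valid = true;
	}
	return devm_memremap_pages(dev, pgmap);
}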
@@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
        altmap->alloc -= nr_pfns;
 }
 
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to look up a dev_pagemap for
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+               struct dev_pagemap *pgmap)
 {
-       /*
-        * 'memmap_start' is the virtual address for the first "struct
-        * page" in this range of the vmemmap array.  In the case of
-        * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
-        * pointer arithmetic, so we can perform this to_vmem_altmap()
-        * conversion without concern for the initialization state of
-        * the struct page fields.
-        */
-       struct page *page = (struct page *) memmap_start;
-       struct dev_pagemap *pgmap;
+       resource_size_t phys = PFN_PHYS(pfn);
 
        /*
-        * Unconditionally retrieve a dev_pagemap associated with the
-        * given physical address, this is only for use in the
-        * arch_{add|remove}_memory() for setting up and tearing down
-        * the memmap.
+        * In the cached case we're already holding a live reference.
         */
+       if (pgmap) {
+               if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+                       return pgmap;
+               put_dev_pagemap(pgmap);
+       }
+
+       /* fall back to slow path lookup */
        rcu_read_lock();
-       pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+       pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+               pgmap = NULL;
        rcu_read_unlock();
 
-       return pgmap ? pgmap->altmap : NULL;
+       return pgmap;
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 void put_zone_device_private_or_public_page(struct page *page)
 {
index e0d82b6706d72d82637bca5eaef1e35e15a1abdf..3affe7544b0c49fc7e5c80dcd54b8ecbfcbfe6de 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1410,7 +1410,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
                VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
-               put_dev_pagemap(pgmap);
                SetPageReferenced(page);
                pages[*nr] = page;
                (*nr)++;
@@ -1420,6 +1419,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
        ret = 1;
 
 pte_unmap:
+       if (pgmap)
+               put_dev_pagemap(pgmap);
        pte_unmap(ptem);
        return ret;
 }
@@ -1459,10 +1460,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
                SetPageReferenced(page);
                pages[*nr] = page;
                get_page(page);
-               put_dev_pagemap(pgmap);
                (*nr)++;
                pfn++;
        } while (addr += PAGE_SIZE, addr != end);
+
+       if (pgmap)
+               put_dev_pagemap(pgmap);
        return 1;
 }
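For illustration, the lookup-caching pattern these gup hunks adopt with the reworked get_dev_pagemap(): the previously returned pgmap is passed back in, so consecutive pfns covered by the same mapping skip the radix-tree walk, and a single put at the end releases whatever reference is still held (the loop body here is hypothetical):

/* sketch: amortize dev_pagemap reference counting over a pfn range */
static void example_walk(unsigned long pfn, unsigned long nr_pages)
{
	struct dev_pagemap *pgmap = NULL;
	unsigned long i;

	for (i = 0; i < nr_pages; i++) {
		pgmap = get_dev_pagemap(pfn + i, pgmap);
		if (!pgmap)
			break;		/* not ZONE_DEVICE memory */
		/* ... operate on pfn_to_page(pfn + i) ... */
	}
	if (pgmap)
		put_dev_pagemap(pgmap);
}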
 
index ea19742a5d60b1a6270629a024d88a13b9c5f3c1..320fdc87f064364a4f8d2835cf430a1c3be58915 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -838,10 +838,10 @@ static void hmm_devmem_release(struct device *dev, void *data)
 
        mem_hotplug_begin();
        if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
-               __remove_pages(zone, start_pfn, npages);
+               __remove_pages(zone, start_pfn, npages, NULL);
        else
                arch_remove_memory(start_pfn << PAGE_SHIFT,
-                                  npages << PAGE_SHIFT);
+                                  npages << PAGE_SHIFT, NULL);
        mem_hotplug_done();
 
        hmm_devmem_radix_release(resource);
@@ -882,7 +882,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
        else
                devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 
-       devmem->pagemap.res = devmem->resource;
+       devmem->pagemap.res = *devmem->resource;
        devmem->pagemap.page_fault = hmm_devmem_fault;
        devmem->pagemap.page_free = hmm_devmem_free;
        devmem->pagemap.dev = devmem->device;
@@ -931,17 +931,18 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
         * want the linear mapping and thus use arch_add_memory().
         */
        if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC)
-               ret = arch_add_memory(nid, align_start, align_size, false);
+               ret = arch_add_memory(nid, align_start, align_size, NULL,
+                               false);
        else
                ret = add_pages(nid, align_start >> PAGE_SHIFT,
-                               align_size >> PAGE_SHIFT, false);
+                               align_size >> PAGE_SHIFT, NULL, false);
        if (ret) {
                mem_hotplug_done();
                goto error_add_memory;
        }
        move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                                align_start >> PAGE_SHIFT,
-                               align_size >> PAGE_SHIFT);
+                               align_size >> PAGE_SHIFT, NULL);
        mem_hotplug_done();
 
        for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
index ca5674cbaff2b65c4e51086e5922fbbd274f2cfa..46b6c33b7f04713a4cf9423c156324b0740d34f2 100644 (file)
@@ -1897,12 +1897,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_pfn_prot);
 
+static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
+{
+       /* these checks mirror the abort conditions in vm_normal_page */
+       if (vma->vm_flags & VM_MIXEDMAP)
+               return true;
+       if (pfn_t_devmap(pfn))
+               return true;
+       if (pfn_t_special(pfn))
+               return true;
+       if (is_zero_pfn(pfn_t_to_pfn(pfn)))
+               return true;
+       return false;
+}
+
 static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                        pfn_t pfn, bool mkwrite)
 {
        pgprot_t pgprot = vma->vm_page_prot;
 
-       BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+       BUG_ON(!vm_mixed_ok(vma, pfn));
 
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
index c52aa05b106c76dd5b469bc66e0227348d7524e5..12df8a5fadcc78c0bfcdb72ece882d760458445a 100644 (file)
@@ -250,7 +250,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
-               bool want_memblock)
+               struct vmem_altmap *altmap, bool want_memblock)
 {
        int ret;
        int i;
@@ -258,7 +258,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
        if (pfn_valid(phys_start_pfn))
                return -EEXIST;
 
-       ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
+       ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
        if (ret < 0)
                return ret;
 
@@ -292,18 +292,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  * add the new pages.
  */
 int __ref __add_pages(int nid, unsigned long phys_start_pfn,
-                       unsigned long nr_pages, bool want_memblock)
+               unsigned long nr_pages, struct vmem_altmap *altmap,
+               bool want_memblock)
 {
        unsigned long i;
        int err = 0;
        int start_sec, end_sec;
-       struct vmem_altmap *altmap;
 
        /* during initialize mem_map, align hot-added range to section */
        start_sec = pfn_to_section_nr(phys_start_pfn);
        end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 
-       altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
        if (altmap) {
                /*
                 * Validate altmap is within bounds of the total request
@@ -318,7 +317,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
        }
 
        for (i = start_sec; i <= end_sec; i++) {
-               err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
+               err = __add_section(nid, section_nr_to_pfn(i), altmap,
+                               want_memblock);
 
                /*
                 * EEXIST is finally dealt with by ioresource collision
@@ -334,7 +334,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 out:
        return err;
 }
-EXPORT_SYMBOL_GPL(__add_pages);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
@@ -537,7 +536,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
 }
 
 static int __remove_section(struct zone *zone, struct mem_section *ms,
-               unsigned long map_offset)
+               unsigned long map_offset, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn;
        int scn_nr;
@@ -554,7 +553,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
        start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
        __remove_zone(zone, start_pfn);
 
-       sparse_remove_one_section(zone, ms, map_offset);
+       sparse_remove_one_section(zone, ms, map_offset, altmap);
        return 0;
 }
 
@@ -570,7 +569,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
  * calling offline_pages().
  */
 int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-                unsigned long nr_pages)
+                unsigned long nr_pages, struct vmem_altmap *altmap)
 {
        unsigned long i;
        unsigned long map_offset = 0;
@@ -578,10 +577,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
        /* In the ZONE_DEVICE case device driver owns the memory region */
        if (is_dev_zone(zone)) {
-               struct page *page = pfn_to_page(phys_start_pfn);
-               struct vmem_altmap *altmap;
-
-               altmap = to_vmem_altmap((unsigned long) page);
                if (altmap)
                        map_offset = vmem_altmap_offset(altmap);
        } else {
@@ -612,7 +607,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
        for (i = 0; i < sections_to_remove; i++) {
                unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
-               ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
+               ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
+                               altmap);
                map_offset = 0;
                if (ret)
                        break;
@@ -802,8 +798,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
        pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
 }
 
-void __ref move_pfn_range_to_zone(struct zone *zone,
-               unsigned long start_pfn, unsigned long nr_pages)
+void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+               unsigned long nr_pages, struct vmem_altmap *altmap)
 {
        struct pglist_data *pgdat = zone->zone_pgdat;
        int nid = pgdat->node_id;
@@ -828,7 +824,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone,
         * expects the zone spans the pfn range. All the pages in the range
         * are reserved so nobody should be touching them so we should be safe
         */
-       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+                       MEMMAP_HOTPLUG, altmap);
 
        set_zone_contiguous(zone);
 }
@@ -900,7 +897,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
        struct zone *zone;
 
        zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
-       move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+       move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
        return zone;
 }
 
@@ -1149,7 +1146,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
        }
 
        /* call arch's memory hotadd */
-       ret = arch_add_memory(nid, start, size, true);
+       ret = arch_add_memory(nid, start, size, NULL, true);
 
        if (ret < 0)
                goto error;
@@ -1891,7 +1888,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
        memblock_free(start, size);
        memblock_remove(start, size);
 
-       arch_remove_memory(start, size);
+       arch_remove_memory(start, size, NULL);
 
        try_offline_node(nid);
 
index 76c9688b6a0a75fc1c28e90920a8c9498c5e6d06..2bb7f163baca178a2295e60401e8f3f9730d44a3 100644 (file)
@@ -5314,9 +5314,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-               unsigned long start_pfn, enum memmap_context context)
+               unsigned long start_pfn, enum memmap_context context,
+               struct vmem_altmap *altmap)
 {
-       struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
        unsigned long end_pfn = start_pfn + size;
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long pfn;
@@ -5417,7 +5417,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 #ifndef __HAVE_ARCH_MEMMAP_INIT
 #define memmap_init(size, nid, zone, start_pfn) \
-       memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
+       memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL)
 #endif
 
 static int zone_batchsize(struct zone *zone)
index 17acf01791fa832e1c8414cecc98034f2d652662..bd0276d5f66b17f6b6c78e503b91972c7677e599 100644 (file)
@@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 }
 
 /* need to make sure size is all the same during early stage */
-static void * __meminit alloc_block_buf(unsigned long size, int node)
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
 {
        void *ptr;
 
@@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
 }
 
 /**
- * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
- * @altmap - reserved page pool for the allocation
- * @nr_pfns - size (in pages) of the allocation
+ * altmap_alloc_block_buf - allocate pages from the device page map
+ * @altmap:    device page map
+ * @size:      size (in bytes) of the allocation
  *
- * Allocations are aligned to the size of the request
+ * Allocations are aligned to the size of the request.
  */
-static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
-               unsigned long nr_pfns)
-{
-       unsigned long pfn = vmem_altmap_next_pfn(altmap);
-       unsigned long nr_align;
-
-       nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
-       nr_align = ALIGN(pfn, nr_align) - pfn;
-
-       if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
-               return ULONG_MAX;
-       altmap->alloc += nr_pfns;
-       altmap->align += nr_align;
-       return pfn + nr_align;
-}
-
-static void * __meminit altmap_alloc_block_buf(unsigned long size,
+void * __meminit altmap_alloc_block_buf(unsigned long size,
                struct vmem_altmap *altmap)
 {
-       unsigned long pfn, nr_pfns;
-       void *ptr;
+       unsigned long pfn, nr_pfns, nr_align;
 
        if (size & ~PAGE_MASK) {
                pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
@@ -141,25 +124,20 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
                return NULL;
        }
 
+       pfn = vmem_altmap_next_pfn(altmap);
        nr_pfns = size >> PAGE_SHIFT;
-       pfn = vmem_altmap_alloc(altmap, nr_pfns);
-       if (pfn < ULONG_MAX)
-               ptr = __va(__pfn_to_phys(pfn));
-       else
-               ptr = NULL;
-       pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
-                       __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
+       nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
+       nr_align = ALIGN(pfn, nr_align) - pfn;
+       if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
+               return NULL;
 
-       return ptr;
-}
+       altmap->alloc += nr_pfns;
+       altmap->align += nr_align;
+       pfn += nr_align;
 
-/* need to make sure size is all the same during early stage */
-void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
-               struct vmem_altmap *altmap)
-{
-       if (altmap)
-               return altmap_alloc_block_buf(size, altmap);
-       return alloc_block_buf(size, node);
+       pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
+                       __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
+       return __va(__pfn_to_phys(pfn));
 }
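To make the alignment accounting above concrete, a worked example with assumed numbers (not taken from the patch):

/*
 * Suppose vmem_altmap_next_pfn() returns 0x10003 and the request is
 * 16 pages (one 64K chunk of memmap):
 *
 *   nr_pfns  = 16
 *   nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG) = 16
 *   nr_align = ALIGN(0x10003, 16) - 0x10003 = 13
 *
 * altmap->alloc grows by 16, altmap->align by 13, and the buffer is
 * returned at __va(__pfn_to_phys(0x10010)).
 */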
 
 void __meminit vmemmap_verify(pte_t *pte, int node,
@@ -178,7 +156,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
        pte_t *pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte)) {
                pte_t entry;
-               void *p = alloc_block_buf(PAGE_SIZE, node);
+               void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -278,7 +256,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
        return 0;
 }
 
-struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
+               struct vmem_altmap *altmap)
 {
        unsigned long start;
        unsigned long end;
@@ -288,7 +267,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
        start = (unsigned long)map;
        end = (unsigned long)(map + PAGES_PER_SECTION);
 
-       if (vmemmap_populate(start, end, nid))
+       if (vmemmap_populate(start, end, nid, altmap))
                return NULL;
 
        return map;
@@ -318,7 +297,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
                if (!present_section_nr(pnum))
                        continue;
 
-               map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+               map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
                if (map_map[pnum])
                        continue;
                ms = __nr_to_section(pnum);
index 2609aba121e89cc5c8b656f4ef3539545c434091..2583174b1d625692f8f4235b5353a2e1308f7e98 100644 (file)
@@ -417,7 +417,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
+               struct vmem_altmap *altmap)
 {
        struct page *map;
        unsigned long size;
@@ -472,7 +473,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 
                if (!present_section_nr(pnum))
                        continue;
-               map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+               map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
                if (map_map[pnum])
                        continue;
                ms = __nr_to_section(pnum);
@@ -500,7 +501,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
        struct mem_section *ms = __nr_to_section(pnum);
        int nid = sparse_early_nid(ms);
 
-       map = sparse_mem_map_populate(pnum, nid);
+       map = sparse_mem_map_populate(pnum, nid, NULL);
        if (map)
                return map;
 
@@ -678,17 +679,19 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+               struct vmem_altmap *altmap)
 {
        /* This will make the necessary allocations eventually. */
-       return sparse_mem_map_populate(pnum, nid);
+       return sparse_mem_map_populate(pnum, nid, altmap);
 }
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+               struct vmem_altmap *altmap)
 {
        unsigned long start = (unsigned long)memmap;
        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
-       vmemmap_free(start, end);
+       vmemmap_free(start, end, altmap);
 }
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static void free_map_bootmem(struct page *memmap)
@@ -696,7 +699,7 @@ static void free_map_bootmem(struct page *memmap)
        unsigned long start = (unsigned long)memmap;
        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
-       vmemmap_free(start, end);
+       vmemmap_free(start, end, NULL);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #else
@@ -721,12 +724,14 @@ got_map_ptr:
        return ret;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+               struct vmem_altmap *altmap)
 {
        return __kmalloc_section_memmap();
 }
 
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+               struct vmem_altmap *altmap)
 {
        if (is_vmalloc_addr(memmap))
                vfree(memmap);
@@ -773,7 +778,8 @@ static void free_map_bootmem(struct page *memmap)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
+int __meminit sparse_add_one_section(struct pglist_data *pgdat,
+               unsigned long start_pfn, struct vmem_altmap *altmap)
 {
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        struct mem_section *ms;
@@ -789,12 +795,12 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st
        ret = sparse_index_init(section_nr, pgdat->node_id);
        if (ret < 0 && ret != -EEXIST)
                return ret;
-       memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
+       memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
        if (!memmap)
                return -ENOMEM;
        usemap = __kmalloc_section_usemap();
        if (!usemap) {
-               __kfree_section_memmap(memmap);
+               __kfree_section_memmap(memmap, altmap);
                return -ENOMEM;
        }
 
@@ -816,7 +822,7 @@ out:
        pgdat_resize_unlock(pgdat, &flags);
        if (ret <= 0) {
                kfree(usemap);
-               __kfree_section_memmap(memmap);
+               __kfree_section_memmap(memmap, altmap);
        }
        return ret;
 }
@@ -843,7 +849,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 }
 #endif
 
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+static void free_section_usemap(struct page *memmap, unsigned long *usemap,
+               struct vmem_altmap *altmap)
 {
        struct page *usemap_page;
 
@@ -857,7 +864,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
        if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
                kfree(usemap);
                if (memmap)
-                       __kfree_section_memmap(memmap);
+                       __kfree_section_memmap(memmap, altmap);
                return;
        }
 
@@ -871,7 +878,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 }
 
 void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
-               unsigned long map_offset)
+               unsigned long map_offset, struct vmem_altmap *altmap)
 {
        struct page *memmap = NULL;
        unsigned long *usemap = NULL, flags;
@@ -889,7 +896,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 
        clear_hwpoisoned_pages(memmap + map_offset,
                        PAGES_PER_SECTION - map_offset);
-       free_section_usemap(memmap, usemap);
+       free_section_usemap(memmap, usemap, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
index db33b28c5ef32c302f6422474a2b6d669faa8347..0392153a0009b04455c49938f931784d47ad9544 100644 (file)
@@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 nfit-y := $(ACPI_SRC)/core.o
 nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
 nfit-y += config_check.o
 
 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
 nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
 nd_pmem-y += config_check.o
 
 nd_btt-y := $(NVDIMM_SRC)/btt.o
@@ -57,6 +59,7 @@ dax-y += config_check.o
 
 device_dax-y := $(DAX_SRC)/device.o
 device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
 device_dax-y += config_check.o
 
 dax_pmem-y := $(DAX_SRC)/pmem.o
@@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
 libnvdimm-y += config_check.o
 
 obj-m += test/
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644 (file)
index 0000000..4352151
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644 (file)
index 0000000..24b17bf
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644 (file)
index 0000000..00ca30b
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644 (file)
index 0000000..fd38f92
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
index e1f75a1914a15491b79ec95b0f18c6617565873b..ff9d3a5825e1f6fbdf4cc7ee89ad7a7cdb9c0fe3 100644 (file)
@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
-               struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
-       resource_size_t offset = res->start;
+       resource_size_t offset = pgmap->res.start;
        struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
        if (nfit_res)
                return nfit_res->buf + offset - nfit_res->res.start;
-       return devm_memremap_pages(dev, res, ref, altmap);
+       return devm_memremap_pages(dev, pgmap);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
index de1373a7ed4f0412cbad07d46922340686bf23e5..620fa78b3b1b33ab7e87f20e076bc6323b68e41f 100644 (file)
@@ -27,6 +27,7 @@
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
+#include "../watermark.h"
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -137,6 +138,14 @@ static u32 handle[] = {
 
 static unsigned long dimm_fail_cmd_flags[NUM_DCR];
 
+struct nfit_test_fw {
+       enum intel_fw_update_state state;
+       u32 context;
+       u64 version;
+       u32 size_received;
+       u64 end_time;
+};
+
 struct nfit_test {
        struct acpi_nfit_desc acpi_desc;
        struct platform_device pdev;
@@ -168,8 +177,11 @@ struct nfit_test {
                spinlock_t lock;
        } ars_state;
        struct device *dimm_dev[NUM_DCR];
+       struct nd_intel_smart *smart;
+       struct nd_intel_smart_threshold *smart_threshold;
        struct badrange badrange;
        struct work_struct work;
+       struct nfit_test_fw *fw;
 };
 
 static struct workqueue_struct *nfit_wq;
@@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev)
        return container_of(pdev, struct nfit_test, pdev);
 }
 
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+               struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+               int idx)
+{
+       struct device *dev = &t->pdev.dev;
+       struct nfit_test_fw *fw = &t->fw[idx];
+
+       dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d)\n",
+                       __func__, t, nd_cmd, buf_len, idx);
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       nd_cmd->status = 0;
+       nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+       nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+       nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+       nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+       nd_cmd->update_cap = 0;
+       nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+       nd_cmd->run_version = 0;
+       nd_cmd->updated_version = fw->version;
+
+       return 0;
+}
+
+static int nd_intel_test_start_update(struct nfit_test *t,
+               struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+               int idx)
+{
+       struct device *dev = &t->pdev.dev;
+       struct nfit_test_fw *fw = &t->fw[idx];
+
+       dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+                       __func__, t, nd_cmd, buf_len, idx);
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       if (fw->state != FW_STATE_NEW) {
+               /* extended status, FW update in progress */
+               nd_cmd->status = 0x10007;
+               return 0;
+       }
+
+       fw->state = FW_STATE_IN_PROGRESS;
+       fw->context++;
+       fw->size_received = 0;
+       nd_cmd->status = 0;
+       nd_cmd->context = fw->context;
+
+       dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
+
+       return 0;
+}
+
+static int nd_intel_test_send_data(struct nfit_test *t,
+               struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+               int idx)
+{
+       struct device *dev = &t->pdev.dev;
+       struct nfit_test_fw *fw = &t->fw[idx];
+       u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+       dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+                       __func__, t, nd_cmd, buf_len, idx);
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+       dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
+       dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
+                       nd_cmd->data[nd_cmd->length-1]);
+
+       if (fw->state != FW_STATE_IN_PROGRESS) {
+               dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+               *status = 0x5;
+               return 0;
+       }
+
+       if (nd_cmd->context != fw->context) {
+               dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+                               __func__, nd_cmd->context, fw->context);
+               *status = 0x10007;
+               return 0;
+       }
+
+       /*
+        * reject the transfer if offset + length exceeds the firmware
+        * storage size, or if length exceeds the max send length
+        */
+       if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+                       nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+               *status = 0x3;
+               dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+               return 0;
+       }
+
+       fw->size_received += nd_cmd->length;
+       dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+                       __func__, nd_cmd->length, fw->size_received);
+       *status = 0;
+       return 0;
+}
+
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+               struct nd_intel_fw_finish_update *nd_cmd,
+               unsigned int buf_len, int idx)
+{
+       struct device *dev = &t->pdev.dev;
+       struct nfit_test_fw *fw = &t->fw[idx];
+
+       dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+                       __func__, t, nd_cmd, buf_len, idx);
+
+       if (fw->state == FW_STATE_UPDATED) {
+               /* update already done, need cold boot */
+               nd_cmd->status = 0x20007;
+               return 0;
+       }
+
+       dev_dbg(dev, "%s: context: %#x  ctrl_flags: %#x\n",
+                       __func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+       switch (nd_cmd->ctrl_flags) {
+       case 0: /* finish */
+               if (nd_cmd->context != fw->context) {
+                       dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+                                       __func__, nd_cmd->context,
+                                       fw->context);
+                       nd_cmd->status = 0x10007;
+                       return 0;
+               }
+               nd_cmd->status = 0;
+               fw->state = FW_STATE_VERIFY;
+               /* set 1 second of time for firmware "update" */
+               fw->end_time = jiffies + HZ;
+               break;
+
+       case 1: /* abort */
+               fw->size_received = 0;
+               /* successfully aborted status */
+               nd_cmd->status = 0x40007;
+               fw->state = FW_STATE_NEW;
+               dev_dbg(dev, "%s: abort successful\n", __func__);
+               break;
+
+       default: /* bad control flag */
+               dev_warn(dev, "%s: unknown control flag: %#x\n",
+                               __func__, nd_cmd->ctrl_flags);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+               struct nd_intel_fw_finish_query *nd_cmd,
+               unsigned int buf_len, int idx)
+{
+       struct device *dev = &t->pdev.dev;
+       struct nfit_test_fw *fw = &t->fw[idx];
+
+       dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+                       __func__, t, nd_cmd, buf_len, idx);
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       if (nd_cmd->context != fw->context) {
+               dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+                               __func__, nd_cmd->context, fw->context);
+               nd_cmd->status = 0x10007;
+               return 0;
+       }
+
+       dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+       switch (fw->state) {
+       case FW_STATE_NEW:
+               nd_cmd->updated_fw_rev = 0;
+               nd_cmd->status = 0;
+               dev_dbg(dev, "%s: new state\n", __func__);
+               break;
+
+       case FW_STATE_IN_PROGRESS:
+               /* sequencing error */
+               nd_cmd->status = 0x40007;
+               nd_cmd->updated_fw_rev = 0;
+               dev_dbg(dev, "%s: sequence error\n", __func__);
+               break;
+
+       case FW_STATE_VERIFY:
+               if (time_is_after_jiffies64(fw->end_time)) {
+                       nd_cmd->updated_fw_rev = 0;
+                       nd_cmd->status = 0x20007;
+                       dev_dbg(dev, "%s: still verifying\n", __func__);
+                       break;
+               }
+
+               dev_dbg(dev, "%s: transition out verify\n", __func__);
+               fw->state = FW_STATE_UPDATED;
+               /* we are going to fall through if it's "done" */
+       case FW_STATE_UPDATED:
+               nd_cmd->status = 0;
+               /* bogus test version */
+               fw->version = nd_cmd->updated_fw_rev =
+                       INTEL_FW_FAKE_VERSION;
+               dev_dbg(dev, "%s: updated\n", __func__);
+               break;
+
+       default: /* we should never get here */
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
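For orientation, the happy-path call sequence the emulated firmware-update state machine above accepts (a sketch; the marshalling of these handlers through ND_CMD_CALL packages happens in nfit_test_ctl() below):

/*
 * ND_INTEL_FW_GET_INFO       learn storage size and max_send_len
 * ND_INTEL_FW_START_UPDATE   FW_STATE_NEW -> IN_PROGRESS, returns context
 * ND_INTEL_FW_SEND_DATA      repeated chunks <= max_send_len, same context
 * ND_INTEL_FW_FINISH_UPDATE  ctrl_flags 0: IN_PROGRESS -> VERIFY (~1 second)
 * ND_INTEL_FW_FINISH_QUERY   0x20007 while verifying, then VERIFY ->
 *                            UPDATED and INTEL_FW_FAKE_VERSION is reported
 *
 * A stale context at any step yields extended status 0x10007; passing
 * ctrl_flags 1 to FINISH_UPDATE aborts back to FW_STATE_NEW with 0x40007.
 */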
 static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
                unsigned int buf_len)
 {
@@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
        return 0;
 }
 
-static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+               struct nd_intel_smart *smart_data)
 {
-       static const struct nd_smart_payload smart_data = {
-               .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
-                       | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
-                       | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
-               .health = ND_SMART_NON_CRITICAL_HEALTH,
-               .temperature = 23 * 16,
-               .spares = 75,
-               .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
-               .life_used = 5,
-               .shutdown_state = 0,
-               .vendor_size = 0,
-       };
-
        if (buf_len < sizeof(*smart))
                return -EINVAL;
-       memcpy(smart->data, &smart_data, sizeof(smart_data));
+       memcpy(smart, smart_data, sizeof(*smart));
        return 0;
 }
 
-static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
-               unsigned int buf_len)
+static int nfit_test_cmd_smart_threshold(
+               struct nd_intel_smart_threshold *out,
+               unsigned int buf_len,
+               struct nd_intel_smart_threshold *smart_t)
 {
-       static const struct nd_smart_threshold_payload smart_t_data = {
-               .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
-               .temperature = 40 * 16,
-               .spares = 5,
-       };
-
        if (buf_len < sizeof(*smart_t))
                return -EINVAL;
-       memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+       memcpy(out, smart_t, sizeof(*smart_t));
+       return 0;
+}
+
+static void smart_notify(struct device *bus_dev,
+               struct device *dimm_dev, struct nd_intel_smart *smart,
+               struct nd_intel_smart_threshold *thresh)
+{
+       dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+                       __func__, thresh->alarm_control, thresh->spares,
+                       smart->spares, thresh->media_temperature,
+                       smart->media_temperature, thresh->ctrl_temperature,
+                       smart->ctrl_temperature);
+       if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+                               && smart->spares
+                               <= thresh->spares)
+                       || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+                               && smart->media_temperature
+                               >= thresh->media_temperature)
+                       || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+                               && smart->ctrl_temperature
+                               >= thresh->ctrl_temperature)) {
+               device_lock(bus_dev);
+               __acpi_nvdimm_notify(dimm_dev, 0x81);
+               device_unlock(bus_dev);
+       }
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+               struct nd_intel_smart_set_threshold *in,
+               unsigned int buf_len,
+               struct nd_intel_smart_threshold *thresh,
+               struct nd_intel_smart *smart,
+               struct device *bus_dev, struct device *dimm_dev)
+{
+       unsigned int size;
+
+       size = sizeof(*in) - 4;
+       if (buf_len < size)
+               return -EINVAL;
+       memcpy(thresh->data, in, size);
+       in->status = 0;
+       smart_notify(bus_dev, dimm_dev, smart, thresh);
+
        return 0;
 }
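The temperature fields in these payloads are encoded in units of 1/16 degree Celsius, as the 40 * 16 style constants suggest; a worked example of the trip logic, assuming the emulated smart payload keeps the 23 * 16 (23.0C) media temperature that the removed nd_smart_payload defaults used:

/*
 * With thresh->media_temperature = 40 * 16 (40.0C) and a current
 * reading of 23 * 16 (23.0C), nothing trips.  If a SET_THRESHOLD call
 * lowers the threshold to 23 * 16 or less while ND_INTEL_SMART_TEMP_TRIP
 * is set in alarm_control, smart >= thresh now holds and smart_notify()
 * fires __acpi_nvdimm_notify(dimm_dev, 0x81) under the bus device lock.
 */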
 
@@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
        return 0;
 }
 
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+               struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+       struct device *dev = &t->pdev.dev;
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       switch (nd_cmd->enable) {
+       case 0:
+               nd_cmd->status = 0;
+               dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+                               __func__);
+               break;
+       case 1:
+               nd_cmd->status = 0;
+               dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+                               __func__);
+               break;
+       default:
+               dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+               nd_cmd->status = 0x3;
+               break;
+       }
+
+       return 0;
+}
+
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+       int i;
+
+       /* lookup per-dimm data */
+       for (i = 0; i < ARRAY_SIZE(handle); i++)
+               if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+                       break;
+       if (i >= ARRAY_SIZE(handle))
+               return -ENXIO;
+
+       if ((1 << func) & dimm_fail_cmd_flags[i])
+               return -EIO;
+
+       return i;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                struct nvdimm *nvdimm, unsigned int cmd, void *buf,
                unsigned int buf_len, int *cmd_rc)
@@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                        func = call_pkg->nd_command;
                        if (call_pkg->nd_family != nfit_mem->family)
                                return -ENOTTY;
+
+                       i = get_dimm(nfit_mem, func);
+                       if (i < 0)
+                               return i;
+
+                       switch (func) {
+                       case ND_INTEL_ENABLE_LSS_STATUS:
+                               return nd_intel_test_cmd_set_lss_status(t,
+                                               buf, buf_len);
+                       case ND_INTEL_FW_GET_INFO:
+                               return nd_intel_test_get_fw_info(t, buf,
+                                               buf_len, i - t->dcr_idx);
+                       case ND_INTEL_FW_START_UPDATE:
+                               return nd_intel_test_start_update(t, buf,
+                                               buf_len, i - t->dcr_idx);
+                       case ND_INTEL_FW_SEND_DATA:
+                               return nd_intel_test_send_data(t, buf,
+                                               buf_len, i - t->dcr_idx);
+                       case ND_INTEL_FW_FINISH_UPDATE:
+                               return nd_intel_test_finish_fw(t, buf,
+                                               buf_len, i - t->dcr_idx);
+                       case ND_INTEL_FW_FINISH_QUERY:
+                               return nd_intel_test_finish_query(t, buf,
+                                               buf_len, i - t->dcr_idx);
+                       case ND_INTEL_SMART:
+                               return nfit_test_cmd_smart(buf, buf_len,
+                                               &t->smart[i - t->dcr_idx]);
+                       case ND_INTEL_SMART_THRESHOLD:
+                               return nfit_test_cmd_smart_threshold(buf,
+                                               buf_len,
+                                               &t->smart_threshold[i -
+                                                       t->dcr_idx]);
+                       case ND_INTEL_SMART_SET_THRESHOLD:
+                               return nfit_test_cmd_smart_set_threshold(buf,
+                                               buf_len,
+                                               &t->smart_threshold[i -
+                                                       t->dcr_idx],
+                                               &t->smart[i - t->dcr_idx],
+                                               &t->pdev.dev, t->dimm_dev[i]);
+                       default:
+                               return -ENOTTY;
+                       }
                }
 
                if (!test_bit(cmd, &cmd_mask)
                                || !test_bit(func, &nfit_mem->dsm_mask))
                        return -ENOTTY;
 
-               /* lookup label space for the given dimm */
-               for (i = 0; i < ARRAY_SIZE(handle); i++)
-                       if (__to_nfit_memdev(nfit_mem)->device_handle ==
-                                       handle[i])
-                               break;
-               if (i >= ARRAY_SIZE(handle))
-                       return -ENXIO;
-
-               if ((1 << func) & dimm_fail_cmd_flags[i])
-                       return -EIO;
+               i = get_dimm(nfit_mem, func);
+               if (i < 0)
+                       return i;
 
                switch (func) {
                case ND_CMD_GET_CONFIG_SIZE:
@@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                        rc = nfit_test_cmd_set_config_data(buf, buf_len,
                                t->label[i - t->dcr_idx]);
                        break;
-               case ND_CMD_SMART:
-                       rc = nfit_test_cmd_smart(buf, buf_len);
-                       break;
-               case ND_CMD_SMART_THRESHOLD:
-                       rc = nfit_test_cmd_smart_threshold(buf, buf_len);
-                       device_lock(&t->pdev.dev);
-                       __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
-                       device_unlock(&t->pdev.dev);
-                       break;
                default:
                        return -ENOTTY;
                }
@@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
        NULL,
 };
 
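+/* seed each simulated dimm with the same default SMART data and thresholds */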
+static void smart_init(struct nfit_test *t)
+{
+       int i;
+       const struct nd_intel_smart_threshold smart_t_data = {
+               .alarm_control = ND_INTEL_SMART_SPARE_TRIP
+                       | ND_INTEL_SMART_TEMP_TRIP,
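+               /* temperature fields are in units of 1/16 degrees Celsius */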
+               .media_temperature = 40 * 16,
+               .ctrl_temperature = 30 * 16,
+               .spares = 5,
+       };
+       const struct nd_intel_smart smart_data = {
+               .flags = ND_INTEL_SMART_HEALTH_VALID
+                       | ND_INTEL_SMART_SPARES_VALID
+                       | ND_INTEL_SMART_ALARM_VALID
+                       | ND_INTEL_SMART_USED_VALID
+                       | ND_INTEL_SMART_SHUTDOWN_VALID
+                       | ND_INTEL_SMART_MTEMP_VALID,
+               .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+               .media_temperature = 23 * 16,
+               .ctrl_temperature = 30 * 16,
+               .pmic_temperature = 40 * 16,
+               .spares = 75,
+               .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+                       | ND_INTEL_SMART_TEMP_TRIP,
+               .ait_status = 1,
+               .life_used = 5,
+               .shutdown_state = 0,
+               .vendor_size = 0,
+               .shutdown_count = 100,
+       };
+
+       for (i = 0; i < t->num_dcr; i++) {
+               memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
+               memcpy(&t->smart_threshold[i], &smart_t_data,
+                               sizeof(smart_t_data));
+       }
+}
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
        size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -940,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        return -ENOMEM;
        }
 
+       smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -970,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
        if (!t->spa_set[1])
                return -ENOMEM;
 
+       smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -1652,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t)
        set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-       set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
-       set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
        set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
        set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
        set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
        set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+       set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
 }
 
 static void nfit_test1_setup(struct nfit_test *t)
@@ -1760,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t)
        set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+       set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -2064,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev)
                                sizeof(struct nfit_test_dcr *), GFP_KERNEL);
                nfit_test->dcr_dma = devm_kcalloc(dev, num,
                                sizeof(dma_addr_t), GFP_KERNEL);
+               nfit_test->smart = devm_kcalloc(dev, num,
+                               sizeof(struct nd_intel_smart), GFP_KERNEL);
+               nfit_test->smart_threshold = devm_kcalloc(dev, num,
+                               sizeof(struct nd_intel_smart_threshold),
+                               GFP_KERNEL);
+               nfit_test->fw = devm_kcalloc(dev, num,
+                               sizeof(struct nfit_test_fw), GFP_KERNEL);
                if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
                                && nfit_test->label_dma && nfit_test->dcr
                                && nfit_test->dcr_dma && nfit_test->flush
-                               && nfit_test->flush_dma)
+                               && nfit_test->flush_dma
+                               && nfit_test->smart
+                               && nfit_test->smart_threshold
+                               && nfit_test->fw)
                        /* pass */;
                else
                        return -ENOMEM;
@@ -2169,6 +2556,11 @@ static __init int nfit_test_init(void)
 {
        int rc, i;
 
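+       /* verify we are linked against the mocked modules, not the base tree */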
+       pmem_test();
+       libnvdimm_test();
+       acpi_nfit_test();
+       device_dax_test();
+
        nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
 
        nfit_wq = create_singlethread_workqueue("nfit");
index 113b44675a71860548e21cd77148f4a5dea03f32..428344519cdf72cd32747e3457ab0f50d722fb2d 100644 (file)
@@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat {
        } __packed record[0];
 } __packed;
 
+#define ND_INTEL_SMART                  1
+#define ND_INTEL_SMART_THRESHOLD        2
+#define ND_INTEL_ENABLE_LSS_STATUS     10
+#define ND_INTEL_FW_GET_INFO           12
+#define ND_INTEL_FW_START_UPDATE       13
+#define ND_INTEL_FW_SEND_DATA          14
+#define ND_INTEL_FW_FINISH_UPDATE      15
+#define ND_INTEL_FW_FINISH_QUERY       16
+#define ND_INTEL_SMART_SET_THRESHOLD   17
+
+#define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
+#define ND_INTEL_SMART_USED_VALID               (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID              (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID              (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID     (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID         (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID              (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID              (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID           (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID             (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP               (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP                (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP               (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+
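+/*
+ * Layout of the Intel SMART health output payload; the union allows
+ * both per-field access and raw-buffer copies of the same data.
+ */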
+struct nd_intel_smart {
+       __u32 status;
+       union {
+               struct {
+                       __u32 flags;
+                       __u8 reserved0[4];
+                       __u8 health;
+                       __u8 spares;
+                       __u8 life_used;
+                       __u8 alarm_flags;
+                       __u16 media_temperature;
+                       __u16 ctrl_temperature;
+                       __u32 shutdown_count;
+                       __u8 ait_status;
+                       __u16 pmic_temperature;
+                       __u8 reserved1[8];
+                       __u8 shutdown_state;
+                       __u32 vendor_size;
+                       __u8 vendor_data[92];
+               } __packed;
+               __u8 data[128];
+       };
+} __packed;
+
+struct nd_intel_smart_threshold {
+       __u32 status;
+       union {
+               struct {
+                       __u16 alarm_control;
+                       __u8 spares;
+                       __u16 media_temperature;
+                       __u16 ctrl_temperature;
+                       __u8 reserved[1];
+               } __packed;
+               __u8 data[8];
+       };
+} __packed;
+
+struct nd_intel_smart_set_threshold {
+       __u16 alarm_control;
+       __u8 spares;
+       __u16 media_temperature;
+       __u16 ctrl_temperature;
+       __u32 status;
+} __packed;
+
+#define INTEL_FW_STORAGE_SIZE          0x100000
+#define INTEL_FW_MAX_SEND_LEN          0xFFEC
+#define INTEL_FW_QUERY_INTERVAL                250000
+#define INTEL_FW_QUERY_MAX_TIME                3000000
+#define INTEL_FW_FIS_VERSION           0x0105
+#define INTEL_FW_FAKE_VERSION          0xffffffffabcd
+
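+/* states of the simulated firmware update sequence */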
+enum intel_fw_update_state {
+       FW_STATE_NEW = 0,
+       FW_STATE_IN_PROGRESS,
+       FW_STATE_VERIFY,
+       FW_STATE_UPDATED,
+};
+
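+/* output payload for ND_INTEL_FW_GET_INFO */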
+struct nd_intel_fw_info {
+       __u32 status;
+       __u32 storage_size;
+       __u32 max_send_len;
+       __u32 query_interval;
+       __u32 max_query_time;
+       __u8 update_cap;
+       __u8 reserved[3];
+       __u32 fis_version;
+       __u64 run_version;
+       __u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+       __u32 status;
+       __u32 context;
+} __packed;
+
+/* the output fields come first because of the variable-size input data */
+struct nd_intel_fw_send_data {
+       __u32 context;
+       __u32 offset;
+       __u32 length;
+       __u8 data[0];
+/* this field is not declared due to the variable-size input data */
+/*     __u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+       __u8 ctrl_flags;
+       __u8 reserved[3];
+       __u32 context;
+       __u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+       __u32 context;
+       __u32 status;
+       __u64 updated_fw_rev;
+} __packed;
+
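+/* payload for ND_INTEL_ENABLE_LSS_STATUS (latch system shutdown status) */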
+struct nd_intel_lss {
+       __u8 enable;
+       __u32 status;
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
 
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644 (file)
index 0000000..ed05287
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * Defines a dummy watermark routine that nfit_test calls to validate it
+ * is linked against the properly mocked module and not the standard one
+ * from the base tree.
+ */
+#define nfit_test_watermark(x)                         \
+int x##_test(void)                                     \
+{                                                      \
+       pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
+       return 0;                                       \
+}                                                      \
+EXPORT_SYMBOL(x##_test)
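+
+/*
+ * For example, the mocked pmem module instantiates its watermark with
+ * "nfit_test_watermark(pmem);", which defines and exports pmem_test().
+ */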
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */