dax: add a sysfs knob to control memmap_on_memory behavior
author Vishal Verma <vishal.l.verma@intel.com>
Wed, 24 Jan 2024 20:03:50 +0000 (12:03 -0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 22 Feb 2024 18:24:40 +0000 (10:24 -0800)
Add a sysfs knob for dax devices to control the memmap_on_memory setting
if the dax device were to be hotplugged as system memory.

The default memmap_on_memory setting for dax devices originating via pmem
or hmem is set to 'false' - i.e.  no memmap_on_memory semantics, to
preserve legacy behavior.  For dax devices via CXL, the default is on.
The sysfs control allows the administrator to override the above defaults
if needed.

Link: https://lkml.kernel.org/r/20240124-vv-dax_abi-v7-5-20d16cb8d23d@intel.com
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Li Zhijian <lizhijian@fujitsu.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Huang, Ying <ying.huang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/ABI/testing/sysfs-bus-dax
drivers/dax/bus.c

index 6359f7bc9bf430544ccbfbd19e3ce0e92e1ab6ad..b34266bfae49ae3be71c2dc73ba0b875fbb69354 100644 (file)
@@ -134,3 +134,20 @@ KernelVersion:     v5.1
 Contact:       nvdimm@lists.linux.dev
 Description:
                (RO) The id attribute indicates the region id of a dax region.
+
+What:          /sys/bus/dax/devices/daxX.Y/memmap_on_memory
+Date:          January, 2024
+KernelVersion: v6.8
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RW) Control the memmap_on_memory setting if the dax device
+               were to be hotplugged as system memory. This determines whether
+               the 'altmap' for the hotplugged memory will be placed on the
+               device being hotplugged (memmap_on_memory=1) or if it will be
+               placed on regular memory (memmap_on_memory=0). This attribute
+               must be set before the device is handed over to the 'kmem'
+               driver (i.e.  hotplugged into system-ram). Additionally, this
+               depends on CONFIG_MHP_MEMMAP_ON_MEMORY, and a globally enabled
+               memmap_on_memory parameter for memory_hotplug. This is
+               typically set on the kernel command line -
+               memory_hotplug.memmap_on_memory set to 'true' or 'force'.
index 0fd948a4443e38e825b6318cb9fee5684a7392f7..27c86d0ca7118d0b081a5cb340a84e79a7b4cbfd 100644 (file)
@@ -1349,6 +1349,48 @@ static ssize_t numa_node_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(numa_node);
 
+static ssize_t memmap_on_memory_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+
+       return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
+}
+
+static ssize_t memmap_on_memory_store(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t len)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buf, &val);
+       if (rc)
+               return rc;
+
+       if (val == true && !mhp_supports_memmap_on_memory()) {
+               dev_dbg(dev, "memmap_on_memory is not available\n");
+               return -EOPNOTSUPP;
+       }
+
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc)
+               return rc;
+
+       if (dev_dax->memmap_on_memory != val && dev->driver &&
+           to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
+               up_write(&dax_dev_rwsem);
+               return -EBUSY;
+       }
+
+       dev_dax->memmap_on_memory = val;
+       up_write(&dax_dev_rwsem);
+
+       return len;
+}
+static DEVICE_ATTR_RW(memmap_on_memory);
+
 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
        struct device *dev = container_of(kobj, struct device, kobj);
@@ -1375,6 +1417,7 @@ static struct attribute *dev_dax_attributes[] = {
        &dev_attr_align.attr,
        &dev_attr_resource.attr,
        &dev_attr_numa_node.attr,
+       &dev_attr_memmap_on_memory.attr,
        NULL,
 };