shmem: get_unmapped_area align huge page
authorHugh Dickins <hughd@google.com>
Tue, 26 Jul 2016 22:26:15 +0000 (15:26 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2016 23:19:19 +0000 (16:19 -0700)
Provide a shmem_get_unmapped_area method in file_operations, called at
mmap time to decide the mapping address.  It could be conditional on
CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by making
it unconditional.

shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area
(which we treat as a black box, highly dependent on architecture and
config and executable layout).  Lots of conditions, and in most cases it
just goes with the address that chose; but when our huge stars are
rightly aligned, yet that did not provide a suitable address, go back to
ask for a larger arena, within which to align the mapping suitably.

There have to be some direct calls to shmem_get_unmapped_area(), not via
the file_operations: because of the way shmem_zero_setup() is called to
create a shmem object late in the mmap sequence, when MAP_SHARED is
requested with MAP_ANONYMOUS or /dev/zero.  Though this only matters
when /proc/sys/vm/shmem_huge has been set.

Link: http://lkml.kernel.org/r/1466021202-61880-29-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/char/mem.c
include/linux/shmem_fs.h
ipc/shm.c
mm/mmap.c
mm/shmem.c

index d633974e7f8b1d82914743e0a27bda495e1ff5af..a33163dbb913541ad6649de8e9a43f8256e1742a 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
+#include <linux/shmem_fs.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
@@ -657,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
+static unsigned long get_unmapped_area_zero(struct file *file,
+                               unsigned long addr, unsigned long len,
+                               unsigned long pgoff, unsigned long flags)
+{
+#ifdef CONFIG_MMU
+       if (flags & MAP_SHARED) {
+               /*
+                * mmap_zero() will call shmem_zero_setup() to create a file,
+                * so use shmem's get_unmapped_area in case it can be huge;
+                * and pass NULL for file as in mmap.c's get_unmapped_area(),
+                * so as not to confuse shmem with our handle on "/dev/zero".
+                */
+               return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
+       }
+
+       /* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
+       return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+#else
+       return -ENOSYS;
+#endif
+}
+
 static ssize_t write_full(struct file *file, const char __user *buf,
                          size_t count, loff_t *ppos)
 {
@@ -764,6 +787,7 @@ static const struct file_operations zero_fops = {
        .read_iter      = read_iter_zero,
        .write_iter     = write_iter_zero,
        .mmap           = mmap_zero,
+       .get_unmapped_area = get_unmapped_area_zero,
 #ifndef CONFIG_MMU
        .mmap_capabilities = zero_mmap_capabilities,
 #endif
index 466f18c73a495eb52ce8a18ad0b453578082dc64..ff2de4bab61f12823e2f8e69fbf1709013500390 100644 (file)
@@ -50,6 +50,8 @@ extern struct file *shmem_file_setup(const char *name,
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
                                            unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
+extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
+               unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern bool shmem_mapping(struct address_space *mapping);
 extern void shmem_unlock_mapping(struct address_space *mapping);
index 13282510bc0da71aa9789ff72ba015bede5c96e7..7fa5cbebbf19f75fd33caadd12327b60c6276ae9 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -476,13 +476,15 @@ static const struct file_operations shm_file_operations = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
        .release        = shm_release,
-#ifndef CONFIG_MMU
        .get_unmapped_area      = shm_get_unmapped_area,
-#endif
        .llseek         = noop_llseek,
        .fallocate      = shm_fallocate,
 };
 
+/*
+ * shm_file_operations_huge is now identical to shm_file_operations,
+ * but we keep it distinct for the sake of is_file_shm_hugepages().
+ */
 static const struct file_operations shm_file_operations_huge = {
        .mmap           = shm_mmap,
        .fsync          = shm_fsync,
index a41872c8f2af09994e6728cc7d282984e7851bea..86b18f334f4f759d8342a5220f8ac850c770ab03 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -25,6 +25,7 @@
 #include <linux/personality.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
 #include <linux/profile.h>
 #include <linux/export.h>
 #include <linux/mount.h>
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                return -ENOMEM;
 
        get_area = current->mm->get_unmapped_area;
-       if (file && file->f_op->get_unmapped_area)
-               get_area = file->f_op->get_unmapped_area;
+       if (file) {
+               if (file->f_op->get_unmapped_area)
+                       get_area = file->f_op->get_unmapped_area;
+       } else if (flags & MAP_SHARED) {
+               /*
+                * mmap_region() will call shmem_zero_setup() to create a file,
+                * so use shmem's get_unmapped_area in case it can be huge.
+                * do_mmap_pgoff() will clear pgoff, so match alignment.
+                */
+               pgoff = 0;
+               get_area = shmem_get_unmapped_area;
+       }
+
        addr = get_area(file, addr, len, pgoff, flags);
        if (IS_ERR_VALUE(addr))
                return addr;
index fd374f74d99fa82adf948ba282525be8fac97ab9..ab02b5bb5553694464449395ff6bf436c0c1040b 100644 (file)
@@ -1513,6 +1513,94 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return ret;
 }
 
+unsigned long shmem_get_unmapped_area(struct file *file,
+                                     unsigned long uaddr, unsigned long len,
+                                     unsigned long pgoff, unsigned long flags)
+{
+       unsigned long (*get_area)(struct file *,
+               unsigned long, unsigned long, unsigned long, unsigned long);
+       unsigned long addr;
+       unsigned long offset;
+       unsigned long inflated_len;
+       unsigned long inflated_addr;
+       unsigned long inflated_offset;
+
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       get_area = current->mm->get_unmapped_area;
+       addr = get_area(file, uaddr, len, pgoff, flags);
+
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+               return addr;
+       if (IS_ERR_VALUE(addr))
+               return addr;
+       if (addr & ~PAGE_MASK)
+               return addr;
+       if (addr > TASK_SIZE - len)
+               return addr;
+
+       if (shmem_huge == SHMEM_HUGE_DENY)
+               return addr;
+       if (len < HPAGE_PMD_SIZE)
+               return addr;
+       if (flags & MAP_FIXED)
+               return addr;
+       /*
+        * Our priority is to support MAP_SHARED mapped hugely;
+        * and support MAP_PRIVATE mapped hugely too, until it is COWed.
+        * But if caller specified an address hint, respect that as before.
+        */
+       if (uaddr)
+               return addr;
+
+       if (shmem_huge != SHMEM_HUGE_FORCE) {
+               struct super_block *sb;
+
+               if (file) {
+                       VM_BUG_ON(file->f_op != &shmem_file_operations);
+                       sb = file_inode(file)->i_sb;
+               } else {
+                       /*
+                        * Called directly from mm/mmap.c, or drivers/char/mem.c
+                        * for "/dev/zero", to create a shared anonymous object.
+                        */
+                       if (IS_ERR(shm_mnt))
+                               return addr;
+                       sb = shm_mnt->mnt_sb;
+               }
+               if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+                       return addr;
+       }
+
+       offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
+       if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
+               return addr;
+       if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
+               return addr;
+
+       inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
+       if (inflated_len > TASK_SIZE)
+               return addr;
+       if (inflated_len < len)
+               return addr;
+
+       inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+       if (IS_ERR_VALUE(inflated_addr))
+               return addr;
+       if (inflated_addr & ~PAGE_MASK)
+               return addr;
+
+       inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
+       inflated_addr += offset - inflated_offset;
+       if (inflated_offset > offset)
+               inflated_addr += HPAGE_PMD_SIZE;
+
+       if (inflated_addr > TASK_SIZE - len)
+               return addr;
+       return inflated_addr;
+}
+
 #ifdef CONFIG_NUMA
 static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
 {
@@ -3261,6 +3349,7 @@ static const struct address_space_operations shmem_aops = {
 
 static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
+       .get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
        .llseek         = shmem_file_llseek,
        .read_iter      = shmem_file_read_iter,
@@ -3496,6 +3585,15 @@ void shmem_unlock_mapping(struct address_space *mapping)
 {
 }
 
+#ifdef CONFIG_MMU
+unsigned long shmem_get_unmapped_area(struct file *file,
+                                     unsigned long addr, unsigned long len,
+                                     unsigned long pgoff, unsigned long flags)
+{
+       return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+#endif
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
        truncate_inode_pages_range(inode->i_mapping, lstart, lend);