Group short-lived and reclaimable kernel allocations
diff --git a/mm/shmem.c b/mm/shmem.c
index 0493e4d0bcaab2d5b0281446171b4de57598b8ee..76ecbac0d55bd8578de1f67d7655cf4b0babe995 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -48,7 +49,6 @@
 #include <linux/ctype.h>
 #include <linux/migrate.h>
 #include <linux/highmem.h>
-#include <linux/backing-dev.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
@@ -82,6 +82,7 @@ enum sgp_type {
        SGP_READ,       /* don't exceed i_size, don't allocate page */
        SGP_CACHE,      /* don't exceed i_size, may allocate page */
        SGP_WRITE,      /* may exceed i_size, may allocate page */
+       SGP_FAULT,      /* same as SGP_CACHE, return with page locked */
 };
 
 static int shmem_getpage(struct inode *inode, unsigned long idx,
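[Annotation: SGP_FAULT changes the locking contract of shmem_getpage(). With the
other modes the page normally comes back unlocked; a fault handler instead wants
the page still locked so it can return VM_FAULT_LOCKED and leave no window in
which truncation could yank the page before the VM maps it. A sketch of the two
contracts, assuming the usual case where no pre-locked page is passed in:

        struct page *page = NULL;
        int error, ret = 0;

        /* SGP_CACHE: on success the page is returned unlocked */
        error = shmem_getpage(inode, idx, &page, SGP_CACHE, &ret);

        /* SGP_FAULT: same allocation behaviour, but the page is returned
         * locked; the caller must see that unlock_page() eventually runs,
         * e.g. by returning VM_FAULT_LOCKED from its fault handler */
        error = shmem_getpage(inode, idx, &page, SGP_FAULT, &ret);
]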
@@ -93,8 +94,11 @@ static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
         * The above definition of ENTRIES_PER_PAGE, and the use of
         * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
         * might be reconsidered if it ever diverges from PAGE_SIZE.
+        *
+        * Mobility flags are masked out as swap vectors cannot move
         */
-       return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+       return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
+                               PAGE_CACHE_SHIFT-PAGE_SHIFT);
 }
 
 static inline void shmem_dir_free(struct page *page)
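[Annotation: two things change in shmem_dir_alloc(). First, swap-vector pages
are long-lived metadata that cannot be migrated, so the mobility hints this
series introduces are stripped before calling the allocator; otherwise these
pages would pollute MIGRATE_MOVABLE/MIGRATE_RECLAIMABLE pageblocks and defeat
the grouping. Second, __GFP_ZERO moves into the helper, which is why the call
site in shmem_swp_alloc() below drops it. In the gfp.h of this series the mask
is, roughly:

        /* include/linux/gfp.h (this series): both mobility hints */
        #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

        /* so swap vectors are always unmovable, zeroed pages */
        page = alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
                                PAGE_CACHE_SHIFT - PAGE_SHIFT);
]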
@@ -372,7 +376,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
                }
 
                spin_unlock(&info->lock);
-               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
                if (page)
                        set_page_private(page, 0);
                spin_lock(&info->lock);
@@ -967,7 +971,7 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
                *nodelist++ = '\0';
                if (nodelist_parse(nodelist, *policy_nodes))
                        goto out;
-               if (!nodes_subset(*policy_nodes, node_online_map))
+               if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
                        goto out;
        }
        if (!strcmp(value, "default")) {
@@ -992,9 +996,11 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
                        err = 0;
        } else if (!strcmp(value, "interleave")) {
                *policy = MPOL_INTERLEAVE;
-               /* Default to nodes online if no nodelist */
+               /*
+                * Default to online nodes with memory if no nodelist
+                */
                if (!nodelist)
-                       *policy_nodes = node_online_map;
+                       *policy_nodes = node_states[N_HIGH_MEMORY];
                err = 0;
        }
 out:
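[Annotation: node_online_map includes memoryless nodes, and building a memory
policy around a node that owns no pages either fails later or interleaves over
nothing. node_states[N_HIGH_MEMORY] is the subset of online nodes that actually
have (high) memory, which is what policy code cares about. The relationship, as
a sketch against the nodemask API of this era:

        /* memoryless nodes are online but absent from N_HIGH_MEMORY */
        nodemask_t online  = node_states[N_ONLINE];     /* aka node_online_map */
        nodemask_t has_mem = node_states[N_HIGH_MEMORY];

        /* has_mem is always a subset of online, never the reverse */
        WARN_ON(!nodes_subset(has_mem, online));
]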
@@ -1056,7 +1062,8 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
        return page;
 }
 #else
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static inline int shmem_parse_mpol(char *value, int *policy,
+                                               nodemask_t *policy_nodes)
 {
        return 1;
 }
@@ -1096,11 +1103,15 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 
        if (idx >= SHMEM_MAX_INDEX)
                return -EFBIG;
+
+       if (type)
+               *type = 0;
+
        /*
         * Normally, filepage is NULL on entry, and either found
         * uptodate immediately, or allocated and zeroed, or read
         * in under swappage, which is then assigned to filepage.
-        * But shmem_readpage and shmem_prepare_write pass in a locked
+        * But shmem_readpage and shmem_write_begin pass in a locked
         * filepage, which may be found not uptodate by other callers
         * too, and may need to be copied from the swappage read in.
         */
@@ -1129,9 +1140,9 @@ repeat:
                if (!swappage) {
                        shmem_swp_unmap(entry);
                        /* here we actually do the io */
-                       if (type && *type == VM_FAULT_MINOR) {
+                       if (type && !(*type & VM_FAULT_MAJOR)) {
                                __count_vm_event(PGMAJFAULT);
-                               *type = VM_FAULT_MAJOR;
+                               *type |= VM_FAULT_MAJOR;
                        }
                        spin_unlock(&info->lock);
                        swappage = shmem_swapin(info, swap, idx);
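[Annotation: the old fault API passed an enumerated value through *type
(VM_FAULT_MINOR or VM_FAULT_MAJOR); the reworked API treats it as a bitmask of
VM_FAULT_* flags in which "minor" is simply the absence of VM_FAULT_MAJOR,
hence the bit test and |= above, and hence the new *type = 0 initialisation at
the top of shmem_getpage(). The flag values in include/linux/mm.h of this era
are roughly:

        #define VM_FAULT_OOM    0x0001
        #define VM_FAULT_SIGBUS 0x0002
        #define VM_FAULT_MAJOR  0x0004
        #define VM_FAULT_WRITE  0x0008  /* special case for get_user_pages */
        #define VM_FAULT_NOPAGE 0x0100  /* ->fault installed the pte itself */
        #define VM_FAULT_LOCKED 0x0200  /* ->fault locked the returned page */
]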
@@ -1285,8 +1296,10 @@ repeat:
        }
 done:
        if (*pagep != filepage) {
-               unlock_page(filepage);
                *pagep = filepage;
+               if (sgp != SGP_FAULT)
+                       unlock_page(filepage);
+
        }
        return 0;
 
@@ -1298,72 +1311,21 @@ failed:
        return error;
 }
 
-static struct page *shmem_nopage(struct vm_area_struct *vma,
-                                unsigned long address, int *type)
+static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-       struct page *page = NULL;
-       unsigned long idx;
        int error;
+       int ret;
 
-       idx = (address - vma->vm_start) >> PAGE_SHIFT;
-       idx += vma->vm_pgoff;
-       idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-       if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-               return NOPAGE_SIGBUS;
+       if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+               return VM_FAULT_SIGBUS;
 
-       error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+       error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
        if (error)
-               return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
-
-       mark_page_accessed(page);
-       return page;
-}
-
-static int shmem_populate(struct vm_area_struct *vma,
-       unsigned long addr, unsigned long len,
-       pgprot_t prot, unsigned long pgoff, int nonblock)
-{
-       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-       struct mm_struct *mm = vma->vm_mm;
-       enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
-       unsigned long size;
-
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
-               return -EINVAL;
+               return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
 
-       while ((long) len > 0) {
-               struct page *page = NULL;
-               int err;
-               /*
-                * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
-                */
-               err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
-               if (err)
-                       return err;
-               /* Page may still be null, but only if nonblock was set. */
-               if (page) {
-                       mark_page_accessed(page);
-                       err = install_page(mm, vma, addr, page, prot);
-                       if (err) {
-                               page_cache_release(page);
-                               return err;
-                       }
-               } else if (vma->vm_flags & VM_NONLINEAR) {
-                       /* No page was found just because we can't read it in
-                        * now (being here implies nonblock != 0), but the page
-                        * may exist, so set the PTE to fault it in later. */
-                       err = install_file_pte(mm, vma, addr, pgoff, prot);
-                       if (err)
-                               return err;
-               }
-
-               len -= PAGE_SIZE;
-               addr += PAGE_SIZE;
-               pgoff++;
-       }
-       return 0;
+       mark_page_accessed(vmf->page);
+       return ret | VM_FAULT_LOCKED;
 }
 
 #ifdef CONFIG_NUMA
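[Annotation: shmem_nopage() and shmem_populate() collapse into a single ->fault
handler. The VM now computes the file offset and hands it over in struct
vm_fault, the handler stores the resolved page in vmf->page, and the return
value is a VM_FAULT_* mask; returning VM_FAULT_LOCKED here (together with
SGP_FAULT above) tells the core the page is still locked, closing the truncate
race that ->nopage could not. For reference, struct vm_fault in this era looks
roughly like:

        struct vm_fault {
                unsigned int flags;             /* FAULT_FLAG_xxx flags */
                pgoff_t pgoff;                  /* logical offset into the file */
                void __user *virtual_address;   /* faulting virtual address */
                struct page *page;              /* set by ->fault unless
                                                   VM_FAULT_NOPAGE is returned */
        };
]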
@@ -1410,6 +1372,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
 }
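[Annotation: VM_CAN_NONLINEAR advertises that shmem's ->fault can resolve an
arbitrary pgoff, not just the linear one, so remap_file_pages() no longer needs
the per-filesystem ->populate hook removed above; the generic code simply
refuses nonlinear mappings on filesystems that do not set the flag, roughly:

        /* mm/fremap.c, sys_remap_file_pages(), abridged (this era) */
        if (!(vma->vm_flags & VM_CAN_NONLINEAR))
                goto out;       /* fs can't take faults at arbitrary pgoffs */
]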
 
@@ -1485,7 +1448,7 @@ static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin;
  * but providing them allows a tmpfs file to be used for splice, sendfile, and
  * below the loop driver, in the generic fashion that many filesystems support.
  */
@@ -1498,10 +1461,30 @@ static int shmem_readpage(struct file *file, struct page *page)
 }
 
 static int
-shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+shmem_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata)
 {
-       struct inode *inode = page->mapping->host;
-       return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
+       struct inode *inode = mapping->host;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       *pagep = NULL;
+       return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+}
+
+static int
+shmem_write_end(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned copied,
+                       struct page *page, void *fsdata)
+{
+       struct inode *inode = mapping->host;
+
+       set_page_dirty(page);
+       page_cache_release(page);
+
+       if (pos+copied > inode->i_size)
+               i_size_write(inode, pos+copied);
+
+       return copied;
 }
 
 static ssize_t
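[Annotation: prepare_write/commit_write worked on a page the caller had already
looked up; the write_begin/write_end pair make the filesystem do the lookup,
which suits shmem_getpage() well. The generic write path drives the pair like
this (heavily abridged from the generic buffered-write loop of this era; error
handling and iov bookkeeping elided):

        status = a_ops->write_begin(file, mapping, pos, bytes,
                                    flags, &page, &fsdata);
        if (status)
                break;

        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        flush_dcache_page(page);

        status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                  page, fsdata);

shmem_write_end() therefore owns the page reference taken by shmem_getpage():
it dirties the page, drops the reference, and extends i_size itself instead of
relying on the old simple_commit_write() helper.]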
@@ -2258,7 +2241,7 @@ static int shmem_fill_super(struct super_block *sb,
        unsigned long blocks = 0;
        unsigned long inodes = 0;
        int policy = MPOL_DEFAULT;
-       nodemask_t policy_nodes = node_online_map;
+       nodemask_t policy_nodes = node_states[N_HIGH_MEMORY];
 
 #ifdef CONFIG_TMPFS
        /*
@@ -2361,7 +2344,7 @@ static int init_inodecache(void)
 {
        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
                                sizeof(struct shmem_inode_info),
-                               0, 0, init_once, NULL);
+                               0, 0, init_once);
        if (shmem_inode_cachep == NULL)
                return -ENOMEM;
        return 0;
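[Annotation: the trailing NULL argument disappears because kmem_cache_create()
dropped its never-used destructor parameter in this cycle; the surviving
prototype is roughly:

        /* include/linux/slab.h after the destructor removal */
        struct kmem_cache *kmem_cache_create(const char *name,
                        size_t size, size_t align, unsigned long flags,
                        void (*ctor)(void *, struct kmem_cache *, unsigned long));
]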
@@ -2377,8 +2360,8 @@ static const struct address_space_operations shmem_aops = {
        .set_page_dirty = __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
        .readpage       = shmem_readpage,
-       .prepare_write  = shmem_prepare_write,
-       .commit_write   = simple_commit_write,
+       .write_begin    = shmem_write_begin,
+       .write_end      = shmem_write_end,
 #endif
        .migratepage    = migrate_page,
 };
@@ -2455,8 +2438,7 @@ static const struct super_operations shmem_ops = {
 };
 
 static struct vm_operations_struct shmem_vm_ops = {
-       .nopage         = shmem_nopage,
-       .populate       = shmem_populate,
+       .fault          = shmem_fault,
 #ifdef CONFIG_NUMA
        .set_policy     = shmem_set_policy,
        .get_policy     = shmem_get_policy,