dax: New fault locking
[sfrench/cifs-2.6.git] / mm / filemap.c
index a8c69c8c0a90a1e50cfdc822ce2cbc3854e0c8e8..7b9a4b180cae2a8ae3c4d24e17a27db03e1f91fd 100644 (file)
@@ -160,13 +160,15 @@ static void page_cache_tree_delete(struct address_space *mapping,
                        return;
 
        /*
-        * Track node that only contains shadow entries.
+        * Track node that only contains shadow entries. DAX mappings contain
+        * no shadow entries and may contain other exceptional entries so skip
+        * those.
         *
         * Avoid acquiring the list_lru lock if already tracked.  The
         * list_empty() test is safe as node->private_list is
         * protected by mapping->tree_lock.
         */
-       if (!workingset_node_pages(node) &&
+       if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
            list_empty(&node->private_list)) {
                node->private_data = mapping;
                list_lru_add(&workingset_shadow_nodes, &node->private_list);
@@ -265,7 +267,7 @@ void delete_from_page_cache(struct page *page)
 
        if (freepage)
                freepage(page);
-       page_cache_release(page);
+       put_page(page);
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -352,8 +354,8 @@ EXPORT_SYMBOL(filemap_flush);
 static int __filemap_fdatawait_range(struct address_space *mapping,
                                     loff_t start_byte, loff_t end_byte)
 {
-       pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
-       pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
+       pgoff_t index = start_byte >> PAGE_SHIFT;
+       pgoff_t end = end_byte >> PAGE_SHIFT;
        struct pagevec pvec;
        int nr_pages;
        int ret = 0;
@@ -550,7 +552,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                pgoff_t offset = old->index;
                freepage = mapping->a_ops->freepage;
 
-               page_cache_get(new);
+               get_page(new);
                new->mapping = mapping;
                new->index = offset;
 
@@ -572,7 +574,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                radix_tree_preload_end();
                if (freepage)
                        freepage(old);
-               page_cache_release(old);
+               put_page(old);
        }
 
        return error;
@@ -597,14 +599,24 @@ static int page_cache_tree_insert(struct address_space *mapping,
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
 
-               if (WARN_ON(dax_mapping(mapping)))
-                       return -EINVAL;
-
-               if (shadowp)
-                       *shadowp = p;
                mapping->nrexceptional--;
-               if (node)
-                       workingset_node_shadows_dec(node);
+               if (!dax_mapping(mapping)) {
+                       if (shadowp)
+                               *shadowp = p;
+                       if (node)
+                               workingset_node_shadows_dec(node);
+               } else {
+                       /* DAX can replace empty locked entry with a hole */
+                       WARN_ON_ONCE(p !=
+                               (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
+                                        RADIX_DAX_ENTRY_LOCK));
+                       /* DAX accounts exceptional entries as normal pages */
+                       if (node)
+                               workingset_node_pages_dec(node);
+                       /* Wakeup waiters for exceptional entry lock */
+                       dax_wake_mapping_entry_waiter(mapping, page->index,
+                                                     false);
+               }
        }
        radix_tree_replace_slot(slot, page);
        mapping->nrpages++;
@@ -651,7 +663,7 @@ static int __add_to_page_cache_locked(struct page *page,
                return error;
        }
 
-       page_cache_get(page);
+       get_page(page);
        page->mapping = mapping;
        page->index = offset;
 
@@ -675,7 +687,7 @@ err_insert:
        spin_unlock_irq(&mapping->tree_lock);
        if (!huge)
                mem_cgroup_cancel_charge(page, memcg, false);
-       page_cache_release(page);
+       put_page(page);
        return error;
 }
 
@@ -1083,7 +1095,7 @@ repeat:
                 * include/linux/pagemap.h for details.
                 */
                if (unlikely(page != *pagep)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
        }
@@ -1121,7 +1133,7 @@ repeat:
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
                VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1168,7 +1180,7 @@ repeat:
        if (fgp_flags & FGP_LOCK) {
                if (fgp_flags & FGP_NOWAIT) {
                        if (!trylock_page(page)) {
-                               page_cache_release(page);
+                               put_page(page);
                                return NULL;
                        }
                } else {
@@ -1178,7 +1190,7 @@ repeat:
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
                VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1209,7 +1221,7 @@ no_page:
                err = add_to_page_cache_lru(page, mapping, offset,
                                gfp_mask & GFP_RECLAIM_MASK);
                if (unlikely(err)) {
-                       page_cache_release(page);
+                       put_page(page);
                        page = NULL;
                        if (err == -EEXIST)
                                goto repeat;
@@ -1278,7 +1290,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 export:
@@ -1343,7 +1355,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1405,7 +1417,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1415,7 +1427,7 @@ repeat:
                 * negatives, which is just confusing to the caller.
                 */
                if (page->mapping == NULL || page->index != iter.index) {
-                       page_cache_release(page);
+                       put_page(page);
                        break;
                }
 
@@ -1482,7 +1494,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1549,7 +1561,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 export:
@@ -1610,11 +1622,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
        unsigned int prev_offset;
        int error = 0;
 
-       index = *ppos >> PAGE_CACHE_SHIFT;
-       prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
-       prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-       last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-       offset = *ppos & ~PAGE_CACHE_MASK;
+       index = *ppos >> PAGE_SHIFT;
+       prev_index = ra->prev_pos >> PAGE_SHIFT;
+       prev_offset = ra->prev_pos & (PAGE_SIZE-1);
+       last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+       offset = *ppos & ~PAGE_MASK;
 
        for (;;) {
                struct page *page;
@@ -1648,7 +1660,7 @@ find_page:
                        if (PageUptodate(page))
                                goto page_ok;
 
-                       if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+                       if (inode->i_blkbits == PAGE_SHIFT ||
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
                        if (!trylock_page(page))
@@ -1672,18 +1684,18 @@ page_ok:
                 */
 
                isize = i_size_read(inode);
-               end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+               end_index = (isize - 1) >> PAGE_SHIFT;
                if (unlikely(!isize || index > end_index)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto out;
                }
 
                /* nr is the maximum number of bytes to copy from this page */
-               nr = PAGE_CACHE_SIZE;
+               nr = PAGE_SIZE;
                if (index == end_index) {
-                       nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+                       nr = ((isize - 1) & ~PAGE_MASK) + 1;
                        if (nr <= offset) {
-                               page_cache_release(page);
+                               put_page(page);
                                goto out;
                        }
                }
@@ -1711,11 +1723,11 @@ page_ok:
 
                ret = copy_page_to_iter(page, offset, nr, iter);
                offset += ret;
-               index += offset >> PAGE_CACHE_SHIFT;
-               offset &= ~PAGE_CACHE_MASK;
+               index += offset >> PAGE_SHIFT;
+               offset &= ~PAGE_MASK;
                prev_offset = offset;
 
-               page_cache_release(page);
+               put_page(page);
                written += ret;
                if (!iov_iter_count(iter))
                        goto out;
@@ -1735,7 +1747,7 @@ page_not_up_to_date_locked:
                /* Did it get truncated before we got the lock? */
                if (!page->mapping) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        continue;
                }
 
@@ -1757,7 +1769,7 @@ readpage:
 
                if (unlikely(error)) {
                        if (error == AOP_TRUNCATED_PAGE) {
-                               page_cache_release(page);
+                               put_page(page);
                                error = 0;
                                goto find_page;
                        }
@@ -1774,7 +1786,7 @@ readpage:
                                         * invalidate_mapping_pages got it
                                         */
                                        unlock_page(page);
-                                       page_cache_release(page);
+                                       put_page(page);
                                        goto find_page;
                                }
                                unlock_page(page);
@@ -1789,7 +1801,7 @@ readpage:
 
 readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
-               page_cache_release(page);
+               put_page(page);
                goto out;
 
 no_cached_page:
@@ -1805,7 +1817,7 @@ no_cached_page:
                error = add_to_page_cache_lru(page, mapping, index,
                                mapping_gfp_constraint(mapping, GFP_KERNEL));
                if (error) {
-                       page_cache_release(page);
+                       put_page(page);
                        if (error == -EEXIST) {
                                error = 0;
                                goto find_page;
@@ -1817,10 +1829,10 @@ no_cached_page:
 
 out:
        ra->prev_pos = prev_index;
-       ra->prev_pos <<= PAGE_CACHE_SHIFT;
+       ra->prev_pos <<= PAGE_SHIFT;
        ra->prev_pos |= prev_offset;
 
-       *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
+       *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
        file_accessed(filp);
        return written ? written : error;
 }
@@ -1912,7 +1924,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
                else if (ret == -EEXIST)
                        ret = 0; /* losing race to add is OK */
 
-               page_cache_release(page);
+               put_page(page);
 
        } while (ret == AOP_TRUNCATED_PAGE);
 
@@ -2022,8 +2034,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size;
        int ret = 0;
 
-       size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-       if (offset >= size >> PAGE_CACHE_SHIFT)
+       size = round_up(i_size_read(inode), PAGE_SIZE);
+       if (offset >= size >> PAGE_SHIFT)
                return VM_FAULT_SIGBUS;
 
        /*
@@ -2049,7 +2061,7 @@ retry_find:
        }
 
        if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
-               page_cache_release(page);
+               put_page(page);
                return ret | VM_FAULT_RETRY;
        }
 
@@ -2072,10 +2084,10 @@ retry_find:
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
         */
-       size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-       if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
+       size = round_up(i_size_read(inode), PAGE_SIZE);
+       if (unlikely(offset >= size >> PAGE_SHIFT)) {
                unlock_page(page);
-               page_cache_release(page);
+               put_page(page);
                return VM_FAULT_SIGBUS;
        }
 
@@ -2120,7 +2132,7 @@ page_not_uptodate:
                if (!PageUptodate(page))
                        error = -EIO;
        }
-       page_cache_release(page);
+       put_page(page);
 
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
@@ -2164,7 +2176,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -2178,8 +2190,8 @@ repeat:
                if (page->mapping != mapping || !PageUptodate(page))
                        goto unlock;
 
-               size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
-               if (page->index >= size >> PAGE_CACHE_SHIFT)
+               size = round_up(i_size_read(mapping->host), PAGE_SIZE);
+               if (page->index >= size >> PAGE_SHIFT)
                        goto unlock;
 
                pte = vmf->pte + page->index - vmf->pgoff;
@@ -2195,7 +2207,7 @@ repeat:
 unlock:
                unlock_page(page);
 skip:
-               page_cache_release(page);
+               put_page(page);
 next:
                if (iter.index == vmf->max_pgoff)
                        break;
@@ -2278,7 +2290,7 @@ static struct page *wait_on_page_read(struct page *page)
        if (!IS_ERR(page)) {
                wait_on_page_locked(page);
                if (!PageUptodate(page)) {
-                       page_cache_release(page);
+                       put_page(page);
                        page = ERR_PTR(-EIO);
                }
        }
@@ -2301,7 +2313,7 @@ repeat:
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, gfp);
                if (unlikely(err)) {
-                       page_cache_release(page);
+                       put_page(page);
                        if (err == -EEXIST)
                                goto repeat;
                        /* Presumably ENOMEM for radix tree node */
@@ -2311,7 +2323,7 @@ repeat:
 filler:
                err = filler(data, page);
                if (err < 0) {
-                       page_cache_release(page);
+                       put_page(page);
                        return ERR_PTR(err);
                }
 
@@ -2364,7 +2376,7 @@ filler:
        /* Case c or d, restart the operation */
        if (!page->mapping) {
                unlock_page(page);
-               page_cache_release(page);
+               put_page(page);
                goto repeat;
        }
 
@@ -2511,7 +2523,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
        struct iov_iter data;
 
        write_len = iov_iter_count(from);
-       end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
+       end = (pos + write_len - 1) >> PAGE_SHIFT;
 
        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
        if (written)
@@ -2525,7 +2537,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
         */
        if (mapping->nrpages) {
                written = invalidate_inode_pages2_range(mapping,
-                                       pos >> PAGE_CACHE_SHIFT, end);
+                                       pos >> PAGE_SHIFT, end);
                /*
                 * If a page can not be invalidated, return 0 to fall back
                 * to buffered write.
@@ -2550,7 +2562,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
         */
        if (mapping->nrpages) {
                invalidate_inode_pages2_range(mapping,
-                                             pos >> PAGE_CACHE_SHIFT, end);
+                                             pos >> PAGE_SHIFT, end);
        }
 
        if (written > 0) {
@@ -2611,8 +2623,8 @@ ssize_t generic_perform_write(struct file *file,
                size_t copied;          /* Bytes copied from user */
                void *fsdata;
 
-               offset = (pos & (PAGE_CACHE_SIZE - 1));
-               bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+               offset = (pos & (PAGE_SIZE - 1));
+               bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_count(i));
 
 again:
@@ -2665,7 +2677,7 @@ again:
                         * because not all segments in the iov can be copied at
                         * once without a pagefault.
                         */
-                       bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_single_seg_count(i));
                        goto again;
                }
@@ -2752,8 +2764,8 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                        iocb->ki_pos = endbyte + 1;
                        written += status;
                        invalidate_mapping_pages(mapping,
-                                                pos >> PAGE_CACHE_SHIFT,
-                                                endbyte >> PAGE_CACHE_SHIFT);
+                                                pos >> PAGE_SHIFT,
+                                                endbyte >> PAGE_SHIFT);
                } else {
                        /*
                         * We don't know how much we wrote, so just return