Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Thu, 19 Jul 2007 21:41:33 +0000 (14:41 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Thu, 19 Jul 2007 21:41:33 +0000 (14:41 -0700)
* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6:
  [XFS] Fix inode size update before data write in xfs_setattr
  [XFS] Allow punching holes to free space when at ENOSPC
  [XFS] Implement ->page_mkwrite in XFS.
  [FS] Implement block_page_mkwrite.

Manually fix up conflict with Nick's VM fault handling patches in
fs/xfs/linux-2.6/xfs_file.c

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/buffer.c
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/xfs_vnodeops.c
include/linux/buffer_head.h

index 0f9006714230073ea656aa909de6cd18ed05e903..02ebb1f1d3b0e2fa70298d21dbb1b88d4471a65d 100644 (file)
@@ -2194,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page,
        return 0;
 }
 
+/*
+ * block_page_mkwrite() is called from the page fault handler when a page is
+ * first dirtied, so it must not change the file size and has to check for EOF
+ * itself. The page is prepared and committed as a written page, which gives
+ * ENOSPC checking when writing into holes and correct delalloc and unwritten
+ * extent mapping on filesystems that support these features.
+ *
+ * We are not allowed to take the i_mutex here, so truncate races are handled
+ * with the page lock: because vmtruncate() writes the inode size before
+ * removing pages, once the page is locked we can safely tell whether it is
+ * beyond EOF, and if it is not it stays safe against truncation until unlock.
+ * Page offsets are computed as loff_t so that the EOF comparisons do not wrap
+ * for pages at or above 4GB on 32-bit. Returns -EINVAL if the page was
+ * truncated away, else the block_prepare_write()/block_commit_write() result.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+                  get_block_t get_block)
+{
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       unsigned long end;
+       loff_t size;
+       int ret = -EINVAL;
+
+       lock_page(page);
+       size = i_size_read(inode);
+       if ((page->mapping != inode->i_mapping) ||
+           (((loff_t)page->index << PAGE_CACHE_SHIFT) > size)) {
+               /* page got truncated out from underneath us */
+               goto out_unlock;
+       }
+
+       /* page is wholly or partially inside EOF */
+       if ((((loff_t)page->index + 1) << PAGE_CACHE_SHIFT) > size)
+               end = size & ~PAGE_CACHE_MASK;
+       else
+               end = PAGE_CACHE_SIZE;
+
+       ret = block_prepare_write(page, 0, end, get_block);
+       if (!ret)
+               ret = block_commit_write(page, 0, end);
+
+out_unlock:
+       unlock_page(page);
+       return ret;
+}
 
 /*
  * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2977,6 +3023,7 @@ EXPORT_SYMBOL(__brelse);
 EXPORT_SYMBOL(__wait_on_buffer);
 EXPORT_SYMBOL(block_commit_write);
 EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
 EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
index 2d4be2f247b24cfc69efce59a0315f849feafebb..0d4001eafd16861daf44ef05ea0ee7200388dbf7 100644 (file)
@@ -413,6 +413,20 @@ xfs_file_open_exec(
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
+/*
+ * ->page_mkwrite handler: an mmap()ed page took a write protection fault and
+ * is being made writable. block_page_mkwrite() with xfs_get_blocks sets the
+ * page state up as for a written page, giving correct delalloc accounting
+ * (ENOSPC checking!) and unwritten extent mapping; its result is returned.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct page             *page)
+{
+       return block_page_mkwrite(vma, page, xfs_get_blocks);
+}
+
 const struct file_operations xfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
@@ -465,11 +479,13 @@ const struct file_operations xfs_dir_file_operations = {
 
 static struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = filemap_fault,
+       .page_mkwrite   = xfs_vm_page_mkwrite, /* write-protect fault hook */
 };
 
 #ifdef CONFIG_XFS_DMAPI
 static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
        .fault          = xfs_vm_fault,
+       .page_mkwrite   = xfs_vm_page_mkwrite,
 #ifdef HAVE_VMOP_MPROTECT
        .mprotect       = xfs_vm_mprotect,
 #endif
index 79b522779aa485dca1bccbf101d5585f6c2a1d92..1a5ad8cd97b00d3d8cd24c04961ea075cb5e1635 100644 (file)
@@ -589,7 +589,30 @@ xfs_setattr(
                        code = xfs_igrow_start(ip, vap->va_size, credp);
                }
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               vn_iowait(vp); /* wait for the completion of any pending DIOs */
+
+               /*
+                * We are going to log the inode size change in this
+                * transaction, so any previous writes between the on-disk
+                * EOF and the new EOF that have not been written out need
+                * to be written here. If we do not write the data out, we
+                * expose ourselves to the null files problem.
+                *
+                * Only flush from the on disk size to the smaller of the in
+                * memory file size or the new size as that's the range we
+                * really care about here and prevents waiting for other data
+                * not within the range we care about here.
+                */
+               if (!code &&
+                   (ip->i_size != ip->i_d.di_size) &&
+                   (vap->va_size > ip->i_d.di_size)) {
+                       code = bhv_vop_flush_pages(XFS_ITOV(ip),
+                                       ip->i_d.di_size, vap->va_size,
+                                       XFS_B_ASYNC, FI_NONE);
+               }
+
+               /* wait for all I/O to complete */
+               vn_iowait(vp);
+
                if (!code)
                        code = xfs_itruncate_data(ip, vap->va_size);
                if (code) {
@@ -4434,9 +4457,12 @@ xfs_free_file_space(
        while (!error && !done) {
 
                /*
-                * allocate and setup the transaction
+                * allocate and setup the transaction. Allow this
+                * transaction to dip into the reserve blocks to ensure
+                * the freeing of the space succeeds at ENOSPC.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+               tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp,
                                          resblks,
                                          XFS_WRITE_LOG_RES(mp),
index 5c6e12853a9bb758918e809de1c8a94c56a731f6..35cadad84b142b93a5128ebefc9f8cbaeb9ea62e 100644 (file)
@@ -209,6 +209,8 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 int generic_cont_expand(struct inode *inode, loff_t size);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
+int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+                               get_block_t get_block);
 void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);