NTFS: Version 2.1.32 - Update file write from aio_write to write_iter.
author Anton Altaparmakov <anton@tuxera.com>
Wed, 11 Mar 2015 14:43:32 +0000 (10:43 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Sun, 12 Apr 2015 02:24:33 +0000 (22:24 -0400)
Signed-off-by: Anton Altaparmakov <anton@tuxera.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
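The conversion replaces the iovec-based ->aio_write entry point with the iov_iter-based ->write_iter one, and the plain ->write path moves from do_sync_write() to new_sync_write().  As a rough illustration of the interface shape in this kernel era (an excerpt sketched from memory, not part of the patch):

/*
 * Illustrative excerpt of struct file_operations around Linux 4.0: the old
 * aio_write method takes a raw iovec array plus segment count, while
 * write_iter takes an iov_iter that carries the iteration state itself.
 */
struct file_operations {
	/* ... */
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*aio_write) (struct kiocb *, const struct iovec *,
			unsigned long, loff_t);		/* old entry point */
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
							/* new entry point */
	/* ... */
};

Once ->write_iter is provided, the synchronous write(2) path is served by new_sync_write(), which is why the operations table at the bottom of the patch switches .write from do_sync_write to new_sync_write.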
fs/ntfs/Makefile
fs/ntfs/file.c

diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c49140417cafe6559a167cd17d92e4..2ff263e6d363dba5f9621ad705c5795aaf855326 100644
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 
 ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
 
-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
 ccflags-$(CONFIG_NTFS_DEBUG)   += -DDEBUG
 ccflags-$(CONFIG_NTFS_RW)      += -DNTFS_RW
 
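The remainder of the patch rewrites fs/ntfs/file.c: the driver's private fault-in and copy helpers for raw iovecs are dropped, and the buffered write path is rebuilt on the generic iov_iter primitives (iov_iter_count(), iov_iter_advance(), iov_iter_copy_from_user_atomic(), iov_iter_single_seg_count(), iov_iter_fault_in_multipages_readable()).  A rough, hypothetical sketch of such a per-page copy loop, much simplified from the new ntfs_copy_from_user_iter() below and not the actual helper:

/*
 * Simplified sketch of copying an iov_iter into a run of page cache pages.
 * Unlike the real NTFS helper, this advances @i directly and leaves any
 * zeroing of the tail after a short copy to the caller.
 */
static size_t demo_copy_into_pages(struct page **pages, unsigned nr_pages,
		unsigned ofs, struct iov_iter *i, size_t bytes)
{
	size_t total = 0;
	unsigned n;

	for (n = 0; n < nr_pages && bytes; n++) {
		unsigned len = min_t(unsigned, PAGE_CACHE_SIZE - ofs, bytes);
		/* Copy from the (previously faulted-in) user buffer. */
		size_t copied = iov_iter_copy_from_user_atomic(pages[n], i,
				ofs, len);

		iov_iter_advance(i, copied);
		total += copied;
		bytes -= copied;
		if (copied < len)
			break;	/* short copy: caller retries or zeroes the rest */
		ofs = 0;	/* only the first page starts mid-page */
	}
	return total;
}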
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc4e32d9ac9a95eb0ee2553c5a1e46..29139ffb43289e47d4c5d71c17fd2ff9c2cd3ba3 100644
@@ -1,7 +1,7 @@
 /*
  * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -329,62 +329,168 @@ err_out:
        return err;
 }
 
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- *     do {
- *             ret = __get_user(c, uaddr);
- *             uaddr += PAGE_SIZE;
- *     } while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
-               int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+               size_t *count)
 {
-       const char __user *end;
-       volatile char c;
-
-       /* Set @end to the first byte outside the last page we care about. */
-       end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
-       while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
-               ;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
-               size_t iov_ofs, int bytes)
-{
-       do {
-               const char __user *buf;
-               unsigned len;
+       loff_t pos;
+       s64 end, ll;
+       ssize_t err;
+       unsigned long flags;
+       struct inode *vi = file_inode(file);
+       ntfs_inode *base_ni, *ni = NTFS_I(vi);
+       ntfs_volume *vol = ni->vol;
 
-               buf = iov->iov_base + iov_ofs;
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               ntfs_fault_in_pages_readable(buf, len);
-               bytes -= len;
-               iov++;
-               iov_ofs = 0;
-       } while (bytes);
+       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+                       "0x%llx, count 0x%lx.", vi->i_ino,
+                       (unsigned)le32_to_cpu(ni->type),
+                       (unsigned long long)*ppos, (unsigned long)*count);
+       /* We can write back this queue in page reclaim. */
+       current->backing_dev_info = inode_to_bdi(vi);
+       err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+       if (unlikely(err))
+               goto out;
+       /*
+        * All checks have passed.  Before we start doing any writing we want
+        * to abort any totally illegal writes.
+        */
+       BUG_ON(NInoMstProtected(ni));
+       BUG_ON(ni->type != AT_DATA);
+       /* If file is encrypted, deny access, just like NT4. */
+       if (NInoEncrypted(ni)) {
+               /* Only $DATA attributes can be encrypted. */
+               /*
+                * Reminder for later: Encrypted files are _always_
+                * non-resident so that the content can always be encrypted.
+                */
+               ntfs_debug("Denying write access to encrypted file.");
+               err = -EACCES;
+               goto out;
+       }
+       if (NInoCompressed(ni)) {
+               /* Only unnamed $DATA attribute can be compressed. */
+               BUG_ON(ni->name_len);
+               /*
+                * Reminder for later: If resident, the data is not actually
+                * compressed.  Only on the switch to non-resident does
+                * compression kick in.  This is in contrast to encrypted files
+                * (see above).
+                */
+               ntfs_error(vi->i_sb, "Writing to compressed files is not "
+                               "implemented yet.  Sorry.");
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+       if (*count == 0)
+               goto out;
+       base_ni = ni;
+       if (NInoAttr(ni))
+               base_ni = ni->ext.base_ntfs_ino;
+       err = file_remove_suid(file);
+       if (unlikely(err))
+               goto out;
+       /*
+        * Our ->update_time method always succeeds, thus file_update_time()
+        * cannot fail either, so there is no need to check the return code.
+        */
+       file_update_time(file);
+       pos = *ppos;
+       /* The first byte after the last cluster being written to. */
+       end = (pos + *count + vol->cluster_size_mask) &
+                       ~(u64)vol->cluster_size_mask;
+       /*
+        * If the write goes beyond the allocated size, extend the allocation
+        * to cover the whole of the write, rounded up to the nearest cluster.
+        */
+       read_lock_irqsave(&ni->size_lock, flags);
+       ll = ni->allocated_size;
+       read_unlock_irqrestore(&ni->size_lock, flags);
+       if (end > ll) {
+               /*
+                * Extend the allocation without changing the data size.
+                *
+                * Note we ensure the allocation is big enough to at least
+                * write some data but we do not require the allocation to be
+                * complete, i.e. it may be partial.
+                */
+               ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+               if (likely(ll >= 0)) {
+                       BUG_ON(pos >= ll);
+                       /* If the extension was partial, truncate the write. */
+                       if (end > ll) {
+                               ntfs_debug("Truncating write to inode 0x%lx, "
+                                               "attribute type 0x%x, because "
+                                               "the allocation was only "
+                                               "partially extended.",
+                                               vi->i_ino, (unsigned)
+                                               le32_to_cpu(ni->type));
+                               *count = ll - pos;
+                       }
+               } else {
+                       err = ll;
+                       read_lock_irqsave(&ni->size_lock, flags);
+                       ll = ni->allocated_size;
+                       read_unlock_irqrestore(&ni->size_lock, flags);
+                       /* Perform a partial write if possible or fail. */
+                       if (pos < ll) {
+                               ntfs_debug("Truncating write to inode 0x%lx, "
+                                               "attribute type 0x%x, because "
+                                               "extending the allocation "
+                                               "failed (error %d).",
+                                               vi->i_ino, (unsigned)
+                                               le32_to_cpu(ni->type),
+                                               (int)-err);
+                               *count = ll - pos;
+                       } else {
+                               if (err != -ENOSPC)
+                                       ntfs_error(vi->i_sb, "Cannot perform "
+                                                       "write to inode "
+                                                       "0x%lx, attribute "
+                                                       "type 0x%x, because "
+                                                       "extending the "
+                                                       "allocation failed "
+                                                       "(error %ld).",
+                                                       vi->i_ino, (unsigned)
+                                                       le32_to_cpu(ni->type),
+                                                       (long)-err);
+                               else
+                                       ntfs_debug("Cannot perform write to "
+                                                       "inode 0x%lx, "
+                                                       "attribute type 0x%x, "
+                                                       "because there is no "
+                                                       "space left.",
+                                                       vi->i_ino, (unsigned)
+                                                       le32_to_cpu(ni->type));
+                               goto out;
+                       }
+               }
+       }
+       /*
+        * If the write starts beyond the initialized size, extend it up to the
+        * beginning of the write and initialize all non-sparse space between
+        * the old initialized size and the new one.  This automatically also
+        * increments the vfs inode->i_size to keep it above or equal to the
+        * initialized_size.
+        */
+       read_lock_irqsave(&ni->size_lock, flags);
+       ll = ni->initialized_size;
+       read_unlock_irqrestore(&ni->size_lock, flags);
+       if (pos > ll) {
+               /*
+                * Wait for ongoing direct i/o to complete before proceeding.
+                * New direct i/o cannot start as we hold i_mutex.
+                */
+               inode_dio_wait(vi);
+               err = ntfs_attr_extend_initialized(ni, pos);
+               if (unlikely(err < 0))
+                       ntfs_error(vi->i_sb, "Cannot perform write to inode "
+                                       "0x%lx, attribute type 0x%x, because "
+                                       "extending the initialized size "
+                                       "failed (error %d).", vi->i_ino,
+                                       (unsigned)le32_to_cpu(ni->type),
+                                       (int)-err);
+       }
+out:
+       return err;
 }
 
 /**
@@ -421,8 +527,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
                                        goto err_out;
                                }
                        }
-                       err = add_to_page_cache_lru(*cached_page, mapping, index,
-                                       GFP_KERNEL);
+                       err = add_to_page_cache_lru(*cached_page, mapping,
+                                       index, GFP_KERNEL);
                        if (unlikely(err)) {
                                if (err == -EEXIST)
                                        continue;
@@ -1268,180 +1374,6 @@ rl_not_mapped_enoent:
        return err;
 }
 
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied.  If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
-               unsigned nr_pages, unsigned ofs, const char __user *buf,
-               size_t bytes)
-{
-       struct page **last_page = pages + nr_pages;
-       char *addr;
-       size_t total = 0;
-       unsigned len;
-       int left;
-
-       do {
-               len = PAGE_CACHE_SIZE - ofs;
-               if (len > bytes)
-                       len = bytes;
-               addr = kmap_atomic(*pages);
-               left = __copy_from_user_inatomic(addr + ofs, buf, len);
-               kunmap_atomic(addr);
-               if (unlikely(left)) {
-                       /* Do it the slow way. */
-                       addr = kmap(*pages);
-                       left = __copy_from_user(addr + ofs, buf, len);
-                       kunmap(*pages);
-                       if (unlikely(left))
-                               goto err_out;
-               }
-               total += len;
-               bytes -= len;
-               if (!bytes)
-                       break;
-               buf += len;
-               ofs = 0;
-       } while (++pages < last_page);
-out:
-       return total;
-err_out:
-       total += len - left;
-       /* Zero the rest of the target like __copy_from_user(). */
-       while (++pages < last_page) {
-               bytes -= len;
-               if (!bytes)
-                       break;
-               len = PAGE_CACHE_SIZE;
-               if (len > bytes)
-                       len = bytes;
-               zero_user(*pages, 0, len);
-       }
-       goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
-               const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
-       size_t total = 0;
-
-       while (1) {
-               const char __user *buf = iov->iov_base + iov_ofs;
-               unsigned len;
-               size_t left;
-
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               left = __copy_from_user_inatomic(vaddr, buf, len);
-               total += len;
-               bytes -= len;
-               vaddr += len;
-               if (unlikely(left)) {
-                       total -= left;
-                       break;
-               }
-               if (!bytes)
-                       break;
-               iov++;
-               iov_ofs = 0;
-       }
-       return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
-               size_t *iov_ofsp, size_t bytes)
-{
-       const struct iovec *iov = *iovp;
-       size_t iov_ofs = *iov_ofsp;
-
-       while (bytes) {
-               unsigned len;
-
-               len = iov->iov_len - iov_ofs;
-               if (len > bytes)
-                       len = bytes;
-               bytes -= len;
-               iov_ofs += len;
-               if (iov->iov_len == iov_ofs) {
-                       iov++;
-                       iov_ofs = 0;
-               }
-       }
-       *iovp = iov;
-       *iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic.  This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic.  In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error.  And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
-               unsigned nr_pages, unsigned ofs, const struct iovec **iov,
-               size_t *iov_ofs, size_t bytes)
-{
-       struct page **last_page = pages + nr_pages;
-       char *addr;
-       size_t copied, len, total = 0;
-
-       do {
-               len = PAGE_CACHE_SIZE - ofs;
-               if (len > bytes)
-                       len = bytes;
-               addr = kmap_atomic(*pages);
-               copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
-                               *iov, *iov_ofs, len);
-               kunmap_atomic(addr);
-               if (unlikely(copied != len)) {
-                       /* Do it the slow way. */
-                       addr = kmap(*pages);
-                       copied = __ntfs_copy_from_user_iovec_inatomic(addr +
-                                       ofs, *iov, *iov_ofs, len);
-                       if (unlikely(copied != len))
-                               goto err_out;
-                       kunmap(*pages);
-               }
-               total += len;
-               ntfs_set_next_iovec(iov, iov_ofs, len);
-               bytes -= len;
-               if (!bytes)
-                       break;
-               ofs = 0;
-       } while (++pages < last_page);
-out:
-       return total;
-err_out:
-       BUG_ON(copied > len);
-       /* Zero the rest of the target like __copy_from_user(). */
-       memset(addr + ofs + copied, 0, len - copied);
-       kunmap(*pages);
-       total += copied;
-       ntfs_set_next_iovec(iov, iov_ofs, copied);
-       while (++pages < last_page) {
-               bytes -= len;
-               if (!bytes)
-                       break;
-               len = PAGE_CACHE_SIZE;
-               if (len > bytes)
-                       len = bytes;
-               zero_user(*pages, 0, len);
-       }
-       goto out;
-}
-
 static inline void ntfs_flush_dcache_pages(struct page **pages,
                unsigned nr_pages)
 {
@@ -1762,86 +1694,83 @@ err_out:
        return err;
 }
 
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+               unsigned ofs, struct iov_iter *i, size_t bytes)
 {
-       struct inode *inode = mapping->host;
+       struct page **last_page = pages + nr_pages;
+       size_t total = 0;
+       struct iov_iter data = *i;
+       unsigned len, copied;
 
-       if (to > inode->i_size) {
-               truncate_pagecache(inode, inode->i_size);
-               ntfs_truncate_vfs(inode);
-       }
+       do {
+               len = PAGE_CACHE_SIZE - ofs;
+               if (len > bytes)
+                       len = bytes;
+               copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+                               len);
+               total += copied;
+               bytes -= copied;
+               if (!bytes)
+                       break;
+               iov_iter_advance(&data, copied);
+               if (copied < len)
+                       goto err;
+               ofs = 0;
+       } while (++pages < last_page);
+out:
+       return total;
+err:
+       /* Zero the rest of the target like __copy_from_user(). */
+       len = PAGE_CACHE_SIZE - copied;
+       do {
+               if (len > bytes)
+                       len = bytes;
+               zero_user(*pages, copied, len);
+               bytes -= len;
+               copied = 0;
+               len = PAGE_CACHE_SIZE;
+       } while (++pages < last_page);
+       goto out;
 }
 
 /**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file:      file to write to
+ * @i:         iov_iter with data to write
+ * @pos:       byte offset in file at which to begin writing
  */
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs,
-               loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+               loff_t pos)
 {
-       struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *vi = mapping->host;
        ntfs_inode *ni = NTFS_I(vi);
        ntfs_volume *vol = ni->vol;
        struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
        struct page *cached_page = NULL;
-       char __user *buf = NULL;
-       s64 end, ll;
        VCN last_vcn;
        LCN lcn;
-       unsigned long flags;
-       size_t bytes, iov_ofs = 0;      /* Offset in the current iovec. */
-       ssize_t status, written;
+       size_t bytes;
+       ssize_t status, written = 0;
        unsigned nr_pages;
-       int err;
 
-       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
-                       "pos 0x%llx, count 0x%lx.",
-                       vi->i_ino, (unsigned)le32_to_cpu(ni->type),
-                       (unsigned long long)pos, (unsigned long)count);
-       if (unlikely(!count))
-               return 0;
-       BUG_ON(NInoMstProtected(ni));
-       /*
-        * If the attribute is not an index root and it is encrypted or
-        * compressed, we cannot write to it yet.  Note we need to check for
-        * AT_INDEX_ALLOCATION since this is the type of both directory and
-        * index inodes.
-        */
-       if (ni->type != AT_INDEX_ALLOCATION) {
-               /* If file is encrypted, deny access, just like NT4. */
-               if (NInoEncrypted(ni)) {
-                       /*
-                        * Reminder for later: Encrypted files are _always_
-                        * non-resident so that the content can always be
-                        * encrypted.
-                        */
-                       ntfs_debug("Denying write access to encrypted file.");
-                       return -EACCES;
-               }
-               if (NInoCompressed(ni)) {
-                       /* Only unnamed $DATA attribute can be compressed. */
-                       BUG_ON(ni->type != AT_DATA);
-                       BUG_ON(ni->name_len);
-                       /*
-                        * Reminder for later: If resident, the data is not
-                        * actually compressed.  Only on the switch to non-
-                        * resident does compression kick in.  This is in
-                        * contrast to encrypted files (see above).
-                        */
-                       ntfs_error(vi->i_sb, "Writing to compressed files is "
-                                       "not implemented yet.  Sorry.");
-                       return -EOPNOTSUPP;
-               }
-       }
+       ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+                       "0x%llx, count 0x%lx.", vi->i_ino,
+                       (unsigned)le32_to_cpu(ni->type),
+                       (unsigned long long)pos,
+                       (unsigned long)iov_iter_count(i));
        /*
         * If a previous ntfs_truncate() failed, repeat it and abort if it
         * fails again.
         */
        if (unlikely(NInoTruncateFailed(ni))) {
+               int err;
+
                inode_dio_wait(vi);
                err = ntfs_truncate(vi);
                if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1784,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                        return err;
                }
        }
-       /* The first byte after the write. */
-       end = pos + count;
-       /*
-        * If the write goes beyond the allocated size, extend the allocation
-        * to cover the whole of the write, rounded up to the nearest cluster.
-        */
-       read_lock_irqsave(&ni->size_lock, flags);
-       ll = ni->allocated_size;
-       read_unlock_irqrestore(&ni->size_lock, flags);
-       if (end > ll) {
-               /* Extend the allocation without changing the data size. */
-               ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
-               if (likely(ll >= 0)) {
-                       BUG_ON(pos >= ll);
-                       /* If the extension was partial truncate the write. */
-                       if (end > ll) {
-                               ntfs_debug("Truncating write to inode 0x%lx, "
-                                               "attribute type 0x%x, because "
-                                               "the allocation was only "
-                                               "partially extended.",
-                                               vi->i_ino, (unsigned)
-                                               le32_to_cpu(ni->type));
-                               end = ll;
-                               count = ll - pos;
-                       }
-               } else {
-                       err = ll;
-                       read_lock_irqsave(&ni->size_lock, flags);
-                       ll = ni->allocated_size;
-                       read_unlock_irqrestore(&ni->size_lock, flags);
-                       /* Perform a partial write if possible or fail. */
-                       if (pos < ll) {
-                               ntfs_debug("Truncating write to inode 0x%lx, "
-                                               "attribute type 0x%x, because "
-                                               "extending the allocation "
-                                               "failed (error code %i).",
-                                               vi->i_ino, (unsigned)
-                                               le32_to_cpu(ni->type), err);
-                               end = ll;
-                               count = ll - pos;
-                       } else {
-                               ntfs_error(vol->sb, "Cannot perform write to "
-                                               "inode 0x%lx, attribute type "
-                                               "0x%x, because extending the "
-                                               "allocation failed (error "
-                                               "code %i).", vi->i_ino,
-                                               (unsigned)
-                                               le32_to_cpu(ni->type), err);
-                               return err;
-                       }
-               }
-       }
-       written = 0;
-       /*
-        * If the write starts beyond the initialized size, extend it up to the
-        * beginning of the write and initialize all non-sparse space between
-        * the old initialized size and the new one.  This automatically also
-        * increments the vfs inode->i_size to keep it above or equal to the
-        * initialized_size.
-        */
-       read_lock_irqsave(&ni->size_lock, flags);
-       ll = ni->initialized_size;
-       read_unlock_irqrestore(&ni->size_lock, flags);
-       if (pos > ll) {
-               err = ntfs_attr_extend_initialized(ni, pos);
-               if (err < 0) {
-                       ntfs_error(vol->sb, "Cannot perform write to inode "
-                                       "0x%lx, attribute type 0x%x, because "
-                                       "extending the initialized size "
-                                       "failed (error code %i).", vi->i_ino,
-                                       (unsigned)le32_to_cpu(ni->type), err);
-                       status = err;
-                       goto err_out;
-               }
-       }
        /*
         * Determine the number of pages per cluster for non-resident
         * attributes.
@@ -1937,10 +1791,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
        nr_pages = 1;
        if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
                nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
-       /* Finally, perform the actual write. */
        last_vcn = -1;
-       if (likely(nr_segs == 1))
-               buf = iov->iov_base;
        do {
                VCN vcn;
                pgoff_t idx, start_idx;
@@ -1965,10 +1816,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                                vol->cluster_size_bits, false);
                                up_read(&ni->runlist.lock);
                                if (unlikely(lcn < LCN_HOLE)) {
-                                       status = -EIO;
                                        if (lcn == LCN_ENOMEM)
                                                status = -ENOMEM;
-                                       else
+                                       else {
+                                               status = -EIO;
                                                ntfs_error(vol->sb, "Cannot "
                                                        "perform write to "
                                                        "inode 0x%lx, "
@@ -1977,6 +1828,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                                        "is corrupt.",
                                                        vi->i_ino, (unsigned)
                                                        le32_to_cpu(ni->type));
+                                       }
                                        break;
                                }
                                if (lcn == LCN_HOLE) {
@@ -1989,8 +1841,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                                }
                        }
                }
-               if (bytes > count)
-                       bytes = count;
+               if (bytes > iov_iter_count(i))
+                       bytes = iov_iter_count(i);
+again:
                /*
                 * Bring in the user page(s) that we will copy from _first_.
                 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1852,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                 * pages being swapped out between us bringing them into memory
                 * and doing the actual copying.
                 */
-               if (likely(nr_segs == 1))
-                       ntfs_fault_in_pages_readable(buf, bytes);
-               else
-                       ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+               if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+                       status = -EFAULT;
+                       break;
+               }
                /* Get and lock @do_pages starting at index @start_idx. */
                status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
                                pages, &cached_page);
@@ -2018,56 +1871,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                        status = ntfs_prepare_pages_for_non_resident_write(
                                        pages, do_pages, pos, bytes);
                        if (unlikely(status)) {
-                               loff_t i_size;
-
                                do {
                                        unlock_page(pages[--do_pages]);
                                        page_cache_release(pages[do_pages]);
                                } while (do_pages);
-                               /*
-                                * The write preparation may have instantiated
-                                * allocated space outside i_size.  Trim this
-                                * off again.  We can ignore any errors in this
-                                * case as we will just be waisting a bit of
-                                * allocated space, which is not a disaster.
-                                */
-                               i_size = i_size_read(vi);
-                               if (pos + bytes > i_size) {
-                                       ntfs_write_failed(mapping, pos + bytes);
-                               }
                                break;
                        }
                }
                u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
-               if (likely(nr_segs == 1)) {
-                       copied = ntfs_copy_from_user(pages + u, do_pages - u,
-                                       ofs, buf, bytes);
-                       buf += copied;
-               } else
-                       copied = ntfs_copy_from_user_iovec(pages + u,
-                                       do_pages - u, ofs, &iov, &iov_ofs,
-                                       bytes);
+               copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+                                       i, bytes);
                ntfs_flush_dcache_pages(pages + u, do_pages - u);
-               status = ntfs_commit_pages_after_write(pages, do_pages, pos,
-                               bytes);
-               if (likely(!status)) {
-                       written += copied;
-                       count -= copied;
-                       pos += copied;
-                       if (unlikely(copied != bytes))
-                               status = -EFAULT;
+               status = 0;
+               if (likely(copied == bytes)) {
+                       status = ntfs_commit_pages_after_write(pages, do_pages,
+                                       pos, bytes);
+                       if (!status)
+                               status = bytes;
                }
                do {
                        unlock_page(pages[--do_pages]);
                        page_cache_release(pages[do_pages]);
                } while (do_pages);
-               if (unlikely(status))
+               if (unlikely(status < 0))
                        break;
-               balance_dirty_pages_ratelimited(mapping);
+               copied = status;
                cond_resched();
-       } while (count);
-err_out:
-       *ppos = pos;
+               if (unlikely(!copied)) {
+                       size_t sc;
+
+                       /*
+                        * We failed to copy anything.  Fall back to single
+                        * segment length write.
+                        *
+                        * This is needed to avoid possible livelock in the
+                        * case that all segments in the iov cannot be copied
+                        * at once without a pagefault.
+                        */
+                       sc = iov_iter_single_seg_count(i);
+                       if (bytes > sc)
+                               bytes = sc;
+                       goto again;
+               }
+               iov_iter_advance(i, copied);
+               pos += copied;
+               written += copied;
+               balance_dirty_pages_ratelimited(mapping);
+               if (fatal_signal_pending(current)) {
+                       status = -EINTR;
+                       break;
+               }
+       } while (iov_iter_count(i));
        if (cached_page)
                page_cache_release(cached_page);
        ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
@@ -2077,59 +1931,56 @@ err_out:
 }
 
 /**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb:      IO state structure (file, offset, etc.)
+ * @from:      iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
  */
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+               struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       loff_t pos;
-       size_t count;           /* after file limit checks */
-       ssize_t written, err;
+       loff_t pos = iocb->ki_pos;
+       ssize_t written = 0;
+       ssize_t err;
+       size_t count = iov_iter_count(from);
 
-       count = iov_length(iov, nr_segs);
-       pos = *ppos;
-       /* We can write back this queue in page reclaim. */
-       current->backing_dev_info = inode_to_bdi(inode);
-       written = 0;
-       err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-       if (err)
-               goto out;
-       if (!count)
-               goto out;
-       err = file_remove_suid(file);
-       if (err)
-               goto out;
-       err = file_update_time(file);
-       if (err)
-               goto out;
-       written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
-                       count);
-out:
+       err = ntfs_prepare_file_for_write(file, &pos, &count);
+       if (count && !err) {
+               iov_iter_truncate(from, count);
+               written = ntfs_perform_write(file, from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
+       }
        current->backing_dev_info = NULL;
        return written ? written : err;
 }
 
 /**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb:      IO state structure
+ * @from:      iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
  */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
+       struct inode *vi = file_inode(file);
        ssize_t ret;
 
-       BUG_ON(iocb->ki_pos != pos);
-
-       mutex_lock(&inode->i_mutex);
-       ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
-       mutex_unlock(&inode->i_mutex);
+       mutex_lock(&vi->i_mutex);
+       ret = ntfs_file_write_iter_nolock(iocb, from);
+       mutex_unlock(&vi->i_mutex);
        if (ret > 0) {
-               int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+               ssize_t err;
+
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
                if (err < 0)
                        ret = err;
        }
@@ -2197,37 +2048,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 #endif /* NTFS_RW */
 
 const struct file_operations ntfs_file_ops = {
-       .llseek         = generic_file_llseek,   /* Seek inside file. */
-       .read           = new_sync_read,         /* Read from file. */
-       .read_iter      = generic_file_read_iter, /* Async read from file. */
+       .llseek         = generic_file_llseek,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
 #ifdef NTFS_RW
-       .write          = do_sync_write,         /* Write to file. */
-       .aio_write      = ntfs_file_aio_write,   /* Async write to file. */
-       /*.release      = ,*/                    /* Last file is closed.  See
-                                                   fs/ext2/file.c::
-                                                   ext2_release_file() for
-                                                   how to use this to discard
-                                                   preallocated space for
-                                                   write opened files. */
-       .fsync          = ntfs_file_fsync,       /* Sync a file to disk. */
-       /*.aio_fsync    = ,*/                    /* Sync all outstanding async
-                                                   i/o operations on a
-                                                   kiocb. */
+       .write          = new_sync_write,
+       .write_iter     = ntfs_file_write_iter,
+       .fsync          = ntfs_file_fsync,
 #endif /* NTFS_RW */
-       /*.ioctl        = ,*/                    /* Perform function on the
-                                                   mounted filesystem. */
-       .mmap           = generic_file_mmap,     /* Mmap file. */
-       .open           = ntfs_file_open,        /* Open file. */
-       .splice_read    = generic_file_splice_read /* Zero-copy data send with
-                                                   the data source being on
-                                                   the ntfs partition.  We do
-                                                   not need to care about the
-                                                   data destination. */
-       /*.sendpage     = ,*/                    /* Zero-copy data send with
-                                                   the data destination being
-                                                   on the ntfs partition.  We
-                                                   do not need to care about
-                                                   the data source. */
+       .mmap           = generic_file_mmap,
+       .open           = ntfs_file_open,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations ntfs_file_inode_ops = {
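For reference, .write can point straight at new_sync_write() above because that VFS wrapper packages the single user buffer into an iov_iter and calls ->write_iter.  Roughly, from the VFS of this era (an approximation from memory; details such as ki_nbytes handling may differ):

static ssize_t new_sync_write(struct file *filp, const char __user *buf,
		size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	/* Ends up in ->write_iter, i.e. ntfs_file_write_iter() after this patch. */
	ret = filp->f_op->write_iter(&kiocb, &iter);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}

So both write(2) and writev(2) now funnel through ntfs_file_write_iter(), which takes i_mutex, delegates the real work to ntfs_file_write_iter_nolock(), and for O_SYNC/O_DSYNC files syncs the written range via generic_write_sync().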