Revert "fs: do not prefault sys_write() user buffer pages"
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 7 Oct 2015 07:32:38 +0000 (08:32 +0100)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 7 Oct 2015 07:32:38 +0000 (08:32 +0100)
This reverts commit 998ef75ddb5709bbea0bf1506cd2717348a3c647.

The commit itself does not appear to be buggy per se, but it is exposing
a bug in ext4 (and Ted thinks ext3 too, but we solved that by getting
rid of it).  It's too late in the release cycle to really worry about
this, even if Dave Hansen has a patch that may actually fix the
underlying ext4 problem.  We can (and should) revisit this for the next
release.

The problem is that moving the prefaulting later now exposes a special
case with partially successful writes that isn't handled correctly.  And
the prefaulting likely isn't normally even that much of a performance
issue - it looks like at least one reason Dave saw this in his
performance tests is that he also ran them on Skylake that now supports
the new SMAP code, which makes the normally very cheap user space
prefaulting noticeably more expensive.

Bisected-and-acked-by: Ted Ts'o <tytso@mit.edu>
Analyzed-and-acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/filemap.c

index 72940fb38666811b80c146bc085a1c84fc0e7ecc..1cc5467cf36ce7852f7a0474d5fd3237b3dfff10 100644 (file)
@@ -2473,6 +2473,21 @@ ssize_t generic_perform_write(struct file *file,
                                                iov_iter_count(i));
 
 again:
+               /*
+                * Bring in the user page that we will copy from _first_.
+                * Otherwise there's a nasty deadlock on copying from the
+                * same page as we're writing to, without it being marked
+                * up-to-date.
+                *
+                * Not only is this an optimisation, but it is also required
+                * to check that the address is actually valid, when atomic
+                * usercopies are used, below.
+                */
+               if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+                       status = -EFAULT;
+                       break;
+               }
+
                status = a_ops->write_begin(file, mapping, pos, bytes, flags,
                                                &page, &fsdata);
                if (unlikely(status < 0))
@@ -2480,17 +2495,8 @@ again:
 
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
-               /*
-                * 'page' is now locked.  If we are trying to copy from a
-                * mapping of 'page' in userspace, the copy might fault and
-                * would need PageUptodate() to complete.  But, page can not be
-                * made Uptodate without acquiring the page lock, which we hold.
-                * Deadlock.  Avoid with pagefault_disable().  Fix up below with
-                * iov_iter_fault_in_readable().
-                */
-               pagefault_disable();
+
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
                flush_dcache_page(page);
 
                status = a_ops->write_end(file, mapping, pos, bytes, copied,
@@ -2513,14 +2519,6 @@ again:
                         */
                        bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
                                                iov_iter_single_seg_count(i));
-                       /*
-                        * This is the fallback to recover if the copy from
-                        * userspace above faults.
-                        */
-                       if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
-                               status = -EFAULT;
-                               break;
-                       }
                        goto again;
                }
                pos += copied;