Merge branch 'akpm' (patches from Andrew)
[sfrench/cifs-2.6.git] / fs/direct-io.c
index 5fa2211e49aee2186546d8db7a70c5295c7591f0..96415c65bbdc102f6a5dff696231e944d4fdccdb 100644 (file)
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 {
        loff_t offset = dio->iocb->ki_pos;
        ssize_t transferred = 0;
+       int err;
 
        /*
         * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
        if (ret == 0)
                ret = transferred;
 
+       /*
+        * Try again to invalidate clean pages which might have been cached by
+        * non-direct readahead, or faulted in by get_user_pages() if the source
+        * of the write was an mmap'ed region of the file we're writing.  Either
+        * one is a pretty crazy thing to do, so we don't support it 100%.  If
+        * this invalidation fails, tough, the write still worked...
+        */
+       if (ret > 0 && dio->op == REQ_OP_WRITE &&
+           dio->inode->i_mapping->nrpages) {
+               err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+                                       offset >> PAGE_SHIFT,
+                                       (offset + ret - 1) >> PAGE_SHIFT);
+               WARN_ON_ONCE(err);
+       }
+
        if (dio->end_io) {
-               int err;
 
                // XXX: ki_pos??
                err = dio->end_io(dio->iocb, offset, ret, dio->private);
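
For reference, the two shift expressions above convert the byte range
[offset, offset + ret) into the inclusive page-index range that
invalidate_inode_pages2_range() expects; subtracting 1 before the final
shift rounds the last written byte down to its containing page, so
partially covered head and tail pages are included. A minimal userspace
sketch of the arithmetic (the values are made up; PAGE_SHIFT is 12 on
systems with 4 KiB pages):

	#include <stdio.h>

	#define PAGE_SHIFT 12			/* 4 KiB pages */

	int main(void)
	{
		long long offset = 4196;	/* write starts 100 bytes into page 1 */
		long long ret    = 8192;	/* 8 KiB actually written */

		long long first = offset >> PAGE_SHIFT;		     /* -> 1 */
		long long last  = (offset + ret - 1) >> PAGE_SHIFT;  /* -> 3 */

		/* Pages 1..3 inclusive get invalidated, including the
		 * partially written first and last pages. */
		printf("invalidate pages %lld..%lld\n", first, last);
		return 0;
	}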
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
        struct dio *dio = bio->bi_private;
        unsigned long remaining;
        unsigned long flags;
+       bool defer_completion = false;
 
        /* cleanup the bio */
        dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
        spin_unlock_irqrestore(&dio->bio_lock, flags);
 
        if (remaining == 0) {
-               if (dio->result && dio->defer_completion) {
+               /*
+                * Defer completion when defer_completion is set or
+                * when the inode has pages mapped and this is an AIO write.
+                * We need to invalidate those pages because there is a
+                * chance they contain stale data if buffered IO to the
+                * same region happened between AIO submission and
+                * completion.
+                */
+               if (dio->result)
+                       defer_completion = dio->defer_completion ||
+                                          (dio->op == REQ_OP_WRITE &&
+                                           dio->inode->i_mapping->nrpages);
+               if (defer_completion) {
                        INIT_WORK(&dio->complete_work, dio_aio_complete_work);
                        queue_work(dio->inode->i_sb->s_dio_done_wq,
                                   &dio->complete_work);
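
Completion has to be bounced to process context here because
invalidate_inode_pages2_range() can sleep, while bio completion may run
in interrupt context. The queued work item, dio_aio_complete_work(), is
essentially a thin trampoline back into dio_complete(); a sketch of
that pattern (simplified from fs/direct-io.c, not the verbatim source):

	/* Runs on the superblock's dio workqueue, in process context,
	 * where dio_complete() is free to sleep during invalidation. */
	static void dio_aio_complete_work(struct work_struct *work)
	{
		struct dio *dio = container_of(work, struct dio, complete_work);

		dio_complete(dio, 0, true);	/* true: async completion path */
	}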
@@ -838,7 +866,8 @@ out:
         */
        if (sdio->boundary) {
                ret = dio_send_cur_page(dio, sdio, map_bh);
-               dio_bio_submit(dio, sdio);
+               if (sdio->bio)
+                       dio_bio_submit(dio, sdio);
                put_page(sdio->cur_page);
                sdio->cur_page = NULL;
        }
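
The added sdio->bio check matters because dio_bio_submit() dereferences
the bio immediately; if dio_send_cur_page() failed before allocating
one, submitting unconditionally would oops. A simplified sketch of the
failure mode (hypothetical abbreviated body, only the first dereference
is the point):

	static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
	{
		struct bio *bio = sdio->bio;	/* may be NULL on the error path */

		bio->bi_private = dio;		/* NULL dereference without the guard */
		/* ... submit and reset submission state ... */
		sdio->bio = NULL;
	}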
@@ -1210,10 +1239,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
         * so that we can call ->fsync.
         */
-       if (dio->is_async && iov_iter_rw(iter) == WRITE &&
-           ((iocb->ki_filp->f_flags & O_DSYNC) ||
-            IS_SYNC(iocb->ki_filp->f_mapping->host))) {
-               retval = dio_set_defer_completion(dio);
+       if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+               retval = 0;
+               if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+                   IS_SYNC(iocb->ki_filp->f_mapping->host))
+                       retval = dio_set_defer_completion(dio);
+               else if (!dio->inode->i_sb->s_dio_done_wq) {
+                       /*
+                        * In case of an AIO write racing with a buffered
+                        * read, we need to defer completion. We can't decide
+                        * that now; however, the workqueue needs to be
+                        * initialized here.
+                        */
+                       retval = sb_init_dio_done_wq(dio->inode->i_sb);
+               }
                if (retval) {
                        /*
                         * We grab i_mutex only for reads so we don't have
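
Note the asymmetry here: the workqueue cannot be created at completion
time, since bio completion may run in atomic context, so it is set up
at submission whenever a deferred completion might later be needed.
sb_init_dio_done_wq() must also cope with concurrent submitters racing
to create it; roughly (a sketch of the pattern, not necessarily the
exact upstream body):

	int sb_init_dio_done_wq(struct super_block *sb)
	{
		struct workqueue_struct *old;
		struct workqueue_struct *wq = alloc_workqueue("dio/%s",
							      WQ_MEM_RECLAIM, 0,
							      sb->s_id);
		if (!wq)
			return -ENOMEM;
		/* Racing submitters may both get here; publish atomically
		 * and let the loser free its copy. */
		old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
		if (old)
			destroy_workqueue(wq);
		return 0;
	}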