block: fix O_DIRECT error handling for bio fragments
authorJens Axboe <axboe@kernel.dk>
Tue, 6 Aug 2019 19:34:31 +0000 (13:34 -0600)
committerJens Axboe <axboe@kernel.dk>
Wed, 7 Aug 2019 18:19:43 +0000 (12:19 -0600)
0eb6ddfb865c tried to fix this up, but introduced a use-after-free
of dio. Additionally, we still had an issue with error handling,
as reported by Darrick:

"I noticed a regression in xfs/747 (an unreleased xfstest for the
xfs_scrub media scanning feature) on 5.3-rc3.  I'll condense that down
to a simpler reproducer:

error-test: 0 209 linear 8:48 0
error-test: 209 1 error
error-test: 210 6446894 linear 8:48 210

Basically we have a ~3G /dev/sdd and we set up device mapper to fail IO
for sector 209 and to pass the io to the scsi device everywhere else.

On 5.3-rc3, performing a directio pread of this range with a < 1M buffer
(in other words, a request for fewer than MAX_BIO_PAGES bytes) yields
EIO like you'd expect:

pread64(3, 0x7f880e1c7000, 1048576, 0)  = -1 EIO (Input/output error)
pread: Input/output error
+++ exited with 0 +++

But doing it with a larger buffer succeeds(!):

pread64(3, "XFSB\0\0\20\0\0\0\0\0\0\fL\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1146880, 0) = 1146880
read 1146880/1146880 bytes at offset 0
1 MiB, 1 ops; 0.0009 sec (1.124 GiB/sec and 1052.6316 ops/sec)
+++ exited with 0 +++

(Note that the part of the buffer corresponding to the dm-error area is
uninitialized)

On 5.3-rc2, both commands would fail with EIO like you'd expect.  The
only change between rc2 and rc3 is commit 0eb6ddfb865c ("block: Fix
__blkdev_direct_IO() for bio fragments").

AFAICT we end up in __blkdev_direct_IO with a 1120K buffer, which gets
split into two bios: one for the first BIO_MAX_PAGES worth of data (1MB)
and a second one for the 96k after that."

Fix this by noting that it's always safe to dereference dio if we get
BLK_QC_T_EAGAIN returned, as end_io hasn't been run for that case. So
we can safely increment the dio size before calling submit_bio(), and
then decrement it on failure (not that it really matters, as the bio
and dio are going away).

For error handling, return to the original method of just using 'ret'
for tracking the error, and the size tracking in dio->size.

Fixes: 0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments")
Fixes: 6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO")
Reported-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/block_dev.c

index a6f7c892cb4a390f9d6357263cfc17fa8c2db9aa..131e2e0582a69f5ab60d35ae0850718b73ccece4 100644 (file)
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        loff_t pos = iocb->ki_pos;
        blk_qc_t qc = BLK_QC_T_NONE;
        gfp_t gfp;
-       ssize_t ret;
+       int ret;
 
        if ((pos | iov_iter_alignment(iter)) &
            (bdev_logical_block_size(bdev) - 1))
@@ -386,8 +386,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
        ret = 0;
        for (;;) {
-               int err;
-
                bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector = pos >> 9;
                bio->bi_write_hint = iocb->ki_hint;
@@ -395,10 +393,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                bio->bi_end_io = blkdev_bio_end_io;
                bio->bi_ioprio = iocb->ki_ioprio;
 
-               err = bio_iov_iter_get_pages(bio, iter);
-               if (unlikely(err)) {
-                       if (!ret)
-                               ret = err;
+               ret = bio_iov_iter_get_pages(bio, iter);
+               if (unlikely(ret)) {
                        bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                        break;
@@ -421,7 +417,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                if (nowait)
                        bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
 
-               dio->size += bio->bi_iter.bi_size;
                pos += bio->bi_iter.bi_size;
 
                nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
@@ -433,13 +428,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                                polled = true;
                        }
 
+                       dio->size += bio->bi_iter.bi_size;
                        qc = submit_bio(bio);
                        if (qc == BLK_QC_T_EAGAIN) {
-                               if (!ret)
-                                       ret = -EAGAIN;
+                               dio->size -= bio->bi_iter.bi_size;
+                               ret = -EAGAIN;
                                goto error;
                        }
-                       ret = dio->size;
 
                        if (polled)
                                WRITE_ONCE(iocb->ki_cookie, qc);
@@ -460,18 +455,17 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        atomic_inc(&dio->ref);
                }
 
+               dio->size += bio->bi_iter.bi_size;
                qc = submit_bio(bio);
                if (qc == BLK_QC_T_EAGAIN) {
-                       if (!ret)
-                               ret = -EAGAIN;
+                       dio->size -= bio->bi_iter.bi_size;
+                       ret = -EAGAIN;
                        goto error;
                }
-               ret = dio->size;
 
                bio = bio_alloc(gfp, nr_pages);
                if (!bio) {
-                       if (!ret)
-                               ret = -EAGAIN;
+                       ret = -EAGAIN;
                        goto error;
                }
        }
@@ -496,6 +490,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 out:
        if (!ret)
                ret = blk_status_to_errno(dio->bio.bi_status);
+       if (likely(!ret))
+               ret = dio->size;
 
        bio_put(&dio->bio);
        return ret;