Merge master.kernel.org:/pub/scm/linux/kernel/git/tmlind/linux-omap-upstream into...
[sfrench/cifs-2.6.git] / fs / ocfs2 / buffer_head_io.c
index d424041b38e9b568ffba1a287639f68f4c9d07f3..c9037414f4f65fad9f0b812b9ba75f7c9823cc6d 100644 (file)
@@ -58,7 +58,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                goto out;
        }
 
-       down(&OCFS2_I(inode)->ip_io_sem);
+       mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
 
        lock_buffer(bh);
        set_buffer_uptodate(bh);
@@ -82,7 +82,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                brelse(bh);
        }
 
-       up(&OCFS2_I(inode)->ip_io_sem);
+       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 out:
        mlog_exit(ret);
        return ret;
@@ -97,8 +97,11 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
        int i, ignore_cache = 0;
        struct buffer_head *bh;
 
-       mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n",
-                  block, nr, flags, inode);
+       mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
+                  (unsigned long long)block, nr, flags, inode);
+
+       BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+              (!inode || !(flags & OCFS2_BH_CACHED)));
 
        if (osb == NULL || osb->sb == NULL || bhs == NULL) {
                status = -EINVAL;
@@ -125,13 +128,13 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
                flags &= ~OCFS2_BH_CACHED;
 
        if (inode)
-               down(&OCFS2_I(inode)->ip_io_sem);
+               mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
                        bhs[i] = sb_getblk(sb, block++);
                        if (bhs[i] == NULL) {
                                if (inode)
-                                       up(&OCFS2_I(inode)->ip_io_sem);
+                                       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
                                status = -EIO;
                                mlog_errno(status);
                                goto bail;
@@ -140,12 +143,36 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
                bh = bhs[i];
                ignore_cache = 0;
 
+               /* There are three read-ahead cases here which we need to
+                * be concerned with. All three assume a buffer has
+                * previously been submitted with OCFS2_BH_READAHEAD
+                * and it hasn't yet completed I/O.
+                *
+                * 1) The current request is sync to disk. This rarely
+                *    happens these days, and never when performance
+                *    matters - the code can just wait on the buffer
+                *    lock and re-submit.
+                *
+                * 2) The current request is cached, but not
+                *    readahead. ocfs2_buffer_uptodate() will return
+                *    false anyway, so we'll wind up waiting on the
+                *    buffer lock to do I/O. We re-check the request
+                *    after getting the lock to avoid a re-submit.
+                *
+                * 3) The current request is readahead (and so must
+                *    also be a caching one). We short circuit if the
+                *    buffer is locked (under I/O) and if it's in the
+                *    uptodate cache. The re-check from #2 catches the
+                *    case that the previous read-ahead completes just
+                *    before our is-it-in-flight check.
+                */
+
                if (flags & OCFS2_BH_CACHED &&
                    !ocfs2_buffer_uptodate(inode, bh)) {
                        mlog(ML_UPTODATE,
-                            "bh (%llu), inode %"MLFu64" not uptodate\n",
+                            "bh (%llu), inode %llu not uptodate\n",
                             (unsigned long long)bh->b_blocknr,
-                            OCFS2_I(inode)->ip_blkno);
+                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
                        ignore_cache = 1;
                }
 
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
                                continue;
                        }
 
+                       /* A read-ahead request was made - if the
+                        * buffer is already under read-ahead from a
+                        * previously submitted request then we are
+                        * done here. */
+                       if ((flags & OCFS2_BH_READAHEAD)
+                           && ocfs2_buffer_read_ahead(inode, bh))
+                               continue;
+
                        lock_buffer(bh);
                        if (buffer_jbd(bh)) {
 #ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
                                continue;
 #endif
                        }
+
+                       /* Re-check ocfs2_buffer_uptodate() as a
+                        * previously read-ahead buffer may have
+                        * completed I/O while we were waiting for the
+                        * buffer lock. */
+                       if ((flags & OCFS2_BH_CACHED)
+                           && !(flags & OCFS2_BH_READAHEAD)
+                           && ocfs2_buffer_uptodate(inode, bh)) {
+                               unlock_buffer(bh);
+                               continue;
+                       }
+
                        clear_buffer_uptodate(bh);
                        get_bh(bh); /* for end_buffer_read_sync() */
                        bh->b_end_io = end_buffer_read_sync;
-                       if (flags & OCFS2_BH_READAHEAD)
-                               submit_bh(READA, bh);
-                       else
-                               submit_bh(READ, bh);
+                       submit_bh(READ, bh);
                        continue;
                }
        }
@@ -197,33 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
        for (i = (nr - 1); i >= 0; i--) {
                bh = bhs[i];
 
-               /* We know this can't have changed as we hold the
-                * inode sem. Avoid doing any work on the bh if the
-                * journal has it. */
-               if (!buffer_jbd(bh))
-                       wait_on_buffer(bh);
-
-               if (!buffer_uptodate(bh)) {
-                       /* Status won't be cleared from here on out,
-                        * so we can safely record this and loop back
-                        * to cleanup the other buffers. Don't need to
-                        * remove the clustered uptodate information
-                        * for this bh as it's not marked locally
-                        * uptodate. */
-                       status = -EIO;
-                       brelse(bh);
-                       bhs[i] = NULL;
-                       continue;
+               if (!(flags & OCFS2_BH_READAHEAD)) {
+                       /* We know this can't have changed as we hold the
+                        * inode mutex. Avoid doing any work on the bh if the
+                        * journal has it. */
+                       if (!buffer_jbd(bh))
+                               wait_on_buffer(bh);
+
+                       if (!buffer_uptodate(bh)) {
+                               /* Status won't be cleared from here on out,
+                                * so we can safely record this and loop back
+                                * to cleanup the other buffers. Don't need to
+                                * remove the clustered uptodate information
+                                * for this bh as it's not marked locally
+                                * uptodate. */
+                               status = -EIO;
+                               brelse(bh);
+                               bhs[i] = NULL;
+                               continue;
+                       }
                }
 
+               /* Always set the buffer in the cache, even if it was
+                * a forced read, or read-ahead which hasn't yet
+                * completed. */
                if (inode)
                        ocfs2_set_buffer_uptodate(inode, bh);
        }
        if (inode)
-               up(&OCFS2_I(inode)->ip_io_sem);
+               mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-       mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr,
-            (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+       mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
+            (unsigned long long)block, nr,
+            (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
 
 bail: