xfs: attach inodes to the cluster buffer when dirtied
author: Dave Chinner <dchinner@redhat.com>
Mon, 29 Jun 2020 21:49:18 +0000 (14:49 -0700)
committer: Darrick J. Wong <darrick.wong@oracle.com>
Tue, 7 Jul 2020 14:15:08 +0000 (07:15 -0700)
Rather than attach inodes to the cluster buffer just when we are
doing IO, attach the inodes to the cluster buffer when they are
dirtied. This means the buffer always carries a list of dirty inodes
that reference it, and we can use that list to make more fundamental
changes to inode writeback that aren't otherwise possible.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/libxfs/xfs_trans_inode.c
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c

index ad5974365c5898d9af25accbca02724580766725..e15129647e00c97143dcfd5b40cf376e3fd8d20f 100644 (file)
@@ -163,13 +163,16 @@ xfs_trans_log_inode(
                /*
                 * We need an explicit buffer reference for the log item but
                 * don't want the buffer to remain attached to the transaction.
-                * Hold the buffer but release the transaction reference.
+                * Hold the buffer but release the transaction reference once
+                * we've attached the inode log item to the buffer log item
+                * list.
                 */
                xfs_buf_hold(bp);
-               xfs_trans_brelse(tp, bp);
-
                spin_lock(&iip->ili_lock);
                iip->ili_item.li_buf = bp;
+               bp->b_flags |= _XBF_INODES;
+               list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
+               xfs_trans_brelse(tp, bp);
        }
 
        /*
index ecb3362395af3bc50d5cb4f1349cd01512c39d8b..e9428c30862a9857823dedb602e77373b1d3c487 100644 (file)
@@ -465,6 +465,7 @@ xfs_buf_item_unpin(
                if (bip->bli_flags & XFS_BLI_STALE_INODE) {
                        xfs_buf_item_done(bp);
                        xfs_iflush_done(bp);
+                       ASSERT(list_empty(&bp->b_li_list));
                } else {
                        xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
                        xfs_buf_item_relse(bp);
index dc90a81abb1aff3b391757e7cec5896fd8be16f9..58a750ce689c0d4d2e232f0f3996e874a6bc2d3a 100644 (file)
@@ -115,6 +115,7 @@ __xfs_inode_free(
 {
        /* asserts to verify all state is correct here */
        ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list));
        XFS_STATS_DEC(ip->i_mount, vn_active);
 
        call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
index 1c3a8bed4875c53cb89e7c761afb00d36add75cb..c4586ac3656af7fd8c2b6f3c2f71858a46d55aeb 100644 (file)
@@ -2584,27 +2584,24 @@ retry:
                ASSERT(iip->ili_last_fields);
                goto out_iunlock;
        }
-       ASSERT(!iip || list_empty(&iip->ili_item.li_bio_list));
 
        /*
-        * Clean inodes can be released immediately.  Everything else has to go
-        * through xfs_iflush_abort() on journal commit as the flock
-        * synchronises removal of the inode from the cluster buffer against
-        * inode reclaim.
+        * Inodes not attached to the buffer can be released immediately.
+        * Everything else has to go through xfs_iflush_abort() on journal
+        * commit as the flock synchronises removal of the inode from the
+        * cluster buffer against inode reclaim.
         */
-       if (xfs_inode_clean(ip)) {
+       if (!iip || list_empty(&iip->ili_item.li_bio_list)) {
                xfs_ifunlock(ip);
                goto out_iunlock;
        }
 
        /* we have a dirty inode in memory that has not yet been flushed. */
-       ASSERT(iip->ili_fields);
        spin_lock(&iip->ili_lock);
        iip->ili_last_fields = iip->ili_fields;
        iip->ili_fields = 0;
        iip->ili_fsync_fields = 0;
        spin_unlock(&iip->ili_lock);
-       list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
        ASSERT(iip->ili_last_fields);
 
 out_iunlock:
@@ -3818,19 +3815,8 @@ flush_out:
        xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
                                &iip->ili_item.li_lsn);
 
-       /*
-        * Attach the inode item callback to the buffer whether the flush
-        * succeeded or not. If not, the caller will shut down and fail I/O
-        * completion on the buffer to remove the inode from the AIL and release
-        * the flush lock.
-        */
-       bp->b_flags |= _XBF_INODES;
-       list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
-
        /* generate the checksum. */
        xfs_dinode_calc_crc(mp, dip);
-
-       ASSERT(!list_empty(&bp->b_li_list));
        return error;
 }
 
index 64bdda72f7b272fece024f8d9e9eaa9c3777ef54..697248b7eb2be9b4a883437857af0a9cced66c2a 100644 (file)
@@ -660,6 +660,10 @@ xfs_inode_item_destroy(
  * list for other inodes that will run this function. We remove them from the
  * buffer list so we can process all the inode IO completions in one AIL lock
  * traversal.
+ *
+ * Note: Now that we attach the log item to the buffer when we first log the
+ * inode in memory, we can have unflushed inodes on the buffer list here. These
+ * inodes will have a zero ili_last_fields, so skip over them here.
  */
 void
 xfs_iflush_done(
@@ -677,12 +681,15 @@ xfs_iflush_done(
         */
        list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
                iip = INODE_ITEM(lip);
+
                if (xfs_iflags_test(iip->ili_inode, XFS_ISTALE)) {
-                       list_del_init(&lip->li_bio_list);
                        xfs_iflush_abort(iip->ili_inode);
                        continue;
                }
 
+               if (!iip->ili_last_fields)
+                       continue;
+
                list_move_tail(&lip->li_bio_list, &tmp);
 
                /* Do an unlocked check for needing the AIL lock. */
@@ -728,12 +735,16 @@ xfs_iflush_done(
                /*
                 * Remove the reference to the cluster buffer if the inode is
                 * clean in memory. Drop the buffer reference once we've dropped
-                * the locks we hold.
+                * the locks we hold. If the inode is dirty in memory, we need
+                * to put the inode item back on the buffer list for another
+                * pass through the flush machinery.
                 */
                ASSERT(iip->ili_item.li_buf == bp);
                if (!iip->ili_fields) {
                        iip->ili_item.li_buf = NULL;
                        drop_buffer = true;
+               } else {
+                       list_add(&lip->li_bio_list, &bp->b_li_list);
                }
                iip->ili_last_fields = 0;
                iip->ili_flush_lsn = 0;
@@ -777,6 +788,7 @@ xfs_iflush_abort(
                iip->ili_flush_lsn = 0;
                bp = iip->ili_item.li_buf;
                iip->ili_item.li_buf = NULL;
+               list_del_init(&iip->ili_item.li_bio_list);
                spin_unlock(&iip->ili_lock);
        }
        xfs_ifunlock(ip);