xfs: Ensure inode allocation buffers are fully replayed

author Dave Chinner <dchinner@redhat.com>

Thu, 20 May 2010 13:19:42 +0000 (23:19 +1000)

committer Alex Elder <aelder@sgi.com>

Mon, 24 May 2010 15:41:22 +0000 (10:41 -0500)
author Dave Chinner <dchinner@redhat.com>
Thu, 20 May 2010 13:19:42 +0000 (23:19 +1000)
committer Alex Elder <aelder@sgi.com>
Mon, 24 May 2010 15:41:22 +0000 (10:41 -0500)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index bcbb66150838e17afbbf9176dc1a513febabb6a7..02a80984aa05f13189f4c99f186f1f098fa02e3d 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -254,6 +254,20 @@ xfs_buf_item_format(
         vecp++;
         nvecs = 1;
  
+       /*
+        * If it is an inode buffer, transfer the in-memory state to the
+        * format flags and clear the in-memory state. We do not transfer
+        * this state if the inode buffer allocation has not yet been committed
+        * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
+        * correct replay of the inode allocation.
+        */
+       if (bip->bli_flags & XFS_BLI_INODE_BUF) {
+               if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+                     xfs_log_item_in_current_chkpt(&bip->bli_item)))
+                       bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+               bip->bli_flags &= ~XFS_BLI_INODE_BUF;
+       }
+
         if (bip->bli_flags & XFS_BLI_STALE) {
                 /*
                  * The buffer is stale, so all we need to log
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h

index 8cbb82b1d95c7e76e6ac925c60bcf98eb6eceedd..f20bb472d582f1e5b711c1593958b3593ad2167c 100644 (file)
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format {
  #define        XFS_BLI_LOGGED          0x08
  #define        XFS_BLI_INODE_ALLOC_BUF 0x10
  #define XFS_BLI_STALE_INODE    0x20
+#define        XFS_BLI_INODE_BUF       0x40
  
  #define XFS_BLI_FLAGS \
         { XFS_BLI_HOLD,         "HOLD" }, \
@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format {
         { XFS_BLI_STALE,        "STALE" }, \
         { XFS_BLI_LOGGED,       "LOGGED" }, \
         { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
-       { XFS_BLI_STALE_INODE,  "STALE_INODE" }
+       { XFS_BLI_STALE_INODE,  "STALE_INODE" }, \
+       { XFS_BLI_INODE_BUF,    "INODE_BUF" }
  
  
  #ifdef __KERNEL__
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h

index 4a0c57432e8fa5099270f6ce255775f2f1ae0df6..04c78e642cc83b237ed0c5d6258f82e0ccdcad26 100644 (file)
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -198,6 +198,7 @@ xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
  int    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                 struct xfs_log_vec *log_vector,
                                 xfs_lsn_t *commit_lsn, int flags);
+bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
  
  #endif
  
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 9b21f80f31ce70d44d566c2efa8d75b6d2abe358..bb17cc044bf37c994dbf877292ecaf36aa3842fb 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -199,6 +199,15 @@ xlog_cil_insert(
         list_move_tail(&item->li_cil, &cil->xc_cil);
         ctx->nvecs += diff_iovecs;
  
+       /*
+        * If this is the first time the item is being committed to the CIL,
+        * store the sequence number on the log item so we can tell
+        * in future commits whether this is the first checkpoint the item is
+        * being committed into.
+        */
+       if (!item->li_seq)
+               item->li_seq = ctx->sequence;
+
         /*
          * Now transfer enough transaction reservation to the context ticket
          * for the checkpoint. The context ticket is special - the unit
@@ -325,6 +334,10 @@ xlog_cil_free_logvec(
   * For more specific information about the order of operations in
   * xfs_log_commit_cil() please refer to the comments in
   * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit; returns without it held once background commits are
+ * allowed again.
   */
  int
  xfs_log_commit_cil(
@@ -678,3 +691,35 @@ restart:
         spin_unlock(&cil->xc_cil_lock);
         return commit_lsn;
  }
+
+/*
+ * Return true only if the item's recorded first-commit sequence (li_seq)
+ * equals the current CIL context's sequence; being on the CIL list alone is
+ * not enough. False if XFS_MOUNT_DELAYLOG is unset or li_cil is empty.
+ *
+ * Note: for this to be used in a non-racy manner, it has to be called with
+ * CIL flushing locked out. As a result, it should only be used during the
+ * transaction commit process when deciding what to format into the item.
+ */
+bool
+xfs_log_item_in_current_chkpt(
+       struct xfs_log_item *lip)
+{
+       struct xfs_cil_ctx *ctx;
+
+       if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
+               return false;
+       if (list_empty(&lip->li_cil))
+               return false;
+
+       ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
+
+       /*
+        * li_seq is written on the first commit of a log item to record the
+        * first checkpoint it is written to. Hence if it is different to the
+        * current sequence, we're in a new checkpoint.
+        */
+       if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
+               return false;
+       return true;
+}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index b1ea20c66b3e3cadedc2ce60384a23c9510f5de9..8c69e7824f68439d01d0b27009f961dbd8e0c947 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -835,6 +835,7 @@ typedef struct xfs_log_item {
         /* delayed logging */
         struct list_head                li_cil;         /* CIL pointers */
         struct xfs_log_vec              *li_lv;         /* active log vector */
+       xfs_lsn_t                       li_seq;         /* CIL commit seq */
  } xfs_log_item_t;
  
  #define        XFS_LI_IN_AIL   0x1
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 3390c3e7441b7ce0058c1a3f87dddbd410f0fd0e..63d81a22f4fd60c6d6abc93538c37e6480c8fd3b 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -792,7 +792,7 @@ xfs_trans_binval(
         XFS_BUF_UNDELAYWRITE(bp);
         XFS_BUF_STALE(bp);
         bip->bli_flags |= XFS_BLI_STALE;
-       bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
+       bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
         bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
         bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
         memset((char *)(bip->bli_format.blf_data_map), 0,
@@ -802,16 +802,16 @@ xfs_trans_binval(
  }
  
  /*
- * This call is used to indicate that the buffer contains on-disk
- * inodes which must be handled specially during recovery.  They
- * require special handling because only the di_next_unlinked from
- * the inodes in the buffer should be recovered.  The rest of the
- * data in the buffer is logged via the inodes themselves.
+ * This call is used to indicate that the buffer contains on-disk inodes which
+ * must be handled specially during recovery.  They require special handling
+ * because only the di_next_unlinked from the inodes in the buffer should be
+ * recovered.  The rest of the data in the buffer is logged via the inodes
+ * themselves.
   *
- * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
- * format structure so that we'll know what to do at recovery time.
+ * All we do is set the XFS_BLI_INODE_BUF flag in the item's flags so it can be
+ * transferred to the buffer's log format structure so that we'll know what to
+ * do at recovery time.
   */
-/* ARGSUSED */
  void
  xfs_trans_inode_buf(
         xfs_trans_t     *tp,
@@ -826,7 +826,7 @@ xfs_trans_inode_buf(
         bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
  
-       bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+       bip->bli_flags |= XFS_BLI_INODE_BUF;
  }
  
  /*
author	Dave Chinner <dchinner@redhat.com>
	Thu, 20 May 2010 13:19:42 +0000 (23:19 +1000)
committer	Alex Elder <aelder@sgi.com>
	Mon, 24 May 2010 15:41:22 +0000 (10:41 -0500)
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_buf_item.h		patch \| blob \| history
fs/xfs/xfs_log.h		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_trans.h		patch \| blob \| history
fs/xfs/xfs_trans_buf.c		patch \| blob \| history