Merge tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.txt

index 68604e67a495fce4db18e0c76e58237c73080854..8db0121d0980c4b7293f76eb8331d09162cc21e9 100644 (file)
--- a/Documentation/filesystems/xfs-self-describing-metadata.txt
+++ b/Documentation/filesystems/xfs-self-describing-metadata.txt
@@ -222,7 +222,7 @@ static void
  xfs_foo_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
  
          if ((xfs_sb_version_hascrc(&mp->m_sb) &&
               !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
@@ -245,7 +245,7 @@ static bool
  xfs_foo_verify(
         struct xfs_buf          *bp)
  {
-        struct xfs_mount       *mp = bp->b_target->bt_mount;
+        struct xfs_mount       *mp = bp->b_mount;
          struct xfs_ondisk_hdr  *hdr = bp->b_addr;
  
          if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
@@ -272,7 +272,7 @@ static bool
  xfs_foo_verify(
         struct xfs_buf          *bp)
  {
-        struct xfs_mount       *mp = bp->b_target->bt_mount;
+        struct xfs_mount       *mp = bp->b_mount;
          struct xfs_ondisk_hdr  *hdr = bp->b_addr;
  
          if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
@@ -297,7 +297,7 @@ static void
  xfs_foo_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_fspriv;
  
         if (!xfs_foo_verify(bp)) {
diff --git a/MAINTAINERS b/MAINTAINERS

index c7d42239d8d9639ed6725c2784c3f01c23f3799e..1be025959be9ea58580a9cf133d995a561606200 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17544,7 +17544,13 @@ W:     http://xfs.org/
  T:     git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
  S:     Supported
  F:     Documentation/filesystems/xfs.txt
+F:     Documentation/ABI/testing/sysfs-fs-xfs
+F:     Documentation/filesystems/xfs.txt
+F:     Documentation/filesystems/xfs-delayed-logging-design.txt
+F:     Documentation/filesystems/xfs-self-describing-metadata.txt
  F:     fs/xfs/
+F:     include/uapi/linux/dqblk_xfs.h
+F:     include/uapi/linux/fsmap.h
  
  XILINX AXI ETHERNET DRIVER
  M:     Anirudha Sarangi <anirudh@xilinx.com>
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile

index 91831975363b927687acab64d73b3f76f10e81b2..b74a471692970b825706fd96843c402597d509fc 100644 (file)
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -62,6 +62,7 @@ xfs-y                         += xfs_aops.o \
                                    xfs_attr_inactive.o \
                                    xfs_attr_list.o \
                                    xfs_bmap_util.o \
+                                  xfs_bio_io.o \
                                    xfs_buf.o \
                                    xfs_dir2_readdir.o \
                                    xfs_discard.o \
@@ -80,9 +81,11 @@ xfs-y                                += xfs_aops.o \
                                    xfs_iops.o \
                                    xfs_inode.o \
                                    xfs_itable.o \
+                                  xfs_iwalk.o \
                                    xfs_message.o \
                                    xfs_mount.o \
                                    xfs_mru_cache.o \
+                                  xfs_pwork.o \
                                    xfs_reflink.o \
                                    xfs_stats.o \
                                    xfs_super.o \
@@ -104,12 +107,8 @@ xfs-y                              += xfs_log.o \
                                    xfs_rmap_item.o \
                                    xfs_log_recover.o \
                                    xfs_trans_ail.o \
-                                  xfs_trans_bmap.o \
                                    xfs_trans_buf.o \
-                                  xfs_trans_extfree.o \
-                                  xfs_trans_inode.o \
-                                  xfs_trans_refcount.o \
-                                  xfs_trans_rmap.o \
+                                  xfs_trans_inode.o
  
  # optional features
  xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c

index fdd9d6ede25ca74065c2b092c742031068270aae..16bb9a3286781fa41ad319dcbecc99f4af4251fa 100644 (file)
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -3,12 +3,7 @@
   * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   * All Rights Reserved.
   */
-#include <linux/mm.h>
  #include <linux/sched/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include "kmem.h"
  #include "xfs_message.h"
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h

index 8e6b3ba81c03e398d2b91c76c456760c2a97ae82..267655acd42681e0cec5253e94d05681bb9f2c13 100644 (file)
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
         return kmem_zone_alloc(zone, flags | KM_ZERO);
  }
  
+static inline struct page *
+kmem_to_page(void *addr)
+{
+       if (is_vmalloc_addr(addr))
+               return vmalloc_to_page(addr);
+       return virt_to_page(addr);
+}
+
  #endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c

index b0c89f54d1bb05f822f3148daec8d56800f62c28..5de296b34ab1f618ea489225cbcd054bfd9aa7f9 100644 (file)
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -10,6 +10,7 @@
  #include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
+#include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_btree.h"
@@ -44,6 +45,12 @@ xfs_get_aghdr_buf(
         return bp;
  }
  
+static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
+{
+       return mp->m_sb.sb_logstart > 0 &&
+              id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+}
+
  /*
   * Generic btree root block init function
   */
@@ -53,40 +60,85 @@ xfs_btroot_init(
         struct xfs_buf          *bp,
         struct aghdr_init_data  *id)
  {
-       xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
+       xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
  }
  
-/*
- * Alloc btree root block init functions
- */
+/* Finish initializing a free space btree. */
  static void
-xfs_bnoroot_init(
+xfs_freesp_init_recs(
         struct xfs_mount        *mp,
         struct xfs_buf          *bp,
         struct aghdr_init_data  *id)
  {
         struct xfs_alloc_rec    *arec;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
  
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
         arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
         arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+
+       if (is_log_ag(mp, id)) {
+               struct xfs_alloc_rec    *nrec;
+               xfs_agblock_t           start = XFS_FSB_TO_AGBNO(mp,
+                                                       mp->m_sb.sb_logstart);
+
+               ASSERT(start >= mp->m_ag_prealloc_blocks);
+               if (start != mp->m_ag_prealloc_blocks) {
+                       /*
+                        * Modify first record to pad stripe align of log
+                        */
+                       arec->ar_blockcount = cpu_to_be32(start -
+                                               mp->m_ag_prealloc_blocks);
+                       nrec = arec + 1;
+
+                       /*
+                        * Insert second record at start of internal log
+                        * which then gets trimmed.
+                        */
+                       nrec->ar_startblock = cpu_to_be32(
+                                       be32_to_cpu(arec->ar_startblock) +
+                                       be32_to_cpu(arec->ar_blockcount));
+                       arec = nrec;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+               }
+               /*
+                * Change record start to after the internal log
+                */
+               be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
+       }
+
+       /*
+        * Calculate the record block count and check for the case where
+        * the log might have consumed all available space in the AG. If
+        * so, reset the record count to 0 to avoid exposure of an invalid
+        * record start block.
+        */
         arec->ar_blockcount = cpu_to_be32(id->agsize -
                                           be32_to_cpu(arec->ar_startblock));
+       if (!arec->ar_blockcount)
+               block->bb_numrecs = 0;
  }
  
+/*
+ * Alloc btree root block init functions
+ */
  static void
-xfs_cntroot_init(
+xfs_bnoroot_init(
         struct xfs_mount        *mp,
         struct xfs_buf          *bp,
         struct aghdr_init_data  *id)
  {
-       struct xfs_alloc_rec    *arec;
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+       xfs_freesp_init_recs(mp, bp, id);
+}
  
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
-       arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-       arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
-       arec->ar_blockcount = cpu_to_be32(id->agsize -
-                                         be32_to_cpu(arec->ar_startblock));
+static void
+xfs_cntroot_init(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp,
+       struct aghdr_init_data  *id)
+{
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+       xfs_freesp_init_recs(mp, bp, id);
  }
  
  /*
@@ -101,7 +153,7 @@ xfs_rmaproot_init(
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_rmap_rec     *rrec;
  
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
  
         /*
          * mark the AG header regions as static metadata The BNO
@@ -149,6 +201,18 @@ xfs_rmaproot_init(
                 rrec->rm_offset = 0;
                 be16_add_cpu(&block->bb_numrecs, 1);
         }
+
+       /* account for the log space */
+       if (is_log_ag(mp, id)) {
+               rrec = XFS_RMAP_REC_ADDR(block,
+                               be16_to_cpu(block->bb_numrecs) + 1);
+               rrec->rm_startblock = cpu_to_be32(
+                               XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
+               rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
+               rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+               rrec->rm_offset = 0;
+               be16_add_cpu(&block->bb_numrecs, 1);
+       }
  }
  
  /*
@@ -209,6 +273,14 @@ xfs_agfblock_init(
                 agf->agf_refcount_level = cpu_to_be32(1);
                 agf->agf_refcount_blocks = cpu_to_be32(1);
         }
+
+       if (is_log_ag(mp, id)) {
+               int64_t logblocks = mp->m_sb.sb_logblocks;
+
+               be32_add_cpu(&agf->agf_freeblks, -logblocks);
+               agf->agf_longest = cpu_to_be32(id->agsize -
+                       XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
+       }
  }
  
  static void
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c

index e2ba2a3b63b20a6378283e35e1c58c939f1d2476..87a9747f1d36b905755070d9cd85f15827d9afb7 100644 (file)
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -9,20 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_alloc.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
  #include "xfs_rmap_btree.h"
  #include "xfs_btree.h"
  #include "xfs_refcount_btree.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index a9ff3cf82cce0bb0e96bfed46cac5333a6be36ec..372ad55631fc447190e3eafeb47aca5d7a2a1a9a 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -13,7 +13,6 @@
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
-#include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_alloc_btree.h"
@@ -21,7 +20,6 @@
  #include "xfs_extent_busy.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
-#include "xfs_cksum.h"
  #include "xfs_trace.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
@@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq;
  STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
-               xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  
  /*
   * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
@@ -555,7 +551,7 @@ static xfs_failaddr_t
  xfs_agfl_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
         int             i;
  
@@ -596,7 +592,7 @@ static void
  xfs_agfl_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_failaddr_t  fa;
  
         /*
@@ -621,7 +617,7 @@ static void
  xfs_agfl_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         xfs_failaddr_t          fa;
  
@@ -699,6 +695,107 @@ xfs_alloc_update_counters(
   * Allocation group level functions.
   */
  
+/*
+ * Deal with the case where only small freespaces remain. Either return the
+ * contents of the last freespace record, or allocate space from the freelist if
+ * there is nothing in the tree.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_small(
+       struct xfs_alloc_arg    *args,  /* allocation argument structure */
+       struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
+       xfs_agblock_t           *fbnop, /* result block number */
+       xfs_extlen_t            *flenp, /* result length */
+       int                     *stat)  /* status: 0-freelist, 1-normal/none */
+{
+       int                     error = 0;
+       xfs_agblock_t           fbno = NULLAGBLOCK;
+       xfs_extlen_t            flen = 0;
+       int                     i = 0;
+
+       /*
+        * If a cntbt cursor is provided, try to allocate the largest record in
+        * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
+        * allocation. Make sure to respect minleft even when pulling from the
+        * freelist.
+        */
+       if (ccur)
+               error = xfs_btree_decrement(ccur, 0, &i);
+       if (error)
+               goto error;
+       if (i) {
+               error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
+               if (error)
+                       goto error;
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
+               goto out;
+       }
+
+       if (args->minlen != 1 || args->alignment != 1 ||
+           args->resv == XFS_AG_RESV_AGFL ||
+           (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
+            args->minleft))
+               goto out;
+
+       error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+       if (error)
+               goto error;
+       if (fbno == NULLAGBLOCK)
+               goto out;
+
+       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+                             xfs_alloc_allow_busy_reuse(args->datatype));
+
+       if (xfs_alloc_is_userdata(args->datatype)) {
+               struct xfs_buf  *bp;
+
+               bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
+               if (!bp) {
+                       error = -EFSCORRUPTED;
+                       goto error;
+               }
+               xfs_trans_binval(args->tp, bp);
+       }
+       *fbnop = args->agbno = fbno;
+       *flenp = args->len = 1;
+       XFS_WANT_CORRUPTED_GOTO(args->mp,
+               fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+               error);
+       args->wasfromfl = 1;
+       trace_xfs_alloc_small_freelist(args);
+
+       /*
+        * If we're feeding an AGFL block to something that doesn't live in the
+        * free space, we need to clear out the OWN_AG rmap.
+        */
+       error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
+                             &XFS_RMAP_OINFO_AG);
+       if (error)
+               goto error;
+
+       *stat = 0;
+       return 0;
+
+out:
+       /*
+        * Can't do the allocation, give up.
+        */
+       if (flen < args->minlen) {
+               args->agbno = NULLAGBLOCK;
+               trace_xfs_alloc_small_notenough(args);
+               flen = 0;
+       }
+       *fbnop = fbno;
+       *flenp = flen;
+       *stat = 1;
+       trace_xfs_alloc_small_done(args);
+       return 0;
+
+error:
+       trace_xfs_alloc_small_error(args);
+       return error;
+}
+
  /*
   * Allocate a variable extent in the allocation group agno.
   * Type and bno are used to determine where in the allocation group the
@@ -1582,112 +1679,6 @@ out_nominleft:
         return 0;
  }
  
-/*
- * Deal with the case where only small freespaces remain.
- * Either return the contents of the last freespace record,
- * or allocate space from the freelist if there is nothing in the tree.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent_small(
-       xfs_alloc_arg_t *args,  /* allocation argument structure */
-       xfs_btree_cur_t *ccur,  /* by-size cursor */
-       xfs_agblock_t   *fbnop, /* result block number */
-       xfs_extlen_t    *flenp, /* result length */
-       int             *stat)  /* status: 0-freelist, 1-normal/none */
-{
-       int             error;
-       xfs_agblock_t   fbno;
-       xfs_extlen_t    flen;
-       int             i;
-
-       if ((error = xfs_btree_decrement(ccur, 0, &i)))
-               goto error0;
-       if (i) {
-               if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-       }
-       /*
-        * Nothing in the btree, try the freelist.  Make sure
-        * to respect minleft even when pulling from the
-        * freelist.
-        */
-       else if (args->minlen == 1 && args->alignment == 1 &&
-                args->resv != XFS_AG_RESV_AGFL &&
-                (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
-                 > args->minleft)) {
-               error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
-               if (error)
-                       goto error0;
-               if (fbno != NULLAGBLOCK) {
-                       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
-                             xfs_alloc_allow_busy_reuse(args->datatype));
-
-                       if (xfs_alloc_is_userdata(args->datatype)) {
-                               xfs_buf_t       *bp;
-
-                               bp = xfs_btree_get_bufs(args->mp, args->tp,
-                                       args->agno, fbno, 0);
-                               if (!bp) {
-                                       error = -EFSCORRUPTED;
-                                       goto error0;
-                               }
-                               xfs_trans_binval(args->tp, bp);
-                       }
-                       args->len = 1;
-                       args->agbno = fbno;
-                       XFS_WANT_CORRUPTED_GOTO(args->mp,
-                               args->agbno + args->len <=
-                               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
-                               error0);
-                       args->wasfromfl = 1;
-                       trace_xfs_alloc_small_freelist(args);
-
-                       /*
-                        * If we're feeding an AGFL block to something that
-                        * doesn't live in the free space, we need to clear
-                        * out the OWN_AG rmap.
-                        */
-                       error = xfs_rmap_free(args->tp, args->agbp, args->agno,
-                                       fbno, 1, &XFS_RMAP_OINFO_AG);
-                       if (error)
-                               goto error0;
-
-                       *stat = 0;
-                       return 0;
-               }
-               /*
-                * Nothing in the freelist.
-                */
-               else
-                       flen = 0;
-       }
-       /*
-        * Can't allocate from the freelist for some reason.
-        */
-       else {
-               fbno = NULLAGBLOCK;
-               flen = 0;
-       }
-       /*
-        * Can't do the allocation, give up.
-        */
-       if (flen < args->minlen) {
-               args->agbno = NULLAGBLOCK;
-               trace_xfs_alloc_small_notenough(args);
-               flen = 0;
-       }
-       *fbnop = fbno;
-       *flenp = flen;
-       *stat = 1;
-       trace_xfs_alloc_small_done(args);
-       return 0;
-
-error0:
-       trace_xfs_alloc_small_error(args);
-       return error;
-}
-
  /*
   * Free the extent starting at agno/bno for length.
   */
@@ -2095,7 +2086,7 @@ xfs_free_agfl_block(
         if (error)
                 return error;
  
-       bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
+       bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
         if (!bp)
                 return -EFSCORRUPTED;
         xfs_trans_binval(tp, bp);
@@ -2586,7 +2577,7 @@ static xfs_failaddr_t
  xfs_agf_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_agf          *agf = XFS_BUF_TO_AGF(bp);
  
         if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -2644,7 +2635,7 @@ static void
  xfs_agf_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_failaddr_t  fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2661,7 +2652,7 @@ static void
  xfs_agf_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         xfs_failaddr_t          fa;
  
@@ -3146,7 +3137,7 @@ xfs_alloc_has_record(
  
  /*
   * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
- * error code or XFS_BTREE_QUERY_RANGE_ABORT.
+ * error code or XFS_ITER_*.
   */
  int
  xfs_agfl_walk(
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c

index 9fe949f6055ec32e89e08d3cf01608cdbb678f42..2a94543857a195e55cd96bc2026075ec7559dc7b 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -17,7 +17,6 @@
  #include "xfs_extent_busy.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  
  
@@ -292,7 +291,7 @@ static xfs_failaddr_t
  xfs_allocbt_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_perag        *pag = bp->b_pag;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c

index c441f41f14e8ffc827fe0b1e1f1245779aa27a86..d48fcf11cc35a40616423f8de015b60817f2b74a 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -9,23 +9,18 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_attr_sf.h"
  #include "xfs_inode.h"
-#include "xfs_alloc.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_attr.h"
  #include "xfs_attr_leaf.h"
  #include "xfs_attr_remote.h"
-#include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h

index 3b0dce06e454f265eb7e97bb5712f0e10639e286..ff28ebf3b635d1940f1306006623a072602ecd5f 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context {
         struct xfs_inode                *dp;            /* inode */
         struct attrlist_cursor_kern     *cursor;        /* position in list */
         char                            *alist;         /* output buffer */
-       int                             seen_enough;    /* T/F: seen enough of list? */
+
+       /*
+        * Abort attribute list iteration if non-zero.  Can be used to pass
+        * error values to the xfs_attr_list caller.
+        */
+       int                             seen_enough;
+
         ssize_t                         count;          /* num used entries */
         int                             dupcnt;         /* count dup hashvals seen */
         int                             bufsize;        /* total buffer size */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c

index 1f6e3965ff7425456ca64477a713573cb5e7943a..70eb941d02e4dc406c3d16fc01cab71e9e68472f 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -10,14 +10,12 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_bmap.h"
  #include "xfs_attr_sf.h"
@@ -27,7 +25,6 @@
  #include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
  #include "xfs_dir2.h"
  #include "xfs_log.h"
  
@@ -240,7 +237,7 @@ xfs_attr3_leaf_verify(
         struct xfs_buf                  *bp)
  {
         struct xfs_attr3_icleaf_hdr     ichdr;
-       struct xfs_mount                *mp = bp->b_target->bt_mount;
+       struct xfs_mount                *mp = bp->b_mount;
         struct xfs_attr_leafblock       *leaf = bp->b_addr;
         struct xfs_attr_leaf_entry      *entries;
         uint32_t                        end;    /* must be 32bit - see below */
@@ -313,7 +310,7 @@ static void
  xfs_attr3_leaf_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
@@ -343,7 +340,7 @@ static void
  xfs_attr3_leaf_read_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -865,7 +862,7 @@ xfs_attr_shortform_allfit(
         struct xfs_attr3_icleaf_hdr leafhdr;
         int                     bytes;
         int                     i;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         leaf = bp->b_addr;
         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
@@ -1525,7 +1522,7 @@ xfs_attr_leaf_order(
  {
         struct xfs_attr3_icleaf_hdr ichdr1;
         struct xfs_attr3_icleaf_hdr ichdr2;
-       struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
+       struct xfs_mount *mp = leaf1_bp->b_mount;
  
         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
@@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash(
  {
         struct xfs_attr3_icleaf_hdr ichdr;
         struct xfs_attr_leaf_entry *entries;
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
  
         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
         entries = xfs_attr3_leaf_entryp(bp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c

index 65ff600a8067875f3d898481e1ef2c271d55bdd9..4eb30d3570457a1d3aab5b5b3ba2c18b3d62d890 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -16,18 +16,10 @@
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
-#include "xfs_alloc.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_trans_space.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
  #include "xfs_error.h"
  
  #define ATTR_RMTVALUE_MAPSIZE  1       /* # of map entries at once */
@@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify(
         bool            check_crc,
         xfs_failaddr_t  *failaddr)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         char            *ptr;
         int             len;
         xfs_daddr_t     bno;
@@ -175,7 +167,7 @@ static void
  xfs_attr3_rmt_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_failaddr_t  fa;
         int             blksize = mp->m_attr_geo->blksize;
         char            *ptr;
@@ -535,7 +527,7 @@ xfs_attr_rmtval_set(
                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
                 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
  
-               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
+               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
                 if (!bp)
                         return -ENOMEM;
                 bp->b_ops = &xfs_attr3_rmt_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c

index 40ce5f3094d19d399bca5758b428177dcadd7824..7071ff98fdbc8e569c4a274bd9c3ccbfa8a5ffc9 100644 (file)
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -5,7 +5,6 @@
   */
  #include "xfs.h"
  #include "xfs_log_format.h"
-#include "xfs_bit.h"
  
  /*
   * XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index 356ebd1cbe82518c9898c2bb1a2befa0f5206c5b..baf0b72c0a37deddd4936556a985cb020bf5ff4d 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -13,14 +13,10 @@
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
  #include "xfs_alloc.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -32,7 +28,6 @@
  #include "xfs_trans_space.h"
  #include "xfs_buf_item.h"
  #include "xfs_trace.h"
-#include "xfs_symlink.h"
  #include "xfs_attr_leaf.h"
  #include "xfs_filestream.h"
  #include "xfs_rmap.h"
@@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents(
                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                 if (!bp) {
                         bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
                                                 XFS_BMAP_BTREE_REF,
                                                 &xfs_bmbt_buf_ops);
                         if (error)
@@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents(
                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                 if (!bp) {
                         bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
                                                 XFS_BMAP_BTREE_REF,
                                                 &xfs_bmbt_buf_ops);
                         if (error)
@@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents(
         XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                         xfs_btree_check_lptr(cur, cbno, 1));
  #endif
-       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+       error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
                                 &xfs_bmbt_buf_ops);
         if (error)
                 return error;
@@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree(
         cur->bc_private.b.allocated++;
         ip->i_d.di_nblocks++;
         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
-       abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+       abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
         if (!abp) {
                 error = -EFSCORRUPTED;
                 goto out_unreserve_dquot;
@@ -878,7 +873,7 @@ xfs_bmap_local_to_extents(
         ASSERT(args.fsbno != NULLFSBLOCK);
         ASSERT(args.len == 1);
         tp->t_firstblock = args.fsbno;
-       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
  
         /*
          * Initialize the block, copy the data and log the remote buffer.
@@ -1203,7 +1198,7 @@ xfs_iread_extents(
          * pointer (leftmost) at each level.
          */
         while (level-- > 0) {
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+               error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
                 if (error)
                         goto out;
@@ -1276,7 +1271,7 @@ xfs_iread_extents(
                  */
                 if (bno == NULLFSBLOCK)
                         break;
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+               error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
                 if (error)
                         goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c

index aff82ed112c93c26f43bed5ada5fd4b82e4e3711..fbb18ba5d90538af2a34361981248e0a16278d7c 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -11,10 +11,8 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_alloc.h"
  #include "xfs_btree.h"
  #include "xfs_bmap_btree.h"
@@ -22,7 +20,6 @@
  #include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_rmap.h"
  
  /*
@@ -411,7 +408,7 @@ static xfs_failaddr_t
  xfs_bmbt_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         xfs_failaddr_t          fa;
         unsigned int            level;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c

index bbdae2b4559fc91d0e7f650fcfe6ed81868b5512..f1048efa4268053e3dbb39d6c9db1b685bbd7446 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -11,16 +11,13 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_buf_item.h"
  #include "xfs_btree.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_alloc.h"
  #include "xfs_log.h"
  
@@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc(
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_buf_log_item *bip = bp->b_log_item;
  
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
                 return;
         if (bip)
                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc(
         struct xfs_buf          *bp)
  {
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         if (xfs_sb_version_hascrc(&mp->m_sb)) {
                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
@@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc(
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_buf_log_item *bip = bp->b_log_item;
  
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
                 return;
         if (bip)
                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc(
         struct xfs_buf          *bp)
  {
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         if (xfs_sb_version_hascrc(&mp->m_sb)) {
                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
@@ -691,14 +688,13 @@ xfs_buf_t *                               /* buffer for fsbno */
  xfs_btree_get_bufl(
         xfs_mount_t     *mp,            /* file system mount point */
         xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_fsblock_t   fsbno,          /* file system block number */
-       uint            lock)           /* lock flags for get_buf */
+       xfs_fsblock_t   fsbno)          /* file system block number */
  {
         xfs_daddr_t             d;              /* real disk block address */
  
         ASSERT(fsbno != NULLFSBLOCK);
         d = XFS_FSB_TO_DADDR(mp, fsbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
  }
  
  /*
@@ -710,15 +706,14 @@ xfs_btree_get_bufs(
         xfs_mount_t     *mp,            /* file system mount point */
         xfs_trans_t     *tp,            /* transaction pointer */
         xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_agblock_t   agbno,          /* allocation group block number */
-       uint            lock)           /* lock flags for get_buf */
+       xfs_agblock_t   agbno)          /* allocation group block number */
  {
         xfs_daddr_t             d;              /* real disk block address */
  
         ASSERT(agno != NULLAGNUMBER);
         ASSERT(agbno != NULLAGBLOCK);
         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
  }
  
  /*
@@ -845,7 +840,6 @@ xfs_btree_read_bufl(
         struct xfs_mount        *mp,            /* file system mount point */
         struct xfs_trans        *tp,            /* transaction pointer */
         xfs_fsblock_t           fsbno,          /* file system block number */
-       uint                    lock,           /* lock flags for read_buf */
         struct xfs_buf          **bpp,          /* buffer for fsbno */
         int                     refval,         /* ref count value for buffer */
         const struct xfs_buf_ops *ops)
@@ -858,7 +852,7 @@ xfs_btree_read_bufl(
                 return -EFSCORRUPTED;
         d = XFS_FSB_TO_DADDR(mp, fsbno);
         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, lock, &bp, ops);
+                                  mp->m_bsize, 0, &bp, ops);
         if (error)
                 return error;
         if (bp)
@@ -1185,11 +1179,10 @@ xfs_btree_init_block(
         xfs_btnum_t     btnum,
         __u16           level,
         __u16           numrecs,
-       __u64           owner,
-       unsigned int    flags)
+       __u64           owner)
  {
         xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-                                btnum, level, numrecs, owner, flags);
+                                btnum, level, numrecs, owner, 0);
  }
  
  STATIC void
@@ -1288,7 +1281,6 @@ STATIC int
  xfs_btree_get_buf_block(
         struct xfs_btree_cur    *cur,
         union xfs_btree_ptr     *ptr,
-       int                     flags,
         struct xfs_btree_block  **block,
         struct xfs_buf          **bpp)
  {
@@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block(
         xfs_daddr_t             d;
         int                     error;
  
-       /* need to sort out how callers deal with failures first */
-       ASSERT(!(flags & XBF_TRYLOCK));
-
         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
         if (error)
                 return error;
         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
-                                mp->m_bsize, flags);
+                                mp->m_bsize, 0);
  
         if (!*bpp)
                 return -ENOMEM;
@@ -2706,7 +2695,7 @@ __xfs_btree_split(
         XFS_BTREE_STATS_INC(cur, alloc);
  
         /* Set up the new block as "right". */
-       error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+       error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
         if (error)
                 goto error0;
  
@@ -2961,7 +2950,7 @@ xfs_btree_new_iroot(
         XFS_BTREE_STATS_INC(cur, alloc);
  
         /* Copy the root into a real block. */
-       error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+       error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
         if (error)
                 goto error0;
  
@@ -3058,7 +3047,7 @@ xfs_btree_new_root(
         XFS_BTREE_STATS_INC(cur, alloc);
  
         /* Set up the new block. */
-       error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+       error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
         if (error)
                 goto error0;
  
@@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify(
         struct xfs_buf          *bp,
         uint64_t                owner)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
  
         if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify(
         struct xfs_buf          *bp,
         unsigned int            max_recs)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
  
         /* numrecs verification */
@@ -4484,7 +4473,7 @@ xfs_failaddr_t
  xfs_btree_sblock_v5hdr_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_perag        *pag = bp->b_pag;
  
@@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify(
         struct xfs_buf          *bp,
         unsigned int            max_recs)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         xfs_agblock_t           agno;
  
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h

index e3b3e9dce5da3b24caaa52389f985fc571803b4a..fa3cd8ab9aba34aa1a16b3352ea0d368fb1026aa 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -301,8 +301,7 @@ struct xfs_buf *                            /* buffer for fsbno */
  xfs_btree_get_bufl(
         struct xfs_mount        *mp,    /* file system mount point */
         struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_fsblock_t           fsbno,  /* file system block number */
-       uint                    lock);  /* lock flags for get_buf */
+       xfs_fsblock_t           fsbno); /* file system block number */
  
  /*
   * Get a buffer for the block, return it with no data read.
@@ -313,8 +312,7 @@ xfs_btree_get_bufs(
         struct xfs_mount        *mp,    /* file system mount point */
         struct xfs_trans        *tp,    /* transaction pointer */
         xfs_agnumber_t          agno,   /* allocation group number */
-       xfs_agblock_t           agbno,  /* allocation group block number */
-       uint                    lock);  /* lock flags for get_buf */
+       xfs_agblock_t           agbno); /* allocation group block number */
  
  /*
   * Check for the cursor referring to the last block at the given level.
@@ -345,7 +343,6 @@ xfs_btree_read_bufl(
         struct xfs_mount        *mp,    /* file system mount point */
         struct xfs_trans        *tp,    /* transaction pointer */
         xfs_fsblock_t           fsbno,  /* file system block number */
-       uint                    lock,   /* lock flags for read_buf */
         struct xfs_buf          **bpp,  /* buffer for fsbno */
         int                     refval, /* ref count value for buffer */
         const struct xfs_buf_ops *ops);
@@ -383,8 +380,7 @@ xfs_btree_init_block(
         xfs_btnum_t     btnum,
         __u16           level,
         __u16           numrecs,
-       __u64           owner,
-       unsigned int    flags);
+       __u64           owner);
  
  void
  xfs_btree_init_block_int(
@@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
  unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
  
  /* return codes */
-#define XFS_BTREE_QUERY_RANGE_CONTINUE 0       /* keep iterating */
-#define XFS_BTREE_QUERY_RANGE_ABORT    1       /* stop iterating */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT    (XFS_ITER_ABORT)    /* stop iterating */
  typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
                 union xfs_btree_rec *rec, void *priv);
  
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c

index e2737e2ac2aeb5e31a997ee3ed5f3800bf5ecfa7..d1c77fd0815da89cca4882c7bd4c19efac357745 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -12,20 +12,14 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
  #include "xfs_bmap.h"
-#include "xfs_attr.h"
  #include "xfs_attr_leaf.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_buf_item.h"
  #include "xfs_log.h"
  
@@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify(
         struct xfs_buf          *bp,
         struct xfs_da3_blkinfo  *hdr3)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_da_blkinfo   *hdr = &hdr3->hdr;
  
         if (!xfs_verify_magic16(bp, hdr->magic))
@@ -148,7 +142,7 @@ static xfs_failaddr_t
  xfs_da3_node_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_da_intnode   *hdr = bp->b_addr;
         struct xfs_da3_icnode_hdr ichdr;
         const struct xfs_dir_ops *ops;
@@ -186,7 +180,7 @@ static void
  xfs_da3_node_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c

index b39053dcb643976fe571a13db58202c0dcdb251f..b1ae572496b699b2c5e84b3f1792c850e32fae0b 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -11,11 +11,8 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
  
  /*
   * Shortform directory ops
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c

index 1c6bf2105939f15534c95ce7531fc6715212da31..eb2be2a6a25a92927e963b5f7f5ae7469ebc0417 100644 (file)
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
  #include "xfs_trans.h"
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c

index 156ce95c9c4545de6b03cd638463e23f7fa4746e..67840723edbbc68dcde52eeb5747725896525abd 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -5,20 +5,16 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c

index b7d6d78f4ce2f3ef263fd54d8523702045dc5c40..a6fb0cc2085eff66357a29411e1ae817da783e65 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -6,22 +6,19 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_buf_item.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_log.h"
  
  /*
@@ -50,7 +47,7 @@ static xfs_failaddr_t
  xfs_dir3_block_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
  
         if (!xfs_verify_magic(bp, hdr3->magic))
@@ -71,7 +68,7 @@ static void
  xfs_dir3_block_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -88,7 +85,7 @@ static void
  xfs_dir3_block_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c

index b7b9ce002cb97838d2413ad579d499c582fda3d6..2c79be4c3153855d5dc3425bc590c135fe7e4786 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -6,19 +6,16 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
  #include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
  #include "xfs_log.h"
  
  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
@@ -50,14 +47,13 @@ __xfs_dir3_data_check(
         int                     i;              /* leaf index */
         int                     lastfree;       /* last entry was unused */
         xfs_dir2_leaf_entry_t   *lep=NULL;      /* block leaf entries */
-       xfs_mount_t             *mp;            /* filesystem mount point */
+       struct xfs_mount        *mp = bp->b_mount;
         char                    *p;             /* current data position */
         int                     stale;          /* count of stale leaves */
         struct xfs_name         name;
         const struct xfs_dir_ops *ops;
         struct xfs_da_geometry  *geo;
  
-       mp = bp->b_target->bt_mount;
         geo = mp->m_dir_geo;
  
         /*
@@ -249,7 +245,7 @@ static xfs_failaddr_t
  xfs_dir3_data_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
  
         if (!xfs_verify_magic(bp, hdr3->magic))
@@ -298,7 +294,7 @@ static void
  xfs_dir3_data_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -315,7 +311,7 @@ static void
  xfs_dir3_data_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c

index 9c2a0a13ed61289b43cb2fbb6b86a29452c0323b..a53e4585a2f3ab6a88a5782a82820a62b971bffb 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -6,12 +6,11 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_dir2.h"
@@ -20,8 +19,6 @@
  #include "xfs_trace.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-#include "xfs_log.h"
  
  /*
   * Local function declarations.
@@ -144,7 +141,7 @@ static xfs_failaddr_t
  xfs_dir3_leaf_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dir2_leaf    *leaf = bp->b_addr;
         xfs_failaddr_t          fa;
  
@@ -159,7 +156,7 @@ static void
  xfs_dir3_leaf_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -176,7 +173,7 @@ static void
  xfs_dir3_leaf_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c

index 16731d2d684be4097277695d9d87937f4b5b0afe..afcc6642690a8d85aebab8849321b01c50016b98 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -6,12 +6,11 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_dir2.h"
@@ -20,7 +19,6 @@
  #include "xfs_trace.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
  #include "xfs_log.h"
  
  /*
@@ -84,7 +82,7 @@ static xfs_failaddr_t
  xfs_dir3_free_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dir2_free_hdr *hdr = bp->b_addr;
  
         if (!xfs_verify_magic(bp, hdr->magic))
@@ -110,7 +108,7 @@ static void
  xfs_dir3_free_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -127,7 +125,7 @@ static void
  xfs_dir3_free_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c

index 585dfdb7b6b688f13d43fed7dd5878afedfbc8a9..033589257f54f8cb08b513f863faffa3ed3b134b 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -5,16 +5,13 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_error.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c

index 88fa11071f9f1cb76373668f1e5645dc8ab6201e..e8bd688a4073d909df133b806ce7d79c27e00090 100644 (file)
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,8 +16,6 @@
  #include "xfs_trans.h"
  #include "xfs_qm.h"
  #include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
  
  int
  xfs_calc_dquots_per_chunk(
@@ -224,7 +222,7 @@ static xfs_failaddr_t
  xfs_dquot_buf_verify_struct(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         return xfs_dquot_buf_verify(mp, bp, false);
  }
@@ -233,7 +231,7 @@ static void
  xfs_dquot_buf_read_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         if (!xfs_dquot_buf_verify_crc(mp, bp, false))
                 return;
@@ -250,7 +248,7 @@ static void
  xfs_dquot_buf_readahead_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
             xfs_dquot_buf_verify(mp, bp, true) != NULL) {
@@ -268,7 +266,7 @@ static void
  xfs_dquot_buf_write_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         xfs_dquot_buf_verify(mp, bp, false);
  }
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h

index 9bb3c48843ec216591e9a00994d56fda5d45fb7f..c968b60cee15bf14d8d6d590db9e8e83802ce117 100644 (file)
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  #define        XFS_INO_MASK(k)                 (uint32_t)((1ULL << (k)) - 1)
  #define        XFS_INO_OFFSET_BITS(mp)         (mp)->m_sb.sb_inopblog
  #define        XFS_INO_AGBNO_BITS(mp)          (mp)->m_sb.sb_agblklog
-#define        XFS_INO_AGINO_BITS(mp)          (mp)->m_agino_log
+#define        XFS_INO_AGINO_BITS(mp)          ((mp)->m_ino_geo.agino_log)
  #define        XFS_INO_AGNO_BITS(mp)           (mp)->m_agno_log
  #define        XFS_INO_BITS(mp)                \
         XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h

index e7382c780ed7ec30ad13439a56752dd2097a27e7..52d03a3a02a4e4a8b21bae62d0871b494d642e2d 100644 (file)
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -97,7 +97,7 @@ struct getbmapx {
   * For use by backup and restore programs to set the XFS on-disk inode
   * fields di_dmevmask and di_dmstate.  These must be set to exactly and
   * only values previously obtained via xfs_bulkstat!  (Specifically the
- * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
   */
  #ifndef HAVE_FSDMIDATA
  struct fsdmidata {
@@ -328,7 +328,7 @@ typedef struct xfs_bstime {
         __s32           tv_nsec;        /* and nanoseconds      */
  } xfs_bstime_t;
  
-typedef struct xfs_bstat {
+struct xfs_bstat {
         __u64           bs_ino;         /* inode number                 */
         __u16           bs_mode;        /* type and mode                */
         __u16           bs_nlink;       /* number of links              */
@@ -356,7 +356,53 @@ typedef struct xfs_bstat {
         __u32           bs_dmevmask;    /* DMIG event mask              */
         __u16           bs_dmstate;     /* DMIG state info              */
         __u16           bs_aextents;    /* attribute number of extents  */
-} xfs_bstat_t;
+};
+
+/*
+ * New bulkstat structure that reports v5 features and fixes padding issues.
+ *
+ * Every member is a fixed-width unsigned integer.  The named members are
+ * declared 64-bit first, then 32-bit, then 16-bit, followed by explicit
+ * padding, so each member (including bs_pad[]) lands on an offset that is a
+ * multiple of its own size.  The blank-line separated groups are 16 bytes
+ * each up to and including bs_forkoff; the last 16-bit group (8 bytes) plus
+ * bs_pad[] (56 bytes) bring the declared total to 192 bytes.
+ *
+ * bs_pad2 and bs_pad[] are documented as zeroed.
+ *
+ * NOTE(review): bs_version presumably carries one of the
+ * XFS_BULKSTAT_VERSION_* values defined right after this structure --
+ * confirm against the ioctl implementation.
+ */
+struct xfs_bulkstat {
+       uint64_t        bs_ino;         /* inode number                 */
+       uint64_t        bs_size;        /* file size                    */
+
+       uint64_t        bs_blocks;      /* number of blocks             */
+       uint64_t        bs_xflags;      /* extended flags               */
+
+       uint64_t        bs_atime;       /* access time, seconds         */
+       uint64_t        bs_mtime;       /* modify time, seconds         */
+
+       uint64_t        bs_ctime;       /* inode change time, seconds   */
+       uint64_t        bs_btime;       /* creation time, seconds       */
+
+       uint32_t        bs_gen;         /* generation count             */
+       uint32_t        bs_uid;         /* user id                      */
+       uint32_t        bs_gid;         /* group id                     */
+       uint32_t        bs_projectid;   /* project id                   */
+
+       uint32_t        bs_atime_nsec;  /* access time, nanoseconds     */
+       uint32_t        bs_mtime_nsec;  /* modify time, nanoseconds     */
+       uint32_t        bs_ctime_nsec;  /* inode change time, nanoseconds */
+       uint32_t        bs_btime_nsec;  /* creation time, nanoseconds   */
+
+       uint32_t        bs_blksize;     /* block size                   */
+       uint32_t        bs_rdev;        /* device value                 */
+       uint32_t        bs_cowextsize_blks; /* cow extent size hint, blocks */
+       uint32_t        bs_extsize_blks; /* extent size hint, blocks    */
+
+       uint32_t        bs_nlink;       /* number of links              */
+       uint32_t        bs_extents;     /* number of extents            */
+       uint32_t        bs_aextents;    /* attribute number of extents  */
+       uint16_t        bs_version;     /* structure version            */
+       uint16_t        bs_forkoff;     /* inode fork offset in bytes   */
+
+       uint16_t        bs_sick;        /* sick inode metadata          */
+       uint16_t        bs_checked;     /* checked inode metadata       */
+       uint16_t        bs_mode;        /* type and mode                */
+       uint16_t        bs_pad2;        /* zeroed                       */
+
+       uint64_t        bs_pad[7];      /* zeroed                       */
+};
+
+#define XFS_BULKSTAT_VERSION_V1        (1)
+#define XFS_BULKSTAT_VERSION_V5        (5)
  
  /* bs_sick flags */
  #define XFS_BS_SICK_INODE      (1 << 0)  /* inode core */
@@ -374,7 +420,7 @@ typedef struct xfs_bstat {
   * to retain compatibility with "old" filesystems).
   */
  static inline uint32_t
-bstat_get_projid(struct xfs_bstat *bs)
+bstat_get_projid(const struct xfs_bstat *bs)
  {
         return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
  }
@@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs)
  /*
   * The user-level BulkStat Request interface structure.
   */
-typedef struct xfs_fsop_bulkreq {
+struct xfs_fsop_bulkreq {
         __u64           __user *lastip; /* last inode # pointer         */
         __s32           icount;         /* count of entries in buffer   */
         void            __user *ubuffer;/* user buffer for inode desc.  */
         __s32           __user *ocount; /* output count pointer         */
-} xfs_fsop_bulkreq_t;
-
+};
  
  /*
   * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
   */
-typedef struct xfs_inogrp {
+struct xfs_inogrp {
         __u64           xi_startino;    /* starting inode number        */
         __s32           xi_alloccount;  /* # bits set in allocmask      */
         __u64           xi_allocmask;   /* mask of allocated inodes     */
-} xfs_inogrp_t;
+};
  
+/*
+ * New inumbers structure that reports v5 features and fixes padding issues.
+ *
+ * The declared members add up to 24 bytes (8 + 8 + 1 + 1 + 6) and each one
+ * sits on an offset that is a multiple of its own size.  xi_allocmask is a
+ * 64-bit mask, so the number of bits set in it (xi_alloccount) can never
+ * exceed 64 and fits in a uint8_t.  xi_padding is documented as zero.
+ *
+ * NOTE(review): xi_version presumably carries one of the
+ * XFS_INUMBERS_VERSION_* values defined right after this structure --
+ * confirm against the ioctl implementation.
+ */
+struct xfs_inumbers {
+       uint64_t        xi_startino;    /* starting inode number        */
+       uint64_t        xi_allocmask;   /* mask of allocated inodes     */
+       uint8_t         xi_alloccount;  /* # bits set in allocmask      */
+       uint8_t         xi_version;     /* version                      */
+       uint8_t         xi_padding[6];  /* zero                         */
+};
+
+#define XFS_INUMBERS_VERSION_V1        (1)
+#define XFS_INUMBERS_VERSION_V5        (5)
+
+/*
+ * Header for bulk inode requests.
+ *
+ * This is the first member of both struct xfs_bulkstat_req and struct
+ * xfs_inumbers_req.  The declared members add up to 64 bytes
+ * (8 + 4 * 4 + 5 * 8), each on an offset that is a multiple of its own size.
+ *
+ * NOTE(review): the "I:", "O:" and "I/O:" prefixes in the member comments
+ * appear to mark fields supplied by the caller, filled in for the caller,
+ * or both -- confirm against the ioctl handlers.  The bits accepted in
+ * @flags are the XFS_BULK_IREQ_* definitions that follow this structure.
+ */
+struct xfs_bulk_ireq {
+       uint64_t        ino;            /* I/O: start with this inode   */
+       uint32_t        flags;          /* I/O: operation flags         */
+       uint32_t        icount;         /* I: count of entries in buffer */
+       uint32_t        ocount;         /* O: count of entries filled out */
+       uint32_t        agno;           /* I: see comment for IREQ_AGNO */
+       uint64_t        reserved[5];    /* must be zero                 */
+};
+
+/*
+ * Only return results from the specified @agno.  If @ino is zero, start
+ * with the first inode of @agno.
+ */
+#define XFS_BULK_IREQ_AGNO     (1 << 0)
+
+/*
+ * Return bulkstat information for a single inode, where @ino value is a
+ * special value, not a literal inode number.  See the XFS_BULK_IREQ_SPECIAL_*
+ * values below.  Not compatible with XFS_BULK_IREQ_AGNO.
+ */
+#define XFS_BULK_IREQ_SPECIAL  (1 << 1)
+
+#define XFS_BULK_IREQ_FLAGS_ALL        (XFS_BULK_IREQ_AGNO | \
+                                XFS_BULK_IREQ_SPECIAL)
+
+/* Operate on the root directory inode. */
+#define XFS_BULK_IREQ_SPECIAL_ROOT     (1)
+
+/*
+ * ioctl structures for v5 bulkstat and inumbers requests
+ *
+ * A request is a struct xfs_bulk_ireq header followed directly by a
+ * flexible array of records.  The array has no declared length, so the
+ * size of a request carrying @nr records must be computed as the size of
+ * the request structure plus @nr times the record size; that is what
+ * XFS_BULKSTAT_REQ_SIZE(nr) evaluates to.  @nr is parenthesized in the
+ * expansion, so an expression argument is multiplied as a whole.
+ */
+struct xfs_bulkstat_req {
+       struct xfs_bulk_ireq    hdr;
+       struct xfs_bulkstat     bulkstat[];
+};
+#define XFS_BULKSTAT_REQ_SIZE(nr)      (sizeof(struct xfs_bulkstat_req) + \
+                                        (nr) * sizeof(struct xfs_bulkstat))
+
+struct xfs_inumbers_req {              /* inumbers request: header + records */
+       struct xfs_bulk_ireq    hdr;            /* common bulk request header */
+       struct xfs_inumbers     inumbers[];     /* flexible array of records */
+};
+#define XFS_INUMBERS_REQ_SIZE(nr)      (sizeof(struct xfs_inumbers_req) + \
+                                        (nr) * sizeof(struct xfs_inumbers)) /* bytes for a request holding nr records */
  
  /*
   * Error injection.
@@ -529,7 +631,7 @@ typedef struct xfs_swapext
         xfs_off_t       sx_offset;      /* offset into file */
         xfs_off_t       sx_length;      /* leng from offset */
         char            sx_pad[16];     /* pad space, unused */
-       xfs_bstat_t     sx_stat;        /* stat of target b4 copy */
+       struct xfs_bstat sx_stat;       /* stat of target b4 copy */
  } xfs_swapext_t;
  
  /*
@@ -701,6 +803,8 @@ struct xfs_scrub_metadata {
  #define XFS_IOC_FSGEOMETRY_V4       _IOR ('X', 124, struct xfs_fsop_geom_v4)
  #define XFS_IOC_GOINGDOWN           _IOR ('X', 125, uint32_t)
  #define XFS_IOC_FSGEOMETRY          _IOR ('X', 126, struct xfs_fsop_geom)
+#define XFS_IOC_BULKSTAT            _IOR ('X', 127, struct xfs_bulkstat_req)
+#define XFS_IOC_INUMBERS            _IOR ('X', 128, struct xfs_inumbers_req)
  /*     XFS_IOC_GETFSUUID ---------- deprecated 140      */
  
  
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h

index 49ddfeac19f25bfac8b1b4ecb1d76d645fb1e1f5..272005ac8c882db9f84f68abb525572d04ef5ddd 100644 (file)
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
  
  void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
  void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
-void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs);
+void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
  
  #endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index fe9898875097f5cd8506f9664f636a393cce2e2e..04377ab75863033cbcb31312a6e6f5356498b3a3 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -12,17 +12,14 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_bmap.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_icreate_item.h"
@@ -31,20 +28,6 @@
  #include "xfs_log.h"
  #include "xfs_rmap.h"
  
-
-/*
- * Allocation group level functions.
- */
-int
-xfs_ialloc_cluster_alignment(
-       struct xfs_mount        *mp)
-{
-       if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-               return mp->m_sb.sb_inoalignmt;
-       return 1;
-}
-
  /*
   * Lookup a record by ino in the btree given by cur.
   */
@@ -299,7 +282,7 @@ xfs_ialloc_inode_init(
          * sizes, manipulate the inodes in buffers  which are multiples of the
          * blocks size.
          */
-       nbufs = length / mp->m_blocks_per_cluster;
+       nbufs = length / M_IGEO(mp)->blocks_per_cluster;
  
         /*
          * Figure out what version number to use in the inodes we create.  If
@@ -343,9 +326,10 @@ xfs_ialloc_inode_init(
                  * Get the block.
                  */
                 d = XFS_AGB_TO_DADDR(mp, agno, agbno +
-                               (j * mp->m_blocks_per_cluster));
+                               (j * M_IGEO(mp)->blocks_per_cluster));
                 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-                                        mp->m_bsize * mp->m_blocks_per_cluster,
+                                        mp->m_bsize *
+                                        M_IGEO(mp)->blocks_per_cluster,
                                          XBF_UNMAPPED);
                 if (!fbuf)
                         return -ENOMEM;
@@ -353,7 +337,7 @@ xfs_ialloc_inode_init(
                 /* Initialize the inode buffers and log them appropriately. */
                 fbuf->b_ops = &xfs_inode_buf_ops;
                 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
-               for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+               for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
                         int     ioffset = i << mp->m_sb.sb_inodelog;
                         uint    isize = xfs_dinode_size(version);
  
@@ -616,24 +600,26 @@ error:
   * Allocate new inodes in the allocation group specified by agbp.
   * Return 0 for success, else error code.
   */
-STATIC int                             /* error code or 0 */
+STATIC int
  xfs_ialloc_ag_alloc(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *agbp,          /* alloc group buffer */
-       int             *alloc)
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       int                     *alloc)
  {
-       xfs_agi_t       *agi;           /* allocation group header */
-       xfs_alloc_arg_t args;           /* allocation argument structure */
-       xfs_agnumber_t  agno;
-       int             error;
-       xfs_agino_t     newino;         /* new first inode's number */
-       xfs_agino_t     newlen;         /* new number of inodes */
-       int             isaligned = 0;  /* inode allocation at stripe unit */
-                                       /* boundary */
-       uint16_t        allocmask = (uint16_t) -1; /* init. to full chunk */
+       struct xfs_agi          *agi;
+       struct xfs_alloc_arg    args;
+       xfs_agnumber_t          agno;
+       int                     error;
+       xfs_agino_t             newino;         /* new first inode's number */
+       xfs_agino_t             newlen;         /* new number of inodes */
+       int                     isaligned = 0;  /* inode allocation at stripe */
+                                               /* unit boundary */
+       /* init. to full chunk */
+       uint16_t                allocmask = (uint16_t) -1;
         struct xfs_inobt_rec_incore rec;
-       struct xfs_perag *pag;
-       int             do_sparse = 0;
+       struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
+       int                     do_sparse = 0;
  
         memset(&args, 0, sizeof(args));
         args.tp = tp;
@@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc(
  #ifdef DEBUG
         /* randomly do sparse inode allocations */
         if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
-           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+           igeo->ialloc_min_blks < igeo->ialloc_blks)
                 do_sparse = prandom_u32() & 1;
  #endif
  
@@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc(
          * Locking will ensure that we don't have two callers in here
          * at one time.
          */
-       newlen = args.mp->m_ialloc_inos;
-       if (args.mp->m_maxicount &&
+       newlen = igeo->ialloc_inos;
+       if (igeo->maxicount &&
             percpu_counter_read_positive(&args.mp->m_icount) + newlen >
-                                                       args.mp->m_maxicount)
+                                                       igeo->maxicount)
                 return -ENOSPC;
-       args.minlen = args.maxlen = args.mp->m_ialloc_blks;
+       args.minlen = args.maxlen = igeo->ialloc_blks;
         /*
          * First try to allocate inodes contiguous with the last-allocated
          * chunk of inodes.  If the filesystem is striped, this will fill
@@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc(
         newino = be32_to_cpu(agi->agi_newino);
         agno = be32_to_cpu(agi->agi_seqno);
         args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
-                    args.mp->m_ialloc_blks;
+                    igeo->ialloc_blks;
         if (do_sparse)
                 goto sparse_alloc;
         if (likely(newino != NULLAGINO &&
@@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc(
                  * but not to use them in the actual exact allocation.
                  */
                 args.alignment = 1;
-               args.minalignslop = args.mp->m_cluster_align - 1;
+               args.minalignslop = igeo->cluster_align - 1;
  
                 /* Allow space for the inode btree to split. */
-               args.minleft = args.mp->m_in_maxlevels - 1;
+               args.minleft = igeo->inobt_maxlevels - 1;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
  
@@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc(
                  * pieces, so don't need alignment anyway.
                  */
                 isaligned = 0;
-               if (args.mp->m_sinoalign) {
+               if (igeo->ialloc_align) {
                         ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
                         args.alignment = args.mp->m_dalign;
                         isaligned = 1;
                 } else
-                       args.alignment = args.mp->m_cluster_align;
+                       args.alignment = igeo->cluster_align;
                 /*
                  * Need to figure out where to allocate the inode blocks.
                  * Ideally they should be spaced out through the a.g.
@@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc(
                 /*
                  * Allow space for the inode btree to split.
                  */
-               args.minleft = args.mp->m_in_maxlevels - 1;
+               args.minleft = igeo->inobt_maxlevels - 1;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
         }
@@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc(
                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
                 args.agbno = be32_to_cpu(agi->agi_root);
                 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-               args.alignment = args.mp->m_cluster_align;
+               args.alignment = igeo->cluster_align;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
         }
@@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc(
          * the sparse allocation length is smaller than a full chunk.
          */
         if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
-           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+           igeo->ialloc_min_blks < igeo->ialloc_blks &&
             args.fsbno == NULLFSBLOCK) {
  sparse_alloc:
                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -773,7 +759,7 @@ sparse_alloc:
                 args.alignment = args.mp->m_sb.sb_spino_align;
                 args.prod = 1;
  
-               args.minlen = args.mp->m_ialloc_min_blks;
+               args.minlen = igeo->ialloc_min_blks;
                 args.maxlen = args.minlen;
  
                 /*
@@ -789,7 +775,7 @@ sparse_alloc:
                 args.min_agbno = args.mp->m_sb.sb_inoalignmt;
                 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
                                             args.mp->m_sb.sb_inoalignmt) -
-                                args.mp->m_ialloc_blks;
+                                igeo->ialloc_blks;
  
                 error = xfs_alloc_vextent(&args);
                 if (error)
@@ -1006,7 +992,7 @@ xfs_ialloc_ag_select(
                  * space needed for alignment of inode chunks when checking the
                  * longest contiguous free space in the AG - this prevents us
                  * from getting ENOSPC because we have free space larger than
-                * m_ialloc_blks but alignment constraints prevent us from using
+                * ialloc_blks but alignment constraints prevent us from using
                  * it.
                  *
                  * If we can't find an AG with space for full alignment slack to
@@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select(
                  * if we fail allocation due to alignment issues then it is most
                  * likely a real ENOSPC condition.
                  */
-               ineed = mp->m_ialloc_min_blks;
+               ineed = M_IGEO(mp)->ialloc_min_blks;
                 if (flags && ineed > 1)
-                       ineed += mp->m_cluster_align;
+                       ineed += M_IGEO(mp)->cluster_align;
                 longest = pag->pagf_longest;
                 if (!longest)
                         longest = pag->pagf_flcount > 0;
@@ -1703,6 +1689,7 @@ xfs_dialloc(
         int                     noroom = 0;
         xfs_agnumber_t          start_agno;
         struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
         int                     okalloc = 1;
  
         if (*IO_agbp) {
@@ -1733,9 +1720,9 @@ xfs_dialloc(
          * Read rough value of mp->m_icount by percpu_counter_read_positive,
          * which will sacrifice the preciseness but improve the performance.
          */
-       if (mp->m_maxicount &&
-           percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
-                                                       > mp->m_maxicount) {
+       if (igeo->maxicount &&
+           percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
+                                                       > igeo->maxicount) {
                 noroom = 1;
                 okalloc = 0;
         }
@@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk(
         if (!xfs_inobt_issparse(rec->ir_holemask)) {
                 /* not sparse, calculate extent info directly */
                 xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
-                                 mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES);
+                                 M_IGEO(mp)->ialloc_blks,
+                                 &XFS_RMAP_OINFO_INODES);
                 return;
         }
  
@@ -2261,7 +2249,7 @@ xfs_imap_lookup(
  
         /* check that the returned record contains the required inode */
         if (rec.ir_startino > agino ||
-           rec.ir_startino + mp->m_ialloc_inos <= agino)
+           rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
                 return -EINVAL;
  
         /* for untrusted inodes check it is allocated first */
@@ -2352,7 +2340,7 @@ xfs_imap(
          * If the inode cluster size is the same as the blocksize or
          * smaller we get to the buffer by simple arithmetics.
          */
-       if (mp->m_blocks_per_cluster == 1) {
+       if (M_IGEO(mp)->blocks_per_cluster == 1) {
                 offset = XFS_INO_TO_OFFSET(mp, ino);
                 ASSERT(offset < mp->m_sb.sb_inopblock);
  
@@ -2368,8 +2356,8 @@ xfs_imap(
          * find the location. Otherwise we have to do a btree
          * lookup to find the location.
          */
-       if (mp->m_inoalign_mask) {
-               offset_agbno = agbno & mp->m_inoalign_mask;
+       if (M_IGEO(mp)->inoalign_mask) {
+               offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
                 chunk_agbno = agbno - offset_agbno;
         } else {
                 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
@@ -2381,13 +2369,13 @@ xfs_imap(
  out_map:
         ASSERT(agbno >= chunk_agbno);
         cluster_agbno = chunk_agbno +
-               ((offset_agbno / mp->m_blocks_per_cluster) *
-                mp->m_blocks_per_cluster);
+               ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
+                M_IGEO(mp)->blocks_per_cluster);
         offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
                 XFS_INO_TO_OFFSET(mp, ino);
  
         imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
-       imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+       imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
         imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
  
         /*
@@ -2408,20 +2396,6 @@ out_map:
         return 0;
  }
  
-/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
-       xfs_mount_t     *mp)            /* file system mount structure */
-{
-       uint            inodes;
-
-       inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
-       mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
-                                                        inodes);
-}
-
  /*
   * Log specified fields for the ag hdr (inode section). The growth of the agi
   * structure over time requires that we interpret the buffer as two logical
@@ -2493,7 +2467,7 @@ static xfs_failaddr_t
  xfs_agi_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
         int             i;
  
@@ -2545,7 +2519,7 @@ static void
  xfs_agi_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_failaddr_t  fa;
  
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2562,7 +2536,7 @@ static void
  xfs_agi_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         xfs_failaddr_t          fa;
  
@@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes(
         *freecount = ci.freecount;
         return 0;
  }
+
+/*
+ * Initialize inode-related geometry information.
+ *
+ * Populates the struct xfs_ino_geometry obtained from M_IGEO(mp) using the
+ * superblock (mp->m_sb) and the stripe unit (mp->m_dalign).  Nothing is
+ * returned; every result is stored in that structure.  The steps are
+ * order-dependent: later steps read fields written by earlier ones.
+ *
+ * Compute the inode btree min and max record counts and the maximum number
+ * of btree levels, and set maxicount.
+ *
+ * Set the inode cluster size.  This may still be overridden by the file
+ * system block size if it is larger than the chosen cluster size.
+ *
+ * For v5 filesystems, scale the cluster size with the inode size to keep a
+ * constant ratio of inode per cluster buffer, but only if mkfs has set the
+ * inode alignment value appropriately for larger cluster sizes.
+ *
+ * Then compute the inode cluster alignment information.
+ */
+void
+xfs_ialloc_setup_geometry(
+       struct xfs_mount        *mp)
+{
+       struct xfs_sb           *sbp = &mp->m_sb;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
+       uint64_t                icount;
+       uint                    inodes;
+
+       /*
+        * Compute inode btree geometry.  Index [0] holds the record limits
+        * used for level 0 blocks and index [1] those used for every higher
+        * level; the minimum record count is half the maximum.
+        */
+       igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+       igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+       igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+       igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
+       igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
+
+       igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
+                       sbp->sb_inopblock);     /* larger of the two values */
+       igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
+
+       if (sbp->sb_spino_align)
+               igeo->ialloc_min_blks = sbp->sb_spino_align;
+       else
+               igeo->ialloc_min_blks = igeo->ialloc_blks;
+
+       /*
+        * Compute and fill in value of m_ino_geo.inobt_maxlevels.  This
+        * reads agino_log (through XFS_INO_AGINO_BITS) and inobt_mnr, both
+        * set above.
+        */
+       inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
+       igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
+                       inodes);
+
+       /* Set the maximum inode count for this filesystem (0 if no limit). */
+       if (sbp->sb_imax_pct) {
+               /*
+                * Make sure the maximum inode count is a multiple
+                * of the units we allocate inodes in: take sb_imax_pct
+                * percent of sb_dblocks, then divide by ialloc_blks and
+                * multiply back to round down to a whole number of
+                * allocation units before converting blocks to inodes.
+                */
+               icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+               do_div(icount, 100);
+               do_div(icount, igeo->ialloc_blks);
+               igeo->maxicount = XFS_FSB_TO_INO(mp,
+                               icount * igeo->ialloc_blks);
+       } else {
+               igeo->maxicount = 0;
+       }
+
+       /*
+        * Compute the desired size of an inode cluster buffer, which
+        * starts at 8K and (on v5 filesystems) scales up with larger inode
+        * sizes.
+        *
+        * Preserve the desired inode cluster size because the sparse inodes
+        * feature uses that desired size (not the actual size) to compute the
+        * sparse inode alignment.  The mount code validates this value, so we
+        * cannot change the behavior.
+        */
+       igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               int     new_size = igeo->inode_cluster_size_raw;
+
+               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+                       igeo->inode_cluster_size_raw = new_size;
+       }
+
+       /*
+        * Calculate inode cluster ratios.  A cluster is never smaller than
+        * one filesystem block, so the actual cluster size is derived from
+        * the block count rather than copied from the raw value.
+        */
+       if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
+               igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
+                               igeo->inode_cluster_size_raw);
+       else
+               igeo->blocks_per_cluster = 1;
+       igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
+       igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
+
+       /*
+        * Calculate inode cluster alignment.  Falls back to an alignment of
+        * 1 block (mask 0) when the superblock alignment is not usable.
+        */
+       if (xfs_sb_version_hasalign(&mp->m_sb) &&
+           mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
+               igeo->cluster_align = mp->m_sb.sb_inoalignmt;
+       else
+               igeo->cluster_align = 1;
+       igeo->inoalign_mask = igeo->cluster_align - 1;
+       igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
+
+       /*
+        * If we are using stripe alignment, check whether
+        * the stripe unit is a multiple of the inode alignment.  Only then
+        * is the stripe unit recorded as the inode allocation alignment;
+        * otherwise ialloc_align is left at 0.
+        */
+       if (mp->m_dalign && igeo->inoalign_mask &&
+           !(mp->m_dalign & igeo->inoalign_mask))
+               igeo->ialloc_align = mp->m_dalign;
+       else
+               igeo->ialloc_align = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h

index e936b7cc93893061f11ac7338ad7a9ba8f01ae27..323592d563d520f9f940fc1913d475bb6ce90708 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -23,16 +23,6 @@ struct xfs_icluster {
                                          * sparse chunks */
  };
  
-/* Calculate and return the number of filesystem blocks per inode cluster */
-static inline int
-xfs_icluster_size_fsb(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
-               return 1;
-       return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
-}
-
  /*
   * Make an inode pointer out of the buffer/offset.
   */
@@ -95,13 +85,6 @@ xfs_imap(
         struct xfs_imap *imap,          /* location map structure */
         uint            flags);         /* flags for inode btree lookup */
  
-/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
-       struct xfs_mount *mp);          /* file system mount structure */
-
  /*
   * Log specified fields for the ag hdr (inode section)
   */
@@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
                 int *stat);
  
  int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
  
  #endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c

index bc2dfacd2f4a01c83864361862442ab63c4a73b2..b82992f795aa969024ba9c1b326691ab93662fb2 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -11,14 +11,12 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_rmap.h"
  
@@ -28,7 +26,7 @@ xfs_inobt_get_minrecs(
         struct xfs_btree_cur    *cur,
         int                     level)
  {
-       return cur->bc_mp->m_inobt_mnr[level != 0];
+       return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0];
  }
  
  STATIC struct xfs_btree_cur *
@@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs(
         struct xfs_btree_cur    *cur,
         int                     level)
  {
-       return cur->bc_mp->m_inobt_mxr[level != 0];
+       return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0];
  }
  
  STATIC void
@@ -255,7 +253,7 @@ static xfs_failaddr_t
  xfs_inobt_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         xfs_failaddr_t          fa;
         unsigned int            level;
@@ -281,10 +279,11 @@ xfs_inobt_verify(
  
         /* level verification */
         level = be16_to_cpu(block->bb_level);
-       if (level >= mp->m_in_maxlevels)
+       if (level >= M_IGEO(mp)->inobt_maxlevels)
                 return __this_address;
  
-       return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
+       return xfs_btree_sblock_verify(bp,
+                       M_IGEO(mp)->inobt_mxr[level != 0]);
  }
  
  static void
@@ -546,7 +545,7 @@ xfs_inobt_max_size(
         xfs_agblock_t           agblocks = xfs_ag_block_count(mp, agno);
  
         /* Bail out if we're uninitialized, which can happen in mkfs. */
-       if (mp->m_inobt_mxr[0] == 0)
+       if (M_IGEO(mp)->inobt_mxr[0] == 0)
                 return 0;
  
         /*
@@ -558,11 +557,41 @@ xfs_inobt_max_size(
             XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
                 agblocks -= mp->m_sb.sb_logblocks;
  
-       return xfs_btree_calc_size(mp->m_inobt_mnr,
+       return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
                                 (uint64_t)agblocks * mp->m_sb.sb_inopblock /
                                         XFS_INODES_PER_CHUNK);
  }
  
+/* Read AGI and create inobt cursor. */
+int
+xfs_inobt_cur(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          agno,
+       xfs_btnum_t             which,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp)
+{
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       ASSERT(*agi_bpp == NULL);
+       ASSERT(*curpp == NULL);
+
+       error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp);
+       if (error)
+               return error;
+
+       cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
+       if (!cur) {
+               xfs_trans_brelse(tp, *agi_bpp);
+               *agi_bpp = NULL;
+               return -ENOMEM;
+       }
+       *curpp = cur;
+       return 0;
+}
+
  static int
  xfs_inobt_count_blocks(
         struct xfs_mount        *mp,
@@ -571,15 +600,14 @@ xfs_inobt_count_blocks(
         xfs_btnum_t             btnum,
         xfs_extlen_t            *tree_blocks)
  {
-       struct xfs_buf          *agbp;
-       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp = NULL;
+       struct xfs_btree_cur    *cur = NULL;
         int                     error;
  
-       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+       error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp);
         if (error)
                 return error;
  
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
         error = xfs_btree_count_blocks(cur, tree_blocks);
         xfs_btree_del_cursor(cur, error);
         xfs_trans_brelse(tp, agbp);
@@ -619,5 +647,5 @@ xfs_iallocbt_calc_size(
         struct xfs_mount        *mp,
         unsigned long long      len)
  {
-       return xfs_btree_calc_size(mp->m_inobt_mnr, len);
+       return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len);
  }
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h

index ebdd0c6b8766228bdd04b6ad854413a15c88ee62..951305ecaae1b951d6435a6ebed037f1f6da053e 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
                 xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
  extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
                 unsigned long long len);
+int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
+               xfs_agnumber_t agno, xfs_btnum_t btnum,
+               struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
  
  #endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c

index bc690f2409faab3135fc1cf857263fab99faf2a8..27aa3f2bc4bc4273c96ede2a3cfbc91bc8d7e6bb 100644 (file)
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -3,18 +3,14 @@
   * Copyright (c) 2017 Christoph Hellwig.
   */
  
-#include <linux/cache.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_bmap.h"
  #include "xfs_trace.h"
  
  /*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c

index e021d5133ccb42d7b51f916180420bb41421aa09..28ab3c5255e1875727e74655bc6134ca7ba5a858 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -10,11 +10,9 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
-#include "xfs_cksum.h"
  #include "xfs_icache.h"
  #include "xfs_trans.h"
  #include "xfs_ialloc.h"
@@ -33,12 +31,9 @@ xfs_inobp_check(
         xfs_buf_t       *bp)
  {
         int             i;
-       int             j;
         xfs_dinode_t    *dip;
  
-       j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-       for (i = 0; i < j; i++) {
+       for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
                 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
                 if (!dip->di_next_unlinked)  {
                         xfs_alert(mp,
@@ -80,7 +75,7 @@ xfs_inode_buf_verify(
         struct xfs_buf  *bp,
         bool            readahead)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_agnumber_t  agno;
         int             i;
         int             ni;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c

index f9acf1d436f690952b9e8d5c4f33b3109acb3cba..bf3e0401824658359c5758f032494d5e2d4f6bec 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -3,10 +3,10 @@
   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   * All Rights Reserved.
   */
-#include <linux/log2.h>
  
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
@@ -19,12 +19,10 @@
  #include "xfs_bmap.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_attr_sf.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_attr_leaf.h"
-#include "xfs_shared.h"
  
  kmem_zone_t *xfs_ifork_zone;
  
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c

index 1b542ec11d5d450bb9e43afab396882853bfc78f..7f55eb3f365367a254ba78bfc191b16baec62f86 100644 (file)
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -12,9 +12,7 @@
  #include "xfs_mount.h"
  #include "xfs_da_format.h"
  #include "xfs_trans_space.h"
-#include "xfs_inode.h"
  #include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
  #include "xfs_bmap_btree.h"
  
  /*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c

index 542aa1475b5f969b3faf98f92fa2121461b5d629..51bb9bdb0e847af138baa7ef70b3610364308c69 100644 (file)
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -9,7 +9,6 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
  #include "xfs_btree.h"
@@ -19,7 +18,6 @@
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_bit.h"
  #include "xfs_refcount.h"
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c

index 5d9de9b217266cfa0d9b9e054047cbb146ad9250..38529dbacd5566900a04ace644777ba879abf206 100644 (file)
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -12,12 +12,10 @@
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_btree.h"
-#include "xfs_bmap.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_bit.h"
  #include "xfs_rmap.h"
@@ -203,7 +201,7 @@ STATIC xfs_failaddr_t
  xfs_refcountbt_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_perag        *pag = bp->b_pag;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c

index 8ed885507dd82c9e5d156e434373f9e3630b3dfd..e6aeb390b2fb66db53b056b7738ec8bc35835013 100644 (file)
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -10,24 +10,17 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_btree.h"
  #include "xfs_trans.h"
  #include "xfs_alloc.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
-#include "xfs_trans_space.h"
  #include "xfs_trace.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
-#include "xfs_extent_busy.h"
-#include "xfs_bmap.h"
  #include "xfs_inode.h"
-#include "xfs_ialloc.h"
  
  /*
   * Lookup the first record less than or equal to [bno, len, owner, offset]
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c

index 5d1f8884c8886eedc81bb54acd90c8445ad62826..fc78efa52c94ed45d7ae25a2aa256bab5e70bb72 100644 (file)
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -9,18 +9,14 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_alloc.h"
  #include "xfs_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_error.h"
  #include "xfs_extent_busy.h"
  #include "xfs_ag_resv.h"
@@ -292,7 +288,7 @@ static xfs_failaddr_t
  xfs_rmapbt_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
         struct xfs_perag        *pag = bp->b_pag;
         xfs_failaddr_t          fa;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c

index eaaff67e9626869b7d6b296f226bf4e2b109660c..8ea1efc97b41d180a9c79c90b82314c80598655f 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -13,15 +13,7 @@
  #include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
  #include "xfs_rtalloc.h"
  
  
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index e76a3e5d28d77dd9187495319717264895c81644..a08dd8f40346fae595b455e86dc784b90a3c4767 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -10,26 +10,19 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
  #include "xfs_ialloc.h"
  #include "xfs_alloc.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
  #include "xfs_log.h"
  #include "xfs_rmap_btree.h"
-#include "xfs_bmap.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_health.h"
  
  /*
@@ -686,7 +679,7 @@ xfs_sb_read_verify(
         struct xfs_buf          *bp)
  {
         struct xfs_sb           sb;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dsb          *dsb = XFS_BUF_TO_SBP(bp);
         int                     error;
  
@@ -752,7 +745,7 @@ xfs_sb_write_verify(
         struct xfs_buf          *bp)
  {
         struct xfs_sb           sb;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         int                     error;
  
@@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
   *
   * Mount initialization code establishing various mount
   * fields from the superblock associated with the given
- * mount structure
+ * mount structure.
+ *
+ * Inode geometry is calculated in xfs_ialloc_setup_geometry.
   */
  void
  xfs_sb_mount_common(
-       struct xfs_mount *mp,
-       struct xfs_sb   *sbp)
+       struct xfs_mount        *mp,
+       struct xfs_sb           *sbp)
  {
         mp->m_agfrotor = mp->m_agirotor = 0;
         mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -813,7 +808,6 @@ xfs_sb_mount_common(
         mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
         mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
         mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
-       mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
         mp->m_blockmask = sbp->sb_blocksize - 1;
         mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
         mp->m_blockwmask = mp->m_blockwsize - 1;
@@ -823,11 +817,6 @@ xfs_sb_mount_common(
         mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
         mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
  
-       mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
-       mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
-       mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
-       mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
-
         mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
         mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
         mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
@@ -844,14 +833,6 @@ xfs_sb_mount_common(
         mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
  
         mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
-       mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
-                                       sbp->sb_inopblock);
-       mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
-
-       if (sbp->sb_spino_align)
-               mp->m_ialloc_min_blks = sbp->sb_spino_align;
-       else
-               mp->m_ialloc_min_blks = mp->m_ialloc_blks;
         mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
         mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
  }
@@ -939,7 +920,7 @@ xfs_log_sb(
         struct xfs_trans        *tp)
  {
         struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_buf          *bp = xfs_trans_getsb(tp, mp, 0);
+       struct xfs_buf          *bp = xfs_trans_getsb(tp, mp);
  
         mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
         mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1005,7 +986,7 @@ xfs_update_secondary_sbs(
  
                 bp = xfs_buf_get(mp->m_ddev_targp,
                                  XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
-                                XFS_FSS_TO_BB(mp, 1), 0);
+                                XFS_FSS_TO_BB(mp, 1));
                 /*
                  * If we get an error reading or writing alternate superblocks,
                  * continue.  xfs_repair chooses the "best" superblock based
@@ -1069,7 +1050,7 @@ xfs_sync_sb_buf(
         if (error)
                 return error;
  
-       bp = xfs_trans_getsb(tp, mp, 0);
+       bp = xfs_trans_getsb(tp, mp);
         xfs_log_sb(tp);
         xfs_trans_bhold(tp, bp);
         xfs_trans_set_sync(tp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h

index 4e909791aeac48a9ca82c6eb5564ca8e2cc7cadc..e0641b7337b3cf27fb6b0fd4a5953691901e0154 100644 (file)
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -65,7 +65,6 @@ void  xfs_log_get_max_trans_res(struct xfs_mount *mp,
  #define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
  #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
  #define XFS_TRANS_NO_WRITECOUNT 0x40   /* do not elevate SB writecount */
-#define XFS_TRANS_NOFS         0x80    /* pass KM_NOFS to kmem_alloc */
  /*
   * LOWMODE is used by the allocator to activate the lowspace algorithm - when
   * free space is running low the extent allocator may choose to allocate an
@@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
                                  struct xfs_inode *ip, struct xfs_ifork *ifp);
  xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
  
+/* Computed inode geometry for the filesystem. */
+struct xfs_ino_geometry {
+       /* Maximum inode count in this filesystem. */
+       uint64_t        maxicount;
+
+       /* Actual inode cluster buffer size, in bytes. */
+       unsigned int    inode_cluster_size;
+
+       /*
+        * Desired inode cluster buffer size, in bytes.  This value is not
+        * rounded up to at least one filesystem block, which is necessary for
+        * the sole purpose of validating sb_spino_align.  Runtime code must
+        * only ever use inode_cluster_size.
+        */
+       unsigned int    inode_cluster_size_raw;
+
+       /* Inode cluster sizes, adjusted to be at least 1 fsb. */
+       unsigned int    inodes_per_cluster;
+       unsigned int    blocks_per_cluster;
+
+       /* Inode cluster alignment. */
+       unsigned int    cluster_align;
+       unsigned int    cluster_align_inodes;
+       unsigned int    inoalign_mask;  /* mask sb_inoalignmt if used */
+
+       unsigned int    inobt_mxr[2]; /* max inobt btree records */
+       unsigned int    inobt_mnr[2]; /* min inobt btree records */
+       unsigned int    inobt_maxlevels; /* max inobt btree levels. */
+
+       /* Size of inode allocations under normal operation. */
+       unsigned int    ialloc_inos;
+       unsigned int    ialloc_blks;
+
+       /* Minimum inode blocks for a sparse allocation. */
+       unsigned int    ialloc_min_blks;
+
+       /* stripe unit inode alignment */
+       unsigned int    ialloc_align;
+
+       unsigned int    agino_log;      /* #bits for agino in inum */
+};
+
+/* Keep iterating the data structure. */
+#define XFS_ITER_CONTINUE      (0)
+
+/* Stop iterating the data structure. */
+#define XFS_ITER_ABORT         (1)
+
  #endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c

index a0ccc253c43d0a4c5733c28086c2475c7be5a67b..3b8260ca7d1b80525f6846a1fdded11fd112558b 100644 (file)
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -11,12 +11,8 @@
  #include "xfs_shared.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
  #include "xfs_inode.h"
  #include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_cksum.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_log.h"
@@ -90,7 +86,7 @@ static xfs_failaddr_t
  xfs_symlink_verify(
         struct xfs_buf          *bp)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         struct xfs_dsymlink_hdr *dsl = bp->b_addr;
  
         if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -116,7 +112,7 @@ static void
  xfs_symlink_read_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         xfs_failaddr_t  fa;
  
         /* no verification of non-crc buffers */
@@ -136,7 +132,7 @@ static void
  xfs_symlink_write_verify(
         struct xfs_buf  *bp)
  {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
         struct xfs_buf_log_item *bip = bp->b_log_item;
         xfs_failaddr_t          fa;
  
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c

index 83f4ee2afc49e8092d0d7b733b02f996596f0541..d12bbd526e7c02ff21eaf07c37a09d1d3b13ad51 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -15,12 +15,10 @@
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
  #include "xfs_quota.h"
  #include "xfs_trans.h"
  #include "xfs_qm.h"
  #include "xfs_trans_space.h"
-#include "xfs_trace.h"
  
  #define _ALLOC true
  #define _FREE  false
@@ -136,9 +134,10 @@ STATIC uint
  xfs_calc_inobt_res(
         struct xfs_mount        *mp)
  {
-       return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
+       return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
+                       XFS_FSB_TO_B(mp, 1)) +
+                               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+                       XFS_FSB_TO_B(mp, 1));
  }
  
  /*
@@ -167,7 +166,7 @@ xfs_calc_finobt_res(
   * includes:
   *
   * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the inode chunk: m_ialloc_blks * N
+ * the inode chunk: m_ino_geo.ialloc_blks * N
   *
   * The size N of the inode chunk reservation depends on whether it is for
   * allocation or free and which type of create transaction is in use. An inode
@@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res(
                 size = XFS_FSB_TO_B(mp, 1);
         }
  
-       res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
+       res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
         return res;
  }
  
@@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation(
         struct xfs_mount        *mp)
  {
         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-              2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+              2 * M_IGEO(mp)->inode_cluster_size;
  }
  
  /*
@@ -345,7 +344,7 @@ STATIC uint
  xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
  {
         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+                       M_IGEO(mp)->inode_cluster_size;
  }
  
  /*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h

index a62fb950bef18acfe820098082a19172feeadef5..88221c7a04ccfedd9a78aff832dbdef3ffb29673 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -56,9 +56,9 @@
  #define        XFS_DIRREMOVE_SPACE_RES(mp)     \
         XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
  #define        XFS_IALLOC_SPACE_RES(mp)        \
-       ((mp)->m_ialloc_blks + \
+       (M_IGEO(mp)->ialloc_blks + \
          (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
-         ((mp)->m_in_maxlevels - 1)))
+         (M_IGEO(mp)->inobt_maxlevels - 1)))
  
  /*
   * Space reservation values for various transactions.
@@ -94,7 +94,8 @@
  #define        XFS_SYMLINK_SPACE_RES(mp,nl,b)  \
         (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
  #define XFS_IFREE_SPACE_RES(mp)                \
-       (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
+       (xfs_sb_version_hasfinobt(&mp->m_sb) ? \
+                       M_IGEO(mp)->inobt_maxlevels : 0)
  
  
  #endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c

index d51acc95bc005a61f92e1a594761793968f26eb6..4f595546a639b7c784a670fe31a7cd6870e8d3e2 100644 (file)
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -7,19 +7,10 @@
  #include "xfs.h"
  #include "xfs_fs.h"
  #include "xfs_format.h"
-#include "xfs_log_format.h"
  #include "xfs_shared.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
  
  /* Find the size of the AG, in blocks. */
  xfs_agblock_t
@@ -87,14 +78,14 @@ xfs_agino_range(
          * Calculate the first inode, which will be in the first
          * cluster-aligned block after the AGFL.
          */
-       bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align);
+       bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
         *first = XFS_AGB_TO_AGINO(mp, bno);
  
         /*
          * Calculate the last inode, which will be at the end of the
          * last (aligned) cluster that can be allocated in the AG.
          */
-       bno = round_down(eoag, mp->m_cluster_align);
+       bno = round_down(eoag, M_IGEO(mp)->cluster_align);
         *last = XFS_AGB_TO_AGINO(mp, bno) - 1;
  }
  
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c

index adaeabdefdd33ad7ab35e55a64acd69a5a62e6c4..16b09b94144187c81c38e93c8892d697fd7dc6c2 100644 (file)
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -9,20 +9,13 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
  #include "xfs_sb.h"
-#include "xfs_inode.h"
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  
  /* Superblock */
  
@@ -646,7 +639,7 @@ xchk_agfl_block(
         xchk_agfl_block_xref(sc, agbno);
  
         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-               return XFS_BTREE_QUERY_RANGE_ABORT;
+               return XFS_ITER_ABORT;
  
         return 0;
  }
@@ -737,7 +730,7 @@ xchk_agfl(
         /* Check the blocks in the AGFL. */
         error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
                         sc->sa.agfl_bp, xchk_agfl_block, &sai);
-       if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+       if (error == XFS_ITER_ABORT) {
                 error = 0;
                 goto out_free;
         }
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c

index 64e31f87d4907ada7d775ef3e3d6d729bdceeffb..7a1a38b636a91b20a7745ca42264cb68c88b2079 100644 (file)
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -9,22 +9,17 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
-#include "xfs_inode.h"
  #include "xfs_alloc.h"
  #include "xfs_alloc_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
  #include "xfs_refcount_btree.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c

index 44883e9112ad06b9db9bdee40a4cdd96ec8b31af..a43d1813c4ffe006a125bc2221809c5b8fbfed8b 100644 (file)
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -9,19 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_alloc.h"
  #include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
-#include "scrub/trace.h"
  
  /*
   * Set us up to scrub free space btrees.
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c

index dce74ec570389a21204e40ddd14d4e1f619bebf9..1afc58bf71dd81e7a9bfc00d062ce217fdfc9abd 100644 (file)
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -9,26 +9,62 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
-#include "xfs_dir2.h"
  #include "xfs_attr.h"
  #include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/dabtree.h"
-#include "scrub/trace.h"
+#include "scrub/attr.h"
  
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
+/*
+ * Allocate enough memory to hold an attr value and attr block bitmaps,
+ * reallocating the buffer if necessary.  Buffer contents are not preserved
+ * across a reallocation.
+ */
+int
+xchk_setup_xattr_buf(
+       struct xfs_scrub        *sc,
+       size_t                  value_size,
+       xfs_km_flags_t          flags)
+{
+       size_t                  sz;
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       /*
+        * We need enough space to read an xattr value from the file or enough
+        * space to hold three copies of the xattr free space bitmap.  We don't
+        * need the buffer space for both purposes at the same time.
+        */
+       sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+       sz = max_t(size_t, sz, value_size);
+
+       /*
+        * If there's already a buffer, figure out if we need to reallocate it
+        * to accommodate a larger size.
+        */
+       if (ab) {
+               if (sz <= ab->sz)
+                       return 0;
+               kmem_free(ab);
+               sc->buf = NULL;
+       }
+
+       /*
+        * Don't zero the buffer upon allocation to avoid runtime overhead.
+        * All users must be careful never to read uninitialized contents.
+        */
+       ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+       if (!ab)
+               return -ENOMEM;
+
+       ab->sz = sz;
+       sc->buf = ab;
+       return 0;
+}
  
  /* Set us up to scrub an inode's extended attributes. */
  int
@@ -36,19 +72,18 @@ xchk_setup_xattr(
         struct xfs_scrub        *sc,
         struct xfs_inode        *ip)
  {
-       size_t                  sz;
+       int                     error;
  
         /*
-        * Allocate the buffer without the inode lock held.  We need enough
-        * space to read every xattr value in the file or enough space to
-        * hold three copies of the xattr free space bitmap.  (Not both at
-        * the same time.)
+        * We failed to get memory while checking attrs, so this time try to
+        * get all the memory we're ever going to need.  Allocate the buffer
+        * without the inode lock held, which means we can sleep.
          */
-       sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
-                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
-       sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
-       if (!sc->buf)
-               return -ENOMEM;
+       if (sc->flags & XCHK_TRY_HARDER) {
+               error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
+               if (error)
+                       return error;
+       }
  
         return xchk_setup_inode_contents(sc, ip, 0);
  }
@@ -83,7 +118,7 @@ xchk_xattr_listent(
         sx = container_of(context, struct xchk_xattr, context);
  
         if (xchk_should_terminate(sx->sc, &error)) {
-               context->seen_enough = 1;
+               context->seen_enough = error;
                 return;
         }
  
@@ -99,6 +134,19 @@ xchk_xattr_listent(
                 return;
         }
  
+       /*
+        * Try to allocate enough memory to extract the attr value.  If that
+        * doesn't work, we overload the seen_enough variable to convey
+        * the error message back to the main scrub function.
+        */
+       error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+       if (error == -ENOMEM)
+               error = -EDEADLOCK;
+       if (error) {
+               context->seen_enough = error;
+               return;
+       }
+
         args.flags = ATTR_KERNOTIME;
         if (flags & XFS_ATTR_ROOT)
                 args.flags |= ATTR_ROOT;
@@ -111,8 +159,8 @@ xchk_xattr_listent(
         args.namelen = namelen;
         args.hashval = xfs_da_hashname(args.name, args.namelen);
         args.trans = context->tp;
-       args.value = sx->sc->buf;
-       args.valuelen = XATTR_SIZE_MAX;
+       args.value = xchk_xattr_valuebuf(sx->sc);
+       args.valuelen = valuelen;
  
         error = xfs_attr_get_ilocked(context->dp, &args);
         if (error == -EEXIST)
@@ -125,7 +173,7 @@ xchk_xattr_listent(
                                              args.blkno);
  fail_xref:
         if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-               context->seen_enough = 1;
+               context->seen_enough = XFS_ITER_ABORT;
         return;
  }
  
@@ -170,13 +218,12 @@ xchk_xattr_check_freemap(
         unsigned long                   *map,
         struct xfs_attr3_icleaf_hdr     *leafhdr)
  {
-       unsigned long                   *freemap;
-       unsigned long                   *dstmap;
+       unsigned long                   *freemap = xchk_xattr_freemap(sc);
+       unsigned long                   *dstmap = xchk_xattr_dstmap(sc);
         unsigned int                    mapsize = sc->mp->m_attr_geo->blksize;
         int                             i;
  
         /* Construct bitmap of freemap contents. */
-       freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
         bitmap_zero(freemap, mapsize);
         for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
                 if (!xchk_xattr_set_map(sc, freemap,
@@ -186,7 +233,6 @@ xchk_xattr_check_freemap(
         }
  
         /* Look for bits that are set in freemap and are marked in use. */
-       dstmap = freemap + BITS_TO_LONGS(mapsize);
         return bitmap_and(dstmap, freemap, map, mapsize) == 0;
  }
  
@@ -201,13 +247,13 @@ xchk_xattr_entry(
         char                            *buf_end,
         struct xfs_attr_leafblock       *leaf,
         struct xfs_attr3_icleaf_hdr     *leafhdr,
-       unsigned long                   *usedmap,
         struct xfs_attr_leaf_entry      *ent,
         int                             idx,
         unsigned int                    *usedbytes,
         __u32                           *last_hashval)
  {
         struct xfs_mount                *mp = ds->state->mp;
+       unsigned long                   *usedmap = xchk_xattr_usedmap(ds->sc);
         char                            *name_end;
         struct xfs_attr_leaf_name_local *lentry;
         struct xfs_attr_leaf_name_remote *rentry;
@@ -267,16 +313,26 @@ xchk_xattr_block(
         struct xfs_attr_leafblock       *leaf = bp->b_addr;
         struct xfs_attr_leaf_entry      *ent;
         struct xfs_attr_leaf_entry      *entries;
-       unsigned long                   *usedmap = ds->sc->buf;
+       unsigned long                   *usedmap;
         char                            *buf_end;
         size_t                          off;
         __u32                           last_hashval = 0;
         unsigned int                    usedbytes = 0;
         unsigned int                    hdrsize;
         int                             i;
+       int                             error;
  
         if (*last_checked == blk->blkno)
                 return 0;
+
+       /* Allocate memory for block usage checking. */
+       error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+       if (error == -ENOMEM)
+               return -EDEADLOCK;
+       if (error)
+               return error;
+       usedmap = xchk_xattr_usedmap(ds->sc);
+
         *last_checked = blk->blkno;
         bitmap_zero(usedmap, mp->m_attr_geo->blksize);
  
@@ -324,7 +380,7 @@ xchk_xattr_block(
  
                 /* Check the entry and nameval. */
                 xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
-                               usedmap, ent, i, &usedbytes, &last_hashval);
+                               ent, i, &usedbytes, &last_hashval);
  
                 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                         goto out;
@@ -464,6 +520,10 @@ xchk_xattr(
         error = xfs_attr_list_int_ilocked(&sx.context);
         if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
                 goto out;
+
+       /* Did our listent function try to return any errors? */
+       if (sx.context.seen_enough < 0)
+               error = sx.context.seen_enough;
  out:
         return error;
  }
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h

new file mode 100644 (file)

index 0000000..13a1d2e
--- /dev/null
+++ b/fs/xfs/scrub/attr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_ATTR_H__
+#define __XFS_SCRUB_ATTR_H__
+
+/*
+ * Temporary storage for online scrub and repair of extended attributes.
+ */
+struct xchk_xattr_buf {
+       /* Size of @buf, in bytes. */
+       size_t                  sz;
+
+       /*
+        * Memory buffer -- either used for extracting attr values while
+        * walking the attributes; or for computing attr block bitmaps when
+        * checking the attribute tree.
+        *
+        * Each bitmap contains enough bits to track every byte in an attr
+        * block (rounded up to the size of an unsigned long).  The attr block
+        * used space bitmap starts at the beginning of the buffer; the free
+        * space bitmap follows immediately after; and we have a third buffer
+        * for storing intermediate bitmap results.
+        */
+       uint8_t                 buf[];
+};
+
+/* A place to store attribute values. */
+static inline uint8_t *
+xchk_xattr_valuebuf(
+       struct xfs_scrub        *sc)
+{
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       return ab->buf;
+}
+
+/* A bitmap of space usage computed by walking an attr leaf block. */
+static inline unsigned long *
+xchk_xattr_usedmap(
+       struct xfs_scrub        *sc)
+{
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       return (unsigned long *)ab->buf;
+}
+
+/* A bitmap of free space computed by walking attr leaf block free info. */
+static inline unsigned long *
+xchk_xattr_freemap(
+       struct xfs_scrub        *sc)
+{
+       return xchk_xattr_usedmap(sc) +
+                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+/* A bitmap used to hold temporary results. */
+static inline unsigned long *
+xchk_xattr_dstmap(
+       struct xfs_scrub        *sc)
+{
+       return xchk_xattr_freemap(sc) +
+                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
+               xfs_km_flags_t flags);
+
+#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c

index fdadc9e1dc49ea6a245258feeda14e36e226a3b0..3d47d111be5ae9413ee28e5f1e1e8da0852b7bb0 100644 (file)
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,11 +10,6 @@
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_btree.h"
-#include "scrub/xfs_scrub.h"
-#include "scrub/scrub.h"
-#include "scrub/common.h"
-#include "scrub/trace.h"
-#include "scrub/repair.h"
  #include "scrub/bitmap.h"
  
  /*
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c

index a703cd58a90e678854ac220f5661b9fb55b9ee8f..1bd29fdc2ab586945251084eebca671c9ec066b6 100644 (file)
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -9,27 +9,19 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
  #include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
  #include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
-#include "scrub/trace.h"
  
  /* Set us up with an inode's bmap. */
  int
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c

index 117910db51b809ebeea0196182e05f0dd0c54611..f52a7b8256f96c7d5eadd58ce034c90f0b61299b 100644 (file)
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -9,14 +9,7 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c

index 973aa59975e328af594cbf7d7853ae1eac80bd70..18876056e5e02af78d29f6c7a5ac24fc74c3b32c 100644 (file)
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -9,22 +9,16 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
  #include "xfs_inode.h"
  #include "xfs_icache.h"
-#include "xfs_itable.h"
  #include "xfs_alloc.h"
  #include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
@@ -32,11 +26,9 @@
  #include "xfs_trans_priv.h"
  #include "xfs_attr.h"
  #include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
-#include "scrub/btree.h"
  #include "scrub/repair.h"
  #include "scrub/health.h"
  
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c

index 90527b094878971f831c78daafe2483dd99e83d2..94c4f1de1922f31ea5f9ffe395792fe3a918c531 100644 (file)
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -9,20 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c

index a38a22785a1a28e6a7a50b533c1103f5caf2ebd0..1e2e11721eb993381879b2458a894d4178a3baa3 100644 (file)
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -9,24 +9,14 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
  #include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  #include "scrub/dabtree.h"
  
  /* Set us up to scrub directories. */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c

index 07c11e3e6437c40658838a60d0cc8ca5807b2603..fc3f510c9034419465fef5b95e2ffa8a15398b6c 100644 (file)
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -9,22 +9,10 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
  #include "xfs_sb.h"
-#include "xfs_inode.h"
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
-#include "xfs_rmap.h"
-#include "xfs_error.h"
-#include "xfs_errortag.h"
-#include "xfs_icache.h"
  #include "xfs_health.h"
-#include "xfs_bmap.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c

index 23cf8e2f25db66905097246387fd8ebe1d329854..b2f602811e9dfcdc577cbc737b473624a08fe282 100644 (file)
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -7,18 +7,10 @@
  #include "xfs_fs.h"
  #include "xfs_shared.h"
  #include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
  #include "xfs_sb.h"
-#include "xfs_inode.h"
  #include "xfs_health.h"
  #include "scrub/scrub.h"
-#include "scrub/health.h"
  
  /*
   * Scrub and In-Core Filesystem Health Assessments
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c

index 9b47117180cb1e8baaa4c1ae85a72bfc285ec383..681758704fda30e6250eb3322a2199d7010d9928 100644 (file)
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -9,21 +9,14 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_icache.h"
  #include "xfs_rmap.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
@@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster(
         int                             error = 0;
  
         nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
-                       mp->m_inodes_per_cluster);
+                       M_IGEO(mp)->inodes_per_cluster);
  
         /* Map this inode cluster */
         agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
@@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster(
          */
         ir_holemask = (irec->ir_holemask & cluster_mask);
         imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+       imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
         imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
                         mp->m_sb.sb_inodelog;
  
@@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster(
         /* If any part of this is a hole, skip it. */
         if (ir_holemask) {
                 xchk_xref_is_not_owned_by(bs->sc, agbno,
-                               mp->m_blocks_per_cluster,
+                               M_IGEO(mp)->blocks_per_cluster,
                                 &XFS_RMAP_OINFO_INODES);
                 return 0;
         }
  
-       xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+       xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
                         &XFS_RMAP_OINFO_INODES);
  
         /* Grab the inode cluster buffer. */
@@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters(
          */
         for (cluster_base = 0;
              cluster_base < XFS_INODES_PER_CHUNK;
-            cluster_base += bs->sc->mp->m_inodes_per_cluster) {
+            cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
                 error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
                 if (error)
                         break;
@@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment(
  {
         struct xfs_mount                *mp = bs->sc->mp;
         struct xchk_iallocbt            *iabt = bs->private;
+       struct xfs_ino_geometry         *igeo = M_IGEO(mp);
  
         /*
          * finobt records have different positioning requirements than inobt
@@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment(
                 unsigned int    imask;
  
                 imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
-                               mp->m_cluster_align_inodes) - 1;
+                               igeo->cluster_align_inodes) - 1;
                 if (irec->ir_startino & imask)
                         xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                 return;
@@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment(
         }
  
         /* inobt records must be aligned to cluster and inoalignmnt size. */
-       if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) {
+       if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
                 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                 return;
         }
  
-       if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) {
+       if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
                 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                 return;
         }
  
-       if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+       if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
                 return;
  
         /*
@@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment(
          * after this one.
          */
         iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
-       iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster;
+       iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
  }
  
  /* Scrub an inobt/finobt record. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c

index e213efc194a1d6e2417f941da0be04691befb3f7..6d483ab29e6397e8084935f2046face4b624097e 100644 (file)
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -9,27 +9,17 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_inode_buf.h"
-#include "xfs_inode_fork.h"
  #include "xfs_ialloc.h"
  #include "xfs_da_format.h"
  #include "xfs_reflink.h"
  #include "xfs_rmap.h"
-#include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
-#include "scrub/trace.h"
  
  /*
   * Grab total control of the inode metadata.  It doesn't matter here if
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c

index d5d197f1b80f92d071a2fed4b01240bc65be393a..c962bd534690789c0931be1211ddc0e4d55a1b20 100644 (file)
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -9,21 +9,13 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
  #include "xfs_icache.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  
  /* Set us up to scrub parents. */
  int
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c

index 5dfe2b5924db4f41243a9ebca0d747a28a5c6820..0a33b4421c32b1b2a239d58842b48077b312f19d 100644 (file)
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -9,24 +9,13 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
  #include "xfs_quota.h"
  #include "xfs_qm.h"
-#include "xfs_dquot.h"
-#include "xfs_dquot_item.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  
  /* Convert a scrub type code to a DQ flag, or return 0 if error. */
  static inline uint
@@ -144,7 +133,7 @@ xchk_quota_item(
         if (bsoft > bhard)
                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
  
-       if (ihard > mp->m_maxicount)
+       if (ihard > M_IGEO(mp)->maxicount)
                 xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
         if (isoft > ihard)
                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c

index 708b4158eb903b40fe7d577148f4adee8bdb554d..93b3793bc5b31a91dc466dc0a904a71c549e8dd0 100644 (file)
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,22 +7,12 @@
  #include "xfs_fs.h"
  #include "xfs_shared.h"
  #include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
  #include "xfs_rmap.h"
  #include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
-#include "scrub/trace.h"
  
  /*
   * Set us up to scrub reference count btrees.
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c

index eb358f0f5e0ad1151d6d9e4d4cd7d5efb2ed58d8..4cfeec57fb05c30a0f2d5d27d7d9972ea0a5b08a 100644 (file)
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -9,29 +9,21 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_icache.h"
  #include "xfs_alloc.h"
  #include "xfs_alloc_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_rmap.h"
  #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_extent_busy.h"
  #include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
  #include "xfs_quota.h"
-#include "xfs_attr.h"
-#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
@@ -357,7 +349,7 @@ xrep_init_btblock(
         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
                         XFS_FSB_TO_BB(mp, 1), 0);
         xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
-       xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+       xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
         xfs_trans_log_buf(tp, bp, 0, bp->b_length);
         bp->b_ops = ops;
@@ -672,7 +664,7 @@ xrep_findroot_agfl_walk(
  {
         xfs_agblock_t           *agbno = priv;
  
-       return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+       return (*agbno == bno) ? XFS_ITER_ABORT : 0;
  }
  
  /* Does this block match the btree information passed in? */
@@ -702,7 +694,7 @@ xrep_findroot_block(
         if (owner == XFS_RMAP_OWN_AG) {
                 error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
                                 xrep_findroot_agfl_walk, &agbno);
-               if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+               if (error == XFS_ITER_ABORT)
                         return 0;
                 if (error)
                         return error;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c

index 92a140c5b55e32c3b6b4620104b78cac7ddd68f3..8d4cefd761c1dc843915e91b7c26d5f5fb6449d4 100644 (file)
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -9,21 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
  #include "xfs_rmap.h"
  #include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/btree.h"
-#include "scrub/trace.h"
  
  /*
   * Set us up to scrub reverse mapping btrees.
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c

index dbe115b075f714007aef48b16e8b765629f6c284..c642bc206c41d8eb9a99aa1aff8b1561e7b5c689 100644 (file)
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,19 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
  #include "xfs_rtalloc.h"
  #include "xfs_inode.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  
  /* Set us up with the realtime metadata locked. */
  int
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c

index f630389ee176b14a5aeecd7dfec6cfa9ce2d81b2..15c8c5f3f688d1b3e905229ab1d13e8fc9bf8685 100644 (file)
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -9,36 +9,16 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
  #include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
-#include "xfs_refcount_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_rmap_btree.h"
  #include "xfs_quota.h"
  #include "xfs_qm.h"
  #include "xfs_errortag.h"
  #include "xfs_error.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
-#include "scrub/btree.h"
  #include "scrub/repair.h"
  #include "scrub/health.h"
  
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c

index f7ebaa9469997a66c9ca518f6b528ff2ae718a0f..99c0b1234c3cae488db442914729a974ae8ded63 100644 (file)
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -9,19 +9,11 @@
  #include "xfs_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
  #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
  #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
  #include "xfs_symlink.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
-#include "scrub/trace.h"
  
  /* Set us up to scrub a symbolic link. */
  int
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c

index 96feaf8dcdec5600475a2f831d0d44f61437ef87..9eaab2eb5ed3bf65d573e86a9af7e5662d3a126e 100644 (file)
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -10,15 +10,9 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "scrub/xfs_scrub.h"
  #include "scrub/scrub.h"
-#include "scrub/common.h"
  
  /* Figure out which block the btree cursor was pointing to. */
  static inline xfs_fsblock_t
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c

index 8039e35147ddd015d228dd40a818253cb12b4da4..cbda40d40326683236022c3a30b976a72ba257ee 100644 (file)
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -4,16 +4,14 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_acl.h"
  #include "xfs_attr.h"
  #include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
  #include <linux/posix_acl_xattr.h>
  
  
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 11f703d4a60568fff5c2fa3e93647ebc8424ca38..761248ee27785afe07be567cf9d4aa87705aa031 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -12,16 +12,11 @@
  #include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
  #include "xfs_iomap.h"
  #include "xfs_trace.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
  #include "xfs_reflink.h"
-#include <linux/writeback.h>
  
  /*
   * structure owned by writepages passed to individual writepage calls
@@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc(
         struct xfs_trans        *tp;
         int                     error;
  
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
-                               XFS_TRANS_NOFS, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
         if (error)
                 return error;
  
@@ -240,8 +234,16 @@ xfs_end_ioend(
         struct xfs_inode        *ip = XFS_I(ioend->io_inode);
         xfs_off_t               offset = ioend->io_offset;
         size_t                  size = ioend->io_size;
+       unsigned int            nofs_flag;
         int                     error;
  
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
         /*
          * Just clean up the in-memory strutures if the fs has been shut down.
          */
@@ -282,6 +284,8 @@ done:
                 list_del_init(&ioend->io_list);
                 xfs_destroy_ioend(ioend, error);
         }
+
+       memalloc_nofs_restore(nofs_flag);
  }
  
  /*
@@ -290,13 +294,9 @@ done:
  static bool
  xfs_ioend_can_merge(
         struct xfs_ioend        *ioend,
-       int                     ioend_error,
         struct xfs_ioend        *next)
  {
-       int                     next_error;
-
-       next_error = blk_status_to_errno(next->io_bio->bi_status);
-       if (ioend_error != next_error)
+       if (ioend->io_bio->bi_status != next->io_bio->bi_status)
                 return false;
         if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
                 return false;
@@ -305,11 +305,28 @@ xfs_ioend_can_merge(
                 return false;
         if (ioend->io_offset + ioend->io_size != next->io_offset)
                 return false;
-       if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
-               return false;
         return true;
  }
  
+/*
+ * If the to-be-merged ioend has a preallocated transaction for file
+ * size updates we need to ensure the ioend it is merged into also
+ * has one.  If it already has one we can simply cancel the transaction
+ * as it is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_append_transactions(
+       struct xfs_ioend        *ioend,
+       struct xfs_ioend        *next)
+{
+       if (!ioend->io_append_trans) {
+               ioend->io_append_trans = next->io_append_trans;
+               next->io_append_trans = NULL;
+       } else {
+               xfs_setfilesize_ioend(next, -ECANCELED);
+       }
+}
+
  /* Try to merge adjacent completions. */
  STATIC void
  xfs_ioend_try_merge(
@@ -317,25 +334,16 @@ xfs_ioend_try_merge(
         struct list_head        *more_ioends)
  {
         struct xfs_ioend        *next_ioend;
-       int                     ioend_error;
-       int                     error;
-
-       if (list_empty(more_ioends))
-               return;
-
-       ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
  
         while (!list_empty(more_ioends)) {
                 next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
                                 io_list);
-               if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend))
+               if (!xfs_ioend_can_merge(ioend, next_ioend))
                         break;
                 list_move_tail(&next_ioend->io_list, &ioend->io_list);
                 ioend->io_size += next_ioend->io_size;
-               if (ioend->io_append_trans) {
-                       error = xfs_setfilesize_ioend(next_ioend, 1);
-                       ASSERT(error == 1);
-               }
+               if (next_ioend->io_append_trans)
+                       xfs_ioend_merge_append_transactions(ioend, next_ioend);
         }
  }
  
@@ -626,7 +634,7 @@ allocate_blocks:
   * reference to the ioend to ensure that the ioend completion is only done once
   * all bios have been submitted and the ioend is really done.
   *
- * If @fail is non-zero, it means that we have a situation where some part of
+ * If @status is non-zero, it means that we have a situation where some part of
   * the submission process has failed after we have marked paged for writeback
   * and unlocked them. In this situation, we need to fail the bio and ioend
   * rather than submit it to IO. This typically only happens on a filesystem
@@ -638,21 +646,19 @@ xfs_submit_ioend(
         struct xfs_ioend        *ioend,
         int                     status)
  {
+       unsigned int            nofs_flag;
+
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
         /* Convert CoW extents to regular */
         if (!status && ioend->io_fork == XFS_COW_FORK) {
-               /*
-                * Yuk. This can do memory allocation, but is not a
-                * transactional operation so everything is done in GFP_KERNEL
-                * context. That can deadlock, because we hold pages in
-                * writeback state and GFP_KERNEL allocations can block on them.
-                * Hence we must operate in nofs conditions here.
-                */
-               unsigned nofs_flag;
-
-               nofs_flag = memalloc_nofs_save();
                 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
                                 ioend->io_offset, ioend->io_size);
-               memalloc_nofs_restore(nofs_flag);
         }
  
         /* Reserve log space if we might write beyond the on-disk inode size. */
@@ -663,9 +669,10 @@ xfs_submit_ioend(
             !ioend->io_append_trans)
                 status = xfs_setfilesize_trans_alloc(ioend);
  
+       memalloc_nofs_restore(nofs_flag);
+
         ioend->io_bio->bi_private = ioend;
         ioend->io_bio->bi_end_io = xfs_end_bio;
-       ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
  
         /*
          * If we are failing the IO now, just mark the ioend with an
@@ -679,7 +686,6 @@ xfs_submit_ioend(
                 return status;
         }
  
-       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
         submit_bio(ioend->io_bio);
         return 0;
  }
@@ -691,7 +697,8 @@ xfs_alloc_ioend(
         xfs_exntst_t            state,
         xfs_off_t               offset,
         struct block_device     *bdev,
-       sector_t                sector)
+       sector_t                sector,
+       struct writeback_control *wbc)
  {
         struct xfs_ioend        *ioend;
         struct bio              *bio;
@@ -699,6 +706,9 @@ xfs_alloc_ioend(
         bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
         bio_set_dev(bio, bdev);
         bio->bi_iter.bi_sector = sector;
+       bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+       bio->bi_write_hint = inode->i_write_hint;
+       wbc_init_bio(wbc, bio);
  
         ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
         INIT_LIST_HEAD(&ioend->io_list);
@@ -719,24 +729,22 @@ xfs_alloc_ioend(
   * so that the bi_private linkage is set up in the right direction for the
   * traversal in xfs_destroy_ioend().
   */
-static void
+static struct bio *
  xfs_chain_bio(
-       struct xfs_ioend        *ioend,
-       struct writeback_control *wbc,
-       struct block_device     *bdev,
-       sector_t                sector)
+       struct bio              *prev)
  {
         struct bio *new;
  
         new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
-       bio_set_dev(new, bdev);
-       new->bi_iter.bi_sector = sector;
-       bio_chain(ioend->io_bio, new);
-       bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
-       ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
-       submit_bio(ioend->io_bio);
-       ioend->io_bio = new;
+       bio_copy_dev(new, prev);/* also copies over blkcg information */
+       new->bi_iter.bi_sector = bio_end_sector(prev);
+       new->bi_opf = prev->bi_opf;
+       new->bi_write_hint = prev->bi_write_hint;
+
+       bio_chain(prev, new);
+       bio_get(prev);          /* for xfs_destroy_ioend */
+       submit_bio(prev);
+       return new;
  }
  
  /*
@@ -772,7 +780,7 @@ xfs_add_to_ioend(
                 if (wpc->ioend)
                         list_add(&wpc->ioend->io_list, iolist);
                 wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
-                               wpc->imap.br_state, offset, bdev, sector);
+                               wpc->imap.br_state, offset, bdev, sector, wbc);
         }
  
         merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
@@ -783,11 +791,12 @@ xfs_add_to_ioend(
  
         if (!merged) {
                 if (bio_full(wpc->ioend->io_bio, len))
-                       xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+                       wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
                 bio_add_page(wpc->ioend->io_bio, page, len, poff);
         }
  
         wpc->ioend->io_size += len;
+       wbc_account_io(wbc, page, len);
  }
  
  STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h

index f62b03186c62967bbc8d08e541e318989a6d1503..45a1ea240cbbb0a0b3ded8fcd982169b11789c72 100644 (file)
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops;
  
  int    xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
  
-extern void xfs_count_page_state(struct page *, int *, int *);
  extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
  extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
  
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c

index 228821b2ebe0195db8be0a59d9eda366515f9a03..dc93c51c17de962794ad688156e0c5cc389de920 100644 (file)
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -15,18 +15,13 @@
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
-#include "xfs_alloc.h"
  #include "xfs_attr_remote.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_attr.h"
  #include "xfs_attr_leaf.h"
-#include "xfs_error.h"
  #include "xfs_quota.h"
-#include "xfs_trace.h"
  #include "xfs_dir2.h"
-#include "xfs_defer.h"
  
  /*
   * Look at all the extents for this logical region,
@@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive(
         int                     size;
         int                     tmp;
         int                     i;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
  
         leaf = bp->b_addr;
         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c

index 3d213a7394c5b747dfb5cffc17dfb3d44d66cf03..58fc820a70c6fc6ed131393e50de79e25095a534 100644 (file)
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -6,25 +6,20 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_mount.h"
  #include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_attr.h"
  #include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
  #include "xfs_attr_leaf.h"
  #include "xfs_error.h"
  #include "xfs_trace.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
  #include "xfs_dir2.h"
  
  STATIC int
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c

new file mode 100644 (file)

index 0000000..e2148f2
--- /dev/null
+++ b/fs/xfs/xfs_bio_io.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+#include "xfs.h"
+
+/*
+ * Number of bio vectors to request for @count bytes: one per page worth of
+ * data, capped at BIO_MAX_PAGES.
+ */
+static inline unsigned int bio_max_vecs(unsigned int count)
+{
+       return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+}
+
+/*
+ * Synchronously read or write @count bytes between the kernel buffer @data
+ * and @bdev, starting at @sector.  @op is the request operation
+ * (REQ_OP_READ or REQ_OP_WRITE); REQ_META and REQ_SYNC are always added.
+ *
+ * @data may be a vmalloc address, in which case the vmap alias is flushed
+ * before a write and invalidated after a read.
+ *
+ * Returns the result of waiting for the final bio in the chain.
+ */
+int
+xfs_rw_bdev(
+       struct block_device     *bdev,
+       sector_t                sector,
+       unsigned int            count,
+       char                    *data,
+       unsigned int            op)
+{
+       unsigned int            is_vmalloc = is_vmalloc_addr(data);
+       unsigned int            left = count;
+       char                    *start = data;
+       int                     error;
+       struct bio              *bio;
+
+       if (is_vmalloc && op == REQ_OP_WRITE)
+               flush_kernel_vmap_range(data, count);
+
+       bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+       bio_set_dev(bio, bdev);
+       bio->bi_iter.bi_sector = sector;
+       bio->bi_opf = op | REQ_META | REQ_SYNC;
+
+       do {
+               struct page     *page = kmem_to_page(data);
+               unsigned int    off = offset_in_page(data);
+               unsigned int    len = min_t(unsigned, left, PAGE_SIZE - off);
+
+               /*
+                * When the current bio cannot take the page, chain a new bio
+                * behind it, submit the full one and retry with the new one.
+                */
+               while (bio_add_page(bio, page, len, off) != len) {
+                       struct bio      *prev = bio;
+
+                       bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+                       bio_copy_dev(bio, prev);
+                       bio->bi_iter.bi_sector = bio_end_sector(prev);
+                       bio->bi_opf = prev->bi_opf;
+                       bio_chain(prev, bio);
+
+                       submit_bio(prev);
+               }
+
+               data += len;
+               left -= len;
+       } while (left > 0);
+
+       error = submit_bio_wait(bio);
+       bio_put(bio);
+
+       /* The loop advanced @data, so invalidate from the saved start. */
+       if (is_vmalloc && op == REQ_OP_READ)
+               invalidate_kernel_vmap_range(start, count);
+       return error;
+}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c

index ce45f066995ebec7c89cdce43c57db89bd29e96f..9fa4a7ee8cfc2ebb86fe5fc1216dd92b7e19f672 100644 (file)
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -9,17 +9,16 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
+#include "xfs_shared.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
  #include "xfs_bmap_item.h"
  #include "xfs_log.h"
  #include "xfs_bmap.h"
  #include "xfs_icache.h"
-#include "xfs_trace.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_trans_space.h"
  
@@ -95,15 +94,6 @@ xfs_bui_item_format(
                         xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
  }
  
-/*
- * Pinning has no meaning for an bui item, so just return.
- */
-STATIC void
-xfs_bui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
  /*
   * The unpin operation is the last place an BUI is manipulated in the log. It is
   * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -122,72 +112,23 @@ xfs_bui_item_unpin(
         xfs_bui_release(buip);
  }
  
-/*
- * BUI items have no locking or pushing.  However, since BUIs are pulled from
- * the AIL when their corresponding BUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the BUI out of
- * the AIL.
- */
-STATIC uint
-xfs_bui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
  /*
   * The BUI has been either committed or aborted if the transaction has been
   * cancelled. If the transaction was cancelled, an BUD isn't going to be
   * constructed and thus we free the BUI here directly.
   */
  STATIC void
-xfs_bui_item_unlock(
+xfs_bui_item_release(
         struct xfs_log_item     *lip)
  {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_bui_release(BUI_ITEM(lip));
-}
-
-/*
- * The BUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_bui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
+       xfs_bui_release(BUI_ITEM(lip));
  }
  
-/*
- * The BUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_bui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all bui log items.
- */
  static const struct xfs_item_ops xfs_bui_item_ops = {
         .iop_size       = xfs_bui_item_size,
         .iop_format     = xfs_bui_item_format,
-       .iop_pin        = xfs_bui_item_pin,
         .iop_unpin      = xfs_bui_item_unpin,
-       .iop_unlock     = xfs_bui_item_unlock,
-       .iop_committed  = xfs_bui_item_committed,
-       .iop_push       = xfs_bui_item_push,
-       .iop_committing = xfs_bui_item_committing,
+       .iop_release    = xfs_bui_item_release,
  };
  
  /*
@@ -249,126 +190,241 @@ xfs_bud_item_format(
  }
  
  /*
- * Pinning has no meaning for an bud item, so just return.
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
   */
  STATIC void
-xfs_bud_item_pin(
+xfs_bud_item_release(
         struct xfs_log_item     *lip)
  {
+       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+       xfs_bui_release(budp->bud_buip);
+       kmem_zone_free(xfs_bud_zone, budp);
  }
  
-/*
- * Since pinning has no meaning for an bud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_bud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_bud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_bud_item_size,
+       .iop_format     = xfs_bud_item_format,
+       .iop_release    = xfs_bud_item_release,
+};
+
+static struct xfs_bud_log_item *
+xfs_trans_get_bud(
+       struct xfs_trans                *tp,
+       struct xfs_bui_log_item         *buip)
  {
+       struct xfs_bud_log_item         *bud;
+       struct xfs_log_item             *lip;
+
+       bud = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+       lip = &bud->bud_item;
+       xfs_log_item_init(tp->t_mountp, lip, XFS_LI_BUD, &xfs_bud_item_ops);
+       bud->bud_buip = buip;           /* BUD points back at its BUI */
+       bud->bud_format.bud_bui_id = buip->bui_format.bui_id;
+       xfs_trans_add_item(tp, lip);    /* attach the new BUD to @tp */
+       return bud;
  }
  
  /*
- * There isn't much you can do to push on an bud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish a bmap update and log it to the BUD. Note that the
+ * transaction is marked dirty regardless of whether the bmap update
+ * succeeds or fails to support the BUI/BUD lifecycle rules.
   */
-STATIC uint
-xfs_bud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_log_finish_bmap_update(
+       struct xfs_trans                *tp,
+       struct xfs_bud_log_item         *budp,
+       enum xfs_bmap_intent_type       type,
+       struct xfs_inode                *ip,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   *blockcount,
+       xfs_exntst_t                    state)
  {
-       return XFS_ITEM_PINNED;
+       int                             error;
+
+       error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
+                       startblock, blockcount, state);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the BUI and frees the BUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+       return error;
  }
  
-/*
- * The BUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the BUI and free the
- * BUD.
- */
-STATIC void
-xfs_bud_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort bmap intents by inode number without truncating the difference. */
+static int
+xfs_bmap_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
  {
-       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+       xfs_ino_t                       ia;
+       xfs_ino_t                       ib;
  
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_bui_release(budp->bud_buip);
-               kmem_zone_free(xfs_bud_zone, budp);
-       }
+       ia = container_of(a, struct xfs_bmap_intent, bi_list)->bi_owner->i_ino;
+       ib = container_of(b, struct xfs_bmap_intent, bi_list)->bi_owner->i_ino;
+       return (ia > ib) - (ia < ib);   /* -1, 0 or 1; never wraps in int */
  }
  
-/*
- * When the bud item is committed to disk, all we need to do is delete our
- * reference to our partner bui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_bud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Get a BUI. */
+STATIC void *
+xfs_bmap_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
  {
-       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+       struct xfs_bui_log_item         *buip;
+
+       ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+       ASSERT(tp != NULL);
+
+       buip = xfs_bui_init(tp->t_mountp);
+       ASSERT(buip != NULL);
  
         /*
-        * Drop the BUI reference regardless of whether the BUD has been
-        * aborted. Once the BUD transaction is constructed, it is the sole
-        * responsibility of the BUD to release the BUI (even if the BUI is
-        * aborted due to log I/O error).
+        * Get a log_item_desc to point at the new item.
          */
-       xfs_bui_release(budp->bud_buip);
-       kmem_zone_free(xfs_bud_zone, budp);
+       xfs_trans_add_item(tp, &buip->bui_item);
+       return buip;
+}
  
-       return (xfs_lsn_t)-1;
+/* Encode intent type, extent state and fork into the map extent flags. */
+static void
+xfs_trans_set_bmap_flags(
+       struct xfs_map_extent           *bmap,
+       enum xfs_bmap_intent_type       type,
+       int                             whichfork,
+       xfs_exntst_t                    state)
+{
+       uint                            flags = 0;
+
+       /* Only map and unmap are valid types; anything else asserts. */
+       if (type == XFS_BMAP_MAP || type == XFS_BMAP_UNMAP)
+               flags = type;
+       else
+               ASSERT(0);
+
+       if (state == XFS_EXT_UNWRITTEN)
+               flags |= XFS_BMAP_EXTENT_UNWRITTEN;
+       if (whichfork == XFS_ATTR_FORK)
+               flags |= XFS_BMAP_EXTENT_ATTR_FORK;
+       bmap->me_flags = flags;
  }
  
-/*
- * The BUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log bmap updates in the intent item. */
  STATIC void
-xfs_bud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_bmap_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
  {
+       struct xfs_bui_log_item         *buip = intent;
+       struct xfs_bmap_intent          *bmap;
+       uint                            next_extent;
+       struct xfs_map_extent           *map;
+
+       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
+       ASSERT(next_extent < buip->bui_format.bui_nextents);
+       map = &buip->bui_format.bui_extents[next_extent];
+       map->me_owner = bmap->bi_owner->i_ino;
+       map->me_startblock = bmap->bi_bmap.br_startblock;
+       map->me_startoff = bmap->bi_bmap.br_startoff;
+       map->me_len = bmap->bi_bmap.br_blockcount;
+       xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
+                       bmap->bi_bmap.br_state);
  }
  
-/*
- * This is the ops vector shared by all bud log items.
- */
-static const struct xfs_item_ops xfs_bud_item_ops = {
-       .iop_size       = xfs_bud_item_size,
-       .iop_format     = xfs_bud_item_format,
-       .iop_pin        = xfs_bud_item_pin,
-       .iop_unpin      = xfs_bud_item_unpin,
-       .iop_unlock     = xfs_bud_item_unlock,
-       .iop_committed  = xfs_bud_item_committed,
-       .iop_push       = xfs_bud_item_push,
-       .iop_committing = xfs_bud_item_committing,
-};
+/* Get a BUD for the BUI @intent to process the deferred bmap updates. */
+STATIC void *
+xfs_bmap_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_bud(tp, intent);
+}
  
-/*
- * Allocate and initialize an bud item with the given number of extents.
- */
-struct xfs_bud_log_item *
-xfs_bud_init(
-       struct xfs_mount                *mp,
-       struct xfs_bui_log_item         *buip)
+/*
+ * Process a deferred bmap update; returns -EAGAIN while blocks remain.
+ */
+STATIC int
+xfs_bmap_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_bmap_intent          *bi;
+       xfs_filblks_t                   left;
+       int                             error;
+
+       bi = container_of(item, struct xfs_bmap_intent, bi_list);
+       left = bi->bi_bmap.br_blockcount;
+       error = xfs_trans_log_finish_bmap_update(tp, done_item, bi->bi_type,
+                       bi->bi_owner, bi->bi_whichfork,
+                       bi->bi_bmap.br_startoff, bi->bi_bmap.br_startblock,
+                       &left, bi->bi_bmap.br_state);
+       if (error || left == 0) {
+               kmem_free(bi);
+               return error;
+       }
+       /* Only unmaps may finish partially; keep the intent for the rest. */
+       ASSERT(bi->bi_type == XFS_BMAP_UNMAP);
+       bi->bi_bmap.br_blockcount = left;
+       return -EAGAIN;
+}
  
+/* Abort all pending BUIs. */
+STATIC void
+xfs_bmap_update_abort_intent(
+       void                            *intent)
  {
-       struct xfs_bud_log_item *budp;
+       xfs_bui_release(intent);
+}
  
-       budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
-       budp->bud_buip = buip;
-       budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+/* Cancel a deferred bmap update. */
+STATIC void
+xfs_bmap_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_bmap_intent          *bmap;
  
-       return budp;
+       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+       kmem_free(bmap);
  }
  
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
+       .max_items      = XFS_BUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_bmap_update_diff_items,
+       .create_intent  = xfs_bmap_update_create_intent,
+       .abort_intent   = xfs_bmap_update_abort_intent,
+       .log_item       = xfs_bmap_update_log_item,
+       .create_done    = xfs_bmap_update_create_done,
+       .finish_item    = xfs_bmap_update_finish_item,
+       .cancel_item    = xfs_bmap_update_cancel_item,
+};
+
  /*
   * Process a bmap update intent item that was recovered from the log.
   * We need to update some inode's bmbt.
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h

index 89e043a88bb81c078c43a0b173da745e6a3f5186..ad479cc73de84b52d8ae4b866fc075fcdc03066f 100644 (file)
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -75,8 +75,6 @@ extern struct kmem_zone       *xfs_bui_zone;
  extern struct kmem_zone        *xfs_bud_zone;
  
  struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
-               struct xfs_bui_log_item *);
  void xfs_bui_item_free(struct xfs_bui_log_item *);
  void xfs_bui_release(struct xfs_bui_log_item *);
  int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 06d07f1e310b063db9b4ffd8d393f22d73d6115a..98c6a7a714276b11fe2cb373bca5c50c8e18465a 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -12,12 +12,10 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
  #include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_trans.h"
-#include "xfs_extfree_item.h"
  #include "xfs_alloc.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -28,11 +26,8 @@
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
-#include "xfs_log.h"
-#include "xfs_rmap_btree.h"
  #include "xfs_iomap.h"
  #include "xfs_reflink.h"
-#include "xfs_refcount.h"
  
  /* Kernel only BMAP related definitions and functions */
  
@@ -276,7 +271,7 @@ xfs_bmap_count_tree(
         struct xfs_btree_block  *block, *nextblock;
         int                     numrecs;
  
-       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+       error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
                                                 &xfs_bmbt_buf_ops);
         if (error)
                 return error;
@@ -287,7 +282,7 @@ xfs_bmap_count_tree(
                 /* Not at node above leaves, count this level of nodes */
                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                 while (nextbno != NULLFSBLOCK) {
-                       error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+                       error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
                                                 XFS_BMAP_BTREE_REF,
                                                 &xfs_bmbt_buf_ops);
                         if (error)
@@ -321,7 +316,7 @@ xfs_bmap_count_tree(
                         if (nextbno == NULLFSBLOCK)
                                 break;
                         bno = nextbno;
-                       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                                 XFS_BMAP_BTREE_REF,
                                                 &xfs_bmbt_buf_ops);
                         if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 548344e2512833bbb82f141fe34aefed88a6729e..ca0849043f542657a0a4d7f531cf3f564421b8c9 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -4,24 +4,9 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
  #include <linux/backing-dev.h>
-#include <linux/freezer.h>
  
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
@@ -213,7 +198,7 @@ xfs_buf_free_maps(
         }
  }
  
-struct xfs_buf *
+static struct xfs_buf *
  _xfs_buf_alloc(
         struct xfs_buftarg      *target,
         struct xfs_buf_map      *map,
@@ -243,6 +228,7 @@ _xfs_buf_alloc(
         sema_init(&bp->b_sema, 0); /* held, no waiters */
         spin_lock_init(&bp->b_lock);
         bp->b_target = target;
+       bp->b_mount = target->bt_mount;
         bp->b_flags = flags;
  
         /*
@@ -263,12 +249,11 @@ _xfs_buf_alloc(
                 bp->b_maps[i].bm_len = map[i].bm_len;
                 bp->b_length += map[i].bm_len;
         }
-       bp->b_io_length = bp->b_length;
  
         atomic_set(&bp->b_pin_count, 0);
         init_waitqueue_head(&bp->b_waiters);
  
-       XFS_STATS_INC(target->bt_mount, xb_create);
+       XFS_STATS_INC(bp->b_mount, xb_create);
         trace_xfs_buf_init(bp, _RET_IP_);
  
         return bp;
@@ -425,12 +410,12 @@ retry:
                                         current->comm, current->pid,
                                         __func__, gfp_mask);
  
-                       XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
+                       XFS_STATS_INC(bp->b_mount, xb_page_retries);
                         congestion_wait(BLK_RW_ASYNC, HZ/50);
                         goto retry;
                 }
  
-               XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
+               XFS_STATS_INC(bp->b_mount, xb_page_found);
  
                 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
                 size -= nbytes;
@@ -909,83 +894,6 @@ xfs_buf_read_uncached(
         return 0;
  }
  
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
-       struct xfs_buf          *bp,
-       size_t                  numblks)
-{
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_page_count = 0;
-       bp->b_addr = NULL;
-       bp->b_length = numblks;
-       bp->b_io_length = numblks;
-
-       ASSERT(bp->b_map_count == 1);
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
-       bp->b_maps[0].bm_len = bp->b_length;
-}
-
-static inline struct page *
-mem_to_page(
-       void                    *addr)
-{
-       if ((!is_vmalloc_addr(addr))) {
-               return virt_to_page(addr);
-       } else {
-               return vmalloc_to_page(addr);
-       }
-}
-
-int
-xfs_buf_associate_memory(
-       xfs_buf_t               *bp,
-       void                    *mem,
-       size_t                  len)
-{
-       int                     rval;
-       int                     i = 0;
-       unsigned long           pageaddr;
-       unsigned long           offset;
-       size_t                  buflen;
-       int                     page_count;
-
-       pageaddr = (unsigned long)mem & PAGE_MASK;
-       offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_SHIFT;
-
-       /* Free any previous set of page pointers */
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_addr = mem;
-
-       rval = _xfs_buf_get_pages(bp, page_count);
-       if (rval)
-               return rval;
-
-       bp->b_offset = offset;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_SIZE;
-       }
-
-       bp->b_io_length = BTOBB(len);
-       bp->b_length = BTOBB(buflen);
-
-       return 0;
-}
-
  xfs_buf_t *
  xfs_buf_get_uncached(
         struct xfs_buftarg      *target,
@@ -1180,7 +1088,7 @@ xfs_buf_lock(
         trace_xfs_buf_lock(bp, _RET_IP_);
  
         if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
+               xfs_log_force(bp->b_mount, 0);
         down(&bp->b_sema);
  
         trace_xfs_buf_lock_done(bp, _RET_IP_);
@@ -1269,7 +1177,7 @@ xfs_buf_ioend_async(
         struct xfs_buf  *bp)
  {
         INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
-       queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
+       queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
  }
  
  void
@@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert(
         struct xfs_buf          *bp,
         const char              *func)
  {
-       xfs_alert(bp->b_target->bt_mount,
+       xfs_alert(bp->b_mount,
  "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
                         func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
                         -bp->b_error);
@@ -1307,10 +1215,8 @@ xfs_bwrite(
                          XBF_WRITE_FAIL | XBF_DONE);
  
         error = xfs_buf_submit(bp);
-       if (error) {
-               xfs_force_shutdown(bp->b_target->bt_mount,
-                                  SHUTDOWN_META_IO_ERROR);
-       }
+       if (error)
+               xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
         return error;
  }
  
@@ -1436,21 +1342,8 @@ _xfs_buf_ioapply(
          */
         bp->b_error = 0;
  
-       /*
-        * Initialize the I/O completion workqueue if we haven't yet or the
-        * submitter has not opted to specify a custom one.
-        */
-       if (!bp->b_ioend_wq)
-               bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
-
         if (bp->b_flags & XBF_WRITE) {
                 op = REQ_OP_WRITE;
-               if (bp->b_flags & XBF_SYNCIO)
-                       op_flags = REQ_SYNC;
-               if (bp->b_flags & XBF_FUA)
-                       op_flags |= REQ_FUA;
-               if (bp->b_flags & XBF_FLUSH)
-                       op_flags |= REQ_PREFLUSH;
  
                 /*
                  * Run the write verifier callback function if it exists. If
@@ -1460,12 +1353,12 @@ _xfs_buf_ioapply(
                 if (bp->b_ops) {
                         bp->b_ops->verify_write(bp);
                         if (bp->b_error) {
-                               xfs_force_shutdown(bp->b_target->bt_mount,
+                               xfs_force_shutdown(bp->b_mount,
                                                    SHUTDOWN_CORRUPT_INCORE);
                                 return;
                         }
                 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
-                       struct xfs_mount *mp = bp->b_target->bt_mount;
+                       struct xfs_mount *mp = bp->b_mount;
  
                         /*
                          * non-crc filesystems don't attach verifiers during
@@ -1497,7 +1390,7 @@ _xfs_buf_ioapply(
          * subsequent call.
          */
         offset = bp->b_offset;
-       size = BBTOB(bp->b_io_length);
+       size = BBTOB(bp->b_length);
         blk_start_plug(&plug);
         for (i = 0; i < bp->b_map_count; i++) {
                 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
@@ -1543,7 +1436,7 @@ __xfs_buf_submit(
         ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
  
         /* on shutdown we stale and complete the buffer immediately */
-       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                 xfs_buf_ioerror(bp, -EIO);
                 bp->b_flags &= ~XBF_DONE;
                 xfs_buf_stale(bp);
@@ -1613,16 +1506,11 @@ xfs_buf_offset(
         return page_address(page) + (offset & (PAGE_SIZE-1));
  }
  
-/*
- *     Move data into or out of a buffer.
- */
  void
-xfs_buf_iomove(
-       xfs_buf_t               *bp,    /* buffer to process            */
-       size_t                  boff,   /* starting buffer offset       */
-       size_t                  bsize,  /* length to copy               */
-       void                    *data,  /* data address                 */
-       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
+xfs_buf_zero(
+       struct xfs_buf          *bp,
+       size_t                  boff,
+       size_t                  bsize)
  {
         size_t                  bend;
  
@@ -1635,23 +1523,13 @@ xfs_buf_iomove(
                 page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
                 page = bp->b_pages[page_index];
                 csize = min_t(size_t, PAGE_SIZE - page_offset,
-                                     BBTOB(bp->b_io_length) - boff);
+                                     BBTOB(bp->b_length) - boff);
  
                 ASSERT((csize + page_offset) <= PAGE_SIZE);
  
-               switch (mode) {
-               case XBRW_ZERO:
-                       memset(page_address(page) + page_offset, 0, csize);
-                       break;
-               case XBRW_READ:
-                       memcpy(data, page_address(page) + page_offset, csize);
-                       break;
-               case XBRW_WRITE:
-                       memcpy(page_address(page) + page_offset, data, csize);
-               }
+               memset(page_address(page) + page_offset, 0, csize);
  
                 boff += csize;
-               data += csize;
         }
  }
  
@@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
          * This allows userspace to disrupt buffer caching for debug/testing
          * purposes.
          */
-       if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
-                          XFS_ERRTAG_BUF_LRU_REF))
+       if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
                 lru_ref = 0;
  
         atomic_set(&bp->b_lru_ref, lru_ref);
@@ -2215,7 +2092,7 @@ xfs_verify_magic(
         struct xfs_buf          *bp,
         __be32                  dmagic)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         int                     idx;
  
         idx = xfs_sb_version_hascrc(&mp->m_sb);
@@ -2233,7 +2110,7 @@ xfs_verify_magic16(
         struct xfs_buf          *bp,
         __be16                  dmagic)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         int                     idx;
  
         idx = xfs_sb_version_hascrc(&mp->m_sb);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h

index d0b96e071cec197a39ea7cf4c67f777f1bebb046..c6e57a3f409ee7855474bb323d5037a6f96ab97b 100644 (file)
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,12 +21,6 @@
  
  #define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
  
-typedef enum {
-       XBRW_READ = 1,                  /* transfer into target memory */
-       XBRW_WRITE = 2,                 /* transfer from target memory */
-       XBRW_ZERO = 3,                  /* Zero target memory */
-} xfs_buf_rw_t;
-
  #define XBF_READ        (1 << 0) /* buffer intended for reading from device */
  #define XBF_WRITE       (1 << 1) /* buffer intended for writing to device */
  #define XBF_READ_AHEAD  (1 << 2) /* asynchronous read-ahead */
@@ -34,12 +28,7 @@ typedef enum {
  #define XBF_ASYNC       (1 << 4) /* initiator will not wait for completion */
  #define XBF_DONE        (1 << 5) /* all pages in the buffer uptodate */
  #define XBF_STALE       (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL  (1 << 24)/* async writes have failed on this buffer */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO      (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA                 (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH       (1 << 12)/* flush the disk cache before a write */
+#define XBF_WRITE_FAIL  (1 << 7) /* async writes have failed on this buffer */
  
  /* flags used only as arguments to access routines */
  #define XBF_TRYLOCK     (1 << 16)/* lock requested, but do not wait */
@@ -49,7 +38,6 @@ typedef enum {
  #define _XBF_PAGES      (1 << 20)/* backed by refcounted pages */
  #define _XBF_KMEM       (1 << 21)/* backed by heap memory */
  #define _XBF_DELWRI_Q   (1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND   (1 << 23)/* compound buffer */
  
  typedef unsigned int xfs_buf_flags_t;
  
@@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t;
         { XBF_DONE,             "DONE" }, \
         { XBF_STALE,            "STALE" }, \
         { XBF_WRITE_FAIL,       "WRITE_FAIL" }, \
-       { XBF_SYNCIO,           "SYNCIO" }, \
-       { XBF_FUA,              "FUA" }, \
-       { XBF_FLUSH,            "FLUSH" }, \
         { XBF_TRYLOCK,          "TRYLOCK" },    /* should never be set */\
         { XBF_UNMAPPED,         "UNMAPPED" },   /* ditto */\
         { _XBF_PAGES,           "PAGES" }, \
         { _XBF_KMEM,            "KMEM" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
-       { _XBF_COMPOUND,        "COMPOUND" }
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
  
  
  /*
@@ -161,13 +145,13 @@ typedef struct xfs_buf {
         wait_queue_head_t       b_waiters;      /* unpin waiters */
         struct list_head        b_list;
         struct xfs_perag        *b_pag;         /* contains rbtree root */
+       struct xfs_mount        *b_mount;
         xfs_buftarg_t           *b_target;      /* buffer target (device) */
         void                    *b_addr;        /* virtual address of buffer */
         struct work_struct      b_ioend_work;
-       struct workqueue_struct *b_ioend_wq;    /* I/O completion wq */
         xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
         struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_log_item;
+       struct xfs_buf_log_item *b_log_item;
         struct list_head        b_li_list;      /* Log items list head */
         struct xfs_trans        *b_transp;
         struct page             **b_pages;      /* array of page pointers */
@@ -175,7 +159,6 @@ typedef struct xfs_buf {
         struct xfs_buf_map      *b_maps;        /* compound buffer map */
         struct xfs_buf_map      __b_map;        /* inline compound buffer map */
         int                     b_map_count;
-       int                     b_io_length;    /* IO size in BBs */
         atomic_t                b_pin_count;    /* pin count */
         atomic_t                b_io_remaining; /* #outstanding I/O requests */
         unsigned int            b_page_count;   /* size of page array */
@@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
                            xfs_daddr_t blkno, size_t numblks,
                            xfs_buf_flags_t flags);
  
-struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
-                              struct xfs_buf_map *map, int nmaps,
-                              xfs_buf_flags_t flags);
-
-static inline struct xfs_buf *
-xfs_buf_alloc(
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             blkno,
-       size_t                  numblks,
-       xfs_buf_flags_t         flags)
-{
-       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-       return _xfs_buf_alloc(target, &map, 1, flags);
-}
-
  struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
                                struct xfs_buf_map *map, int nmaps,
                                xfs_buf_flags_t flags);
@@ -239,11 +207,10 @@ static inline struct xfs_buf *
  xfs_buf_get(
         struct xfs_buftarg      *target,
         xfs_daddr_t             blkno,
-       size_t                  numblks,
-       xfs_buf_flags_t         flags)
+       size_t                  numblks)
  {
         DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-       return xfs_buf_get_map(target, &map, 1, flags);
+       return xfs_buf_get_map(target, &map, 1, 0);
  }
  
  static inline struct xfs_buf *
@@ -269,9 +236,6 @@ xfs_buf_readahead(
         return xfs_buf_readahead_map(target, &map, 1, ops);
  }
  
-void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
-int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
-
  struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
                                 int flags);
  int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
@@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
         return __xfs_buf_submit(bp, wait);
  }
  
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-                               xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
  
  /* Buffer Utility Routines */
  extern void *xfs_buf_offset(struct xfs_buf *, size_t);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 65b32acfa0f6070020f5aec660bc3bba38d64bdd..7dcaec54a20bc368613d96ed9c76ace1dd431d53 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -5,19 +5,17 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_trans_priv.h"
-#include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_log.h"
-#include "xfs_inode.h"
  
  
  kmem_zone_t    *xfs_buf_item_zone;
@@ -520,7 +518,7 @@ xfs_buf_item_push(
         /* has a previous flush failed due to IO errors? */
         if ((bp->b_flags & XBF_WRITE_FAIL) &&
             ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
-               xfs_warn(bp->b_target->bt_mount,
+               xfs_warn(bp->b_mount,
  "Failing async write on buffer block 0x%llx. Retrying async write.",
                          (long long)bp->b_bn);
         }
@@ -594,7 +592,7 @@ xfs_buf_item_put(
   * free the item.
   */
  STATIC void
-xfs_buf_item_unlock(
+xfs_buf_item_release(
         struct xfs_log_item     *lip)
  {
         struct xfs_buf_log_item *bip = BUF_ITEM(lip);
@@ -609,7 +607,7 @@ xfs_buf_item_unlock(
                                                    &lip->li_flags);
  #endif
  
-       trace_xfs_buf_item_unlock(bip);
+       trace_xfs_buf_item_release(bip);
  
         /*
          * The bli dirty state should match whether the blf has logged segments
@@ -639,6 +637,14 @@ xfs_buf_item_unlock(
         xfs_buf_relse(bp);
  }
  
+STATIC void
+xfs_buf_item_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn)
+{
+       return xfs_buf_item_release(lip);
+}
+
  /*
   * This is called to find out where the oldest active copy of the
   * buf log item in the on disk log resides now that the last log
@@ -671,25 +677,15 @@ xfs_buf_item_committed(
         return lsn;
  }
  
-STATIC void
-xfs_buf_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
  static const struct xfs_item_ops xfs_buf_item_ops = {
         .iop_size       = xfs_buf_item_size,
         .iop_format     = xfs_buf_item_format,
         .iop_pin        = xfs_buf_item_pin,
         .iop_unpin      = xfs_buf_item_unpin,
-       .iop_unlock     = xfs_buf_item_unlock,
+       .iop_release    = xfs_buf_item_release,
+       .iop_committing = xfs_buf_item_committing,
         .iop_committed  = xfs_buf_item_committed,
         .iop_push       = xfs_buf_item_push,
-       .iop_committing = xfs_buf_item_committing
  };
  
  STATIC int
@@ -743,7 +739,7 @@ xfs_buf_item_init(
          * this buffer. If we do already have one, there is
          * nothing to do here so return.
          */
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
         if (bip) {
                 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
                 ASSERT(!bp->b_transp);
@@ -980,9 +976,9 @@ xfs_buf_item_relse(
   */
  void
  xfs_buf_attach_iodone(
-       xfs_buf_t       *bp,
-       void            (*cb)(xfs_buf_t *, xfs_log_item_t *),
-       xfs_log_item_t  *lip)
+       struct xfs_buf          *bp,
+       void                    (*cb)(struct xfs_buf *, struct xfs_log_item *),
+       struct xfs_log_item     *lip)
  {
         ASSERT(xfs_buf_islocked(bp));
  
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h

index 90f65f891fabd27210e52a2c9085c677d12fde62..4a054b11011a076c81357a0281e5fa2156b2bf5c 100644 (file)
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -39,7 +39,7 @@ struct xfs_buf_log_item;
   * locked, and which 128 byte chunks of the buffer are dirty.
   */
  struct xfs_buf_log_item {
-       xfs_log_item_t          bli_item;       /* common item structure */
+       struct xfs_log_item     bli_item;       /* common item structure */
         struct xfs_buf          *bli_buf;       /* real buffer pointer */
         unsigned int            bli_flags;      /* misc flags */
         unsigned int            bli_recur;      /* lock recursion count */
@@ -55,8 +55,8 @@ bool  xfs_buf_item_put(struct xfs_buf_log_item *);
  void   xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
  bool   xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
  void   xfs_buf_attach_iodone(struct xfs_buf *,
-                             void(*)(struct xfs_buf *, xfs_log_item_t *),
-                             xfs_log_item_t *);
+                             void(*)(struct xfs_buf *, struct xfs_log_item *),
+                             struct xfs_log_item *);
  void   xfs_buf_iodone_callbacks(struct xfs_buf *);
  void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
  bool   xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c

index 5142e64e2345897b8a770f024dc594a10c009446..283df898dd9f6f5c02701dc99bda8f7d9f3b0599 100644 (file)
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -6,17 +6,14 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
-#include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_bmap.h"
  #include "xfs_trans.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c

index d0df0ed50f4b6733d6bdd3b21a62c49402a94476..8ec7aab89044019c846f0082be199bf08f1bbd48 100644 (file)
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -4,19 +4,17 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_alloc_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_error.h"
  #include "xfs_extent_busy.h"
-#include "xfs_discard.h"
  #include "xfs_trace.h"
  #include "xfs_log.h"
  
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

index a1af984e4913e94e88b0eac261c5479728d8b24e..fb1ad448308156a1edb9e44630f7565b48223cd2 100644 (file)
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -14,16 +14,12 @@
  #include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_alloc.h"
  #include "xfs_quota.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_trans_space.h"
  #include "xfs_trans_priv.h"
  #include "xfs_qm.h"
-#include "xfs_cksum.h"
  #include "xfs_trace.h"
  #include "xfs_log.h"
  #include "xfs_bmap_btree.h"
@@ -1243,7 +1239,7 @@ xfs_qm_exit(void)
  /*
   * Iterate every dquot of a particular type.  The caller must ensure that the
   * particular quota type is active.  iter_fn can return negative error codes,
- * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
+ * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
   */
  int
  xfs_qm_dqiterate(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h

index 64bd8640f6e81dc6adba863883fb745554db73af..4fe85709d55d245fb65f72c6bd87866c1d5c5ba4 100644 (file)
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -34,7 +34,6 @@ typedef struct xfs_dquot {
         uint             dq_flags;      /* various flags (XFS_DQ_*) */
         struct list_head q_lru;         /* global free list of dquots */
         struct xfs_mount*q_mount;       /* filesystem this relates to */
-       struct xfs_trans*q_transp;      /* trans this belongs to currently */
         uint             q_nrefs;       /* # active refs from inodes */
         xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
         int              q_bufoffset;   /* off of dq in buffer (# dquots) */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c

index 7dedd17c4813172239c2cef774b6c4ab3c068dd2..282ec5af293e8f161e9dd8d9ced18393a0e7d925 100644 (file)
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -5,13 +5,13 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_quota.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_trans_priv.h"
@@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin(
                 wake_up(&dqp->q_pinwait);
  }
  
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       /*
-        * We always re-log the entire dquot when it becomes dirty,
-        * so, the latest copy _is_ the only one that matters.
-        */
-       return lsn;
-}
-
  /*
   * This is called to wait for the given dquot to be unpinned.
   * Most of these pin/unpin routines are plagiarized from inode code.
@@ -209,25 +197,14 @@ out_unlock:
         return rval;
  }
  
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
  STATIC void
-xfs_qm_dquot_logitem_unlock(
+xfs_qm_dquot_logitem_release(
         struct xfs_log_item     *lip)
  {
         struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
  
         ASSERT(XFS_DQ_IS_LOCKED(dqp));
  
-       /*
-        * Clear the transaction pointer in the dquot
-        */
-       dqp->q_transp = NULL;
-
         /*
          * dquots are never 'held' from getting unlocked at the end of
          * a transaction.  Their locking and unlocking is hidden inside the
@@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock(
         xfs_dqunlock(dqp);
  }
  
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
  STATIC void
  xfs_qm_dquot_logitem_committing(
         struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+       xfs_lsn_t               commit_lsn)
  {
+       return xfs_qm_dquot_logitem_release(lip);
  }
  
-/*
- * This is the ops vector for dquots
- */
  static const struct xfs_item_ops xfs_dquot_item_ops = {
         .iop_size       = xfs_qm_dquot_logitem_size,
         .iop_format     = xfs_qm_dquot_logitem_format,
         .iop_pin        = xfs_qm_dquot_logitem_pin,
         .iop_unpin      = xfs_qm_dquot_logitem_unpin,
-       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
-       .iop_committed  = xfs_qm_dquot_logitem_committed,
+       .iop_release    = xfs_qm_dquot_logitem_release,
+       .iop_committing = xfs_qm_dquot_logitem_committing,
         .iop_push       = xfs_qm_dquot_logitem_push,
-       .iop_committing = xfs_qm_dquot_logitem_committing,
         .iop_error      = xfs_dquot_item_error
  };
  
@@ -319,26 +288,6 @@ xfs_qm_qoff_logitem_format(
         xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem));
  }
  
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
  /*
   * There isn't much you can do to push a quotaoff item.  It is simply
   * stuck waiting for the log to be flushed to disk.
@@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push(
         return XFS_ITEM_LOCKED;
  }
  
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
  STATIC xfs_lsn_t
  xfs_qm_qoffend_logitem_committed(
         struct xfs_log_item     *lip,
@@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed(
         return (xfs_lsn_t)-1;
  }
  
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
  static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
         .iop_size       = xfs_qm_qoff_logitem_size,
         .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
         .iop_committed  = xfs_qm_qoffend_logitem_committed,
         .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
  };
  
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
  static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
         .iop_size       = xfs_qm_qoff_logitem_size,
         .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoff_logitem_committed,
         .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
  };
  
  /*
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h

index db9df710a3080c43a964c04d3869a1a208209916..1aed34ccdabc21f9221680c21eacdca4ab6f8d20 100644 (file)
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -12,13 +12,13 @@ struct xfs_mount;
  struct xfs_qoff_logitem;
  
  typedef struct xfs_dq_logitem {
-       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_log_item      qli_item;         /* common portion */
         struct xfs_dquot        *qli_dquot;        /* dquot ptr */
         xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
  } xfs_dq_logitem_t;
  
  typedef struct xfs_qoff_logitem {
-       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_log_item      qql_item;      /* common portion */
         struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
         unsigned int            qql_flags;
  } xfs_qoff_logitem_t;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c

index a1e177f66404d28184fd99a5711d2c6f0b5005c2..544c9482a0efec22883f168e670c1320ab06eac7 100644 (file)
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -4,6 +4,7 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_fs.h"
  #include "xfs_log_format.h"
@@ -353,7 +354,7 @@ xfs_buf_verifier_error(
         size_t                  bufsz,
         xfs_failaddr_t          failaddr)
  {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
         xfs_failaddr_t          fa;
         int                     sz;
  
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c

index f2284ceb129f7acdf91874934c3013c6d9b174f3..f1372f9046e389313afa3101a9072d0344e31818 100644 (file)
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -4,18 +4,16 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_export.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_inode_item.h"
-#include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_log.h"
  #include "xfs_pnfs.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c

index 74ddf66f4cfe463264cdcd2d7ea1dd9fb11bb8f9..86f6512d68643ec833b1aa5c5ef4e12036e90523 100644 (file)
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -9,14 +9,18 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
+#include "xfs_shared.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
  #include "xfs_extfree_item.h"
  #include "xfs_log.h"
  #include "xfs_btree.h"
  #include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
  
  
  kmem_zone_t    *xfs_efi_zone;
@@ -106,15 +110,6 @@ xfs_efi_item_format(
  }
  
  
-/*
- * Pinning has no meaning for an efi item, so just return.
- */
-STATIC void
-xfs_efi_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
  /*
   * The unpin operation is the last place an EFI is manipulated in the log. It is
   * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -132,72 +127,23 @@ xfs_efi_item_unpin(
         xfs_efi_release(efip);
  }
  
-/*
- * Efi items have no locking or pushing.  However, since EFIs are pulled from
- * the AIL when their corresponding EFDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the EFI out of
- * the AIL.
- */
-STATIC uint
-xfs_efi_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
  /*
   * The EFI has been either committed or aborted if the transaction has been
   * cancelled. If the transaction was cancelled, an EFD isn't going to be
   * constructed and thus we free the EFI here directly.
   */
  STATIC void
-xfs_efi_item_unlock(
+xfs_efi_item_release(
         struct xfs_log_item     *lip)
  {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_efi_release(EFI_ITEM(lip));
-}
-
-/*
- * The EFI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_efi_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The EFI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_efi_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
+       xfs_efi_release(EFI_ITEM(lip));
  }
  
-/*
- * This is the ops vector shared by all efi log items.
- */
  static const struct xfs_item_ops xfs_efi_item_ops = {
         .iop_size       = xfs_efi_item_size,
         .iop_format     = xfs_efi_item_format,
-       .iop_pin        = xfs_efi_item_pin,
         .iop_unpin      = xfs_efi_item_unpin,
-       .iop_unlock     = xfs_efi_item_unlock,
-       .iop_committed  = xfs_efi_item_committed,
-       .iop_push       = xfs_efi_item_push,
-       .iop_committing = xfs_efi_item_committing
+       .iop_release    = xfs_efi_item_release,
  };
  
  
@@ -349,136 +295,298 @@ xfs_efd_item_format(
  }
  
  /*
- * Pinning has no meaning for an efd item, so just return.
+ * The EFD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the EFI and free the EFD.
   */
  STATIC void
-xfs_efd_item_pin(
+xfs_efd_item_release(
         struct xfs_log_item     *lip)
  {
+       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
+       xfs_efi_release(efdp->efd_efip);
+       xfs_efd_item_free(efdp);
  }
  
+static const struct xfs_item_ops xfs_efd_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_efd_item_size,
+       .iop_format     = xfs_efd_item_format,
+       .iop_release    = xfs_efd_item_release,
+};
+
  /*
- * Since pinning has no meaning for an efd item, unpinning does
- * not either.
+ * Allocate an "extent free done" log item that will hold nextents worth of
+ * extents.  The caller must use all nextents extents, because we are not
+ * flexible about this at all.
   */
-STATIC void
-xfs_efd_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static struct xfs_efd_log_item *
+xfs_trans_get_efd(
+       struct xfs_trans                *tp,
+       struct xfs_efi_log_item         *efip,
+       unsigned int                    nextents)
  {
+       struct xfs_efd_log_item         *efdp;
+
+       ASSERT(nextents > 0);
+
+       if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
+               efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
+                               (nextents - 1) * sizeof(struct xfs_extent),
+                               KM_SLEEP);
+       } else {
+               efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
+       }
+
+       xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
+                         &xfs_efd_item_ops);
+       efdp->efd_efip = efip;
+       efdp->efd_format.efd_nextents = nextents;
+       efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+
+       xfs_trans_add_item(tp, &efdp->efd_item);
+       return efdp;
  }
  
  /*
- * There isn't much you can do to push on an efd item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Free an extent and log it to the EFD. Note that the transaction is marked
+ * dirty regardless of whether the extent free succeeds or fails to support the
+ * EFI/EFD lifecycle rules.
   */
-STATIC uint
-xfs_efd_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_free_extent(
+       struct xfs_trans                *tp,
+       struct xfs_efd_log_item         *efdp,
+       xfs_fsblock_t                   start_block,
+       xfs_extlen_t                    ext_len,
+       const struct xfs_owner_info     *oinfo,
+       bool                            skip_discard)
  {
-       return XFS_ITEM_PINNED;
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_extent               *extp;
+       uint                            next_extent;
+       xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, start_block);
+       xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp,
+                                                               start_block);
+       int                             error;
+
+       trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+
+       error = __xfs_free_extent(tp, start_block, ext_len,
+                                 oinfo, XFS_AG_RESV_NONE, skip_discard);
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the EFI and frees the EFD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+       next_extent = efdp->efd_next_extent;
+       ASSERT(next_extent < efdp->efd_format.efd_nextents);
+       extp = &(efdp->efd_format.efd_extents[next_extent]);
+       extp->ext_start = start_block;
+       extp->ext_len = ext_len;
+       efdp->efd_next_extent++;
+
+       return error;
  }
  
-/*
- * The EFD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the EFI and free the EFD.
- */
-STATIC void
-xfs_efd_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort bmap items by AG. */
+static int
+xfs_extent_free_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
  {
-       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_extent_free_item     *ra;
+       struct xfs_extent_free_item     *rb;
+
+       ra = container_of(a, struct xfs_extent_free_item, xefi_list);
+       rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+}
  
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_efi_release(efdp->efd_efip);
-               xfs_efd_item_free(efdp);
-       }
+/* Get an EFI. */
+STATIC void *
+xfs_extent_free_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_efi_log_item         *efip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       efip = xfs_efi_init(tp->t_mountp, count);
+       ASSERT(efip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &efip->efi_item);
+       return efip;
  }
  
-/*
- * When the efd item is committed to disk, all we need to do is delete our
- * reference to our partner efi item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from further
- * referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_efd_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Log a free extent to the intent item. */
+STATIC void
+xfs_extent_free_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
  {
-       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+       struct xfs_efi_log_item         *efip = intent;
+       struct xfs_extent_free_item     *free;
+       uint                            next_extent;
+       struct xfs_extent               *extp;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
  
         /*
-        * Drop the EFI reference regardless of whether the EFD has been
-        * aborted. Once the EFD transaction is constructed, it is the sole
-        * responsibility of the EFD to release the EFI (even if the EFI is
-        * aborted due to log I/O error).
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
          */
-       xfs_efi_release(efdp->efd_efip);
-       xfs_efd_item_free(efdp);
+       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+       ASSERT(next_extent < efip->efi_format.efi_nextents);
+       extp = &efip->efi_format.efi_extents[next_extent];
+       extp->ext_start = free->xefi_startblock;
+       extp->ext_len = free->xefi_blockcount;
+}
  
-       return (xfs_lsn_t)-1;
+/* Get an EFD so we can process all the free extents. */
+STATIC void *
+xfs_extent_free_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_efd(tp, intent, count);
  }
  
-/*
- * The EFD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Process a free extent. */
+STATIC int
+xfs_extent_free_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_extent_free_item     *free;
+       int                             error;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       error = xfs_trans_free_extent(tp, done_item,
+                       free->xefi_startblock,
+                       free->xefi_blockcount,
+                       &free->xefi_oinfo, free->xefi_skip_discard);
+       kmem_free(free);
+       return error;
+}
+
+/* Abort all pending EFIs. */
  STATIC void
-xfs_efd_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_extent_free_abort_intent(
+       void                            *intent)
  {
+       xfs_efi_release(intent);
  }
  
-/*
- * This is the ops vector shared by all efd log items.
- */
-static const struct xfs_item_ops xfs_efd_item_ops = {
-       .iop_size       = xfs_efd_item_size,
-       .iop_format     = xfs_efd_item_format,
-       .iop_pin        = xfs_efd_item_pin,
-       .iop_unpin      = xfs_efd_item_unpin,
-       .iop_unlock     = xfs_efd_item_unlock,
-       .iop_committed  = xfs_efd_item_committed,
-       .iop_push       = xfs_efd_item_push,
-       .iop_committing = xfs_efd_item_committing
+/* Cancel a free extent. */
+STATIC void
+xfs_extent_free_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_extent_free_item     *free;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       kmem_free(free);
+}
+
+const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_extent_free_diff_items,
+       .create_intent  = xfs_extent_free_create_intent,
+       .abort_intent   = xfs_extent_free_abort_intent,
+       .log_item       = xfs_extent_free_log_item,
+       .create_done    = xfs_extent_free_create_done,
+       .finish_item    = xfs_extent_free_finish_item,
+       .cancel_item    = xfs_extent_free_cancel_item,
  };
  
  /*
- * Allocate and initialize an efd item with the given number of extents.
+ * AGFL blocks are accounted differently in the reserve pools and are not
+ * inserted into the busy extent list.
   */
-struct xfs_efd_log_item *
-xfs_efd_init(
-       struct xfs_mount        *mp,
-       struct xfs_efi_log_item *efip,
-       uint                    nextents)
-
+STATIC int
+xfs_agfl_free_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
  {
-       struct xfs_efd_log_item *efdp;
-       uint                    size;
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_efd_log_item         *efdp = done_item;
+       struct xfs_extent_free_item     *free;
+       struct xfs_extent               *extp;
+       struct xfs_buf                  *agbp;
+       int                             error;
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       uint                            next_extent;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       ASSERT(free->xefi_blockcount == 1);
+       agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+       agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+
+       trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+
+       error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+       if (!error)
+               error = xfs_free_agfl_block(tp, agno, agbno, agbp,
+                                           &free->xefi_oinfo);
  
-       ASSERT(nextents > 0);
-       if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
-               size = (uint)(sizeof(xfs_efd_log_item_t) +
-                       ((nextents - 1) * sizeof(xfs_extent_t)));
-               efdp = kmem_zalloc(size, KM_SLEEP);
-       } else {
-               efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
-       }
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the EFI and frees the EFD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
  
-       xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
-       efdp->efd_efip = efip;
-       efdp->efd_format.efd_nextents = nextents;
-       efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+       next_extent = efdp->efd_next_extent;
+       ASSERT(next_extent < efdp->efd_format.efd_nextents);
+       extp = &(efdp->efd_format.efd_extents[next_extent]);
+       extp->ext_start = free->xefi_startblock;
+       extp->ext_len = free->xefi_blockcount;
+       efdp->efd_next_extent++;
  
-       return efdp;
+       kmem_free(free);
+       return error;
  }
  
+/* sub-type with special handling for AGFL deferred frees */
+const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
+       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_extent_free_diff_items,
+       .create_intent  = xfs_extent_free_create_intent,
+       .abort_intent   = xfs_extent_free_abort_intent,
+       .log_item       = xfs_extent_free_log_item,
+       .create_done    = xfs_extent_free_create_done,
+       .finish_item    = xfs_agfl_free_finish_item,
+       .cancel_item    = xfs_extent_free_cancel_item,
+};
+
  /*
   * Process an extent free intent item that was recovered from
   * the log.  We need to free the extents that it describes.
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h

index 2a6a895ca73e542c571cb47d3ac7b86ccfe7511d..16aaab06d4ecc55afa09f1d7880ef41a225ad82b 100644 (file)
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -51,7 +51,7 @@ struct kmem_zone;
   * AIL, so at this point both the EFI and EFD are freed.
   */
  typedef struct xfs_efi_log_item {
-       xfs_log_item_t          efi_item;
+       struct xfs_log_item     efi_item;
         atomic_t                efi_refcount;
         atomic_t                efi_next_extent;
         unsigned long           efi_flags;      /* misc flags */
@@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item {
   * have been freed.
   */
  typedef struct xfs_efd_log_item {
-       xfs_log_item_t          efd_item;
+       struct xfs_log_item     efd_item;
         xfs_efi_log_item_t      *efd_efip;
         uint                    efd_next_extent;
         xfs_efd_log_format_t    efd_format;
@@ -79,8 +79,6 @@ extern struct kmem_zone       *xfs_efi_zone;
  extern struct kmem_zone        *xfs_efd_zone;
  
  xfs_efi_log_item_t     *xfs_efi_init(struct xfs_mount *, uint);
-xfs_efd_log_item_t     *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
-                                     uint);
  int                    xfs_efi_copy_format(xfs_log_iovec_t *buf,
                                             xfs_efi_log_format_t *dst_efi_fmt);
  void                   xfs_efi_item_free(xfs_efi_log_item_t *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 916a35cae5e94d649d6d8d181647ab870ce558d9..e93bacbd49aed89946e22effc1c47b3026ec865b 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -10,14 +10,11 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
-#include "xfs_error.h"
  #include "xfs_dir2.h"
  #include "xfs_dir2_priv.h"
  #include "xfs_ioctl.h"
@@ -28,9 +25,7 @@
  #include "xfs_iomap.h"
  #include "xfs_reflink.h"
  
-#include <linux/dcache.h>
  #include <linux/falloc.h>
-#include <linux/pagevec.h>
  #include <linux/backing-dev.h>
  #include <linux/mman.h>
  
@@ -379,6 +374,7 @@ xfs_dio_write_end_io(
         struct inode            *inode = file_inode(iocb->ki_filp);
         struct xfs_inode        *ip = XFS_I(inode);
         loff_t                  offset = iocb->ki_pos;
+       unsigned int            nofs_flag;
         int                     error = 0;
  
         trace_xfs_end_io_direct_write(ip, offset, size);
@@ -395,10 +391,17 @@ xfs_dio_write_end_io(
          */
         XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
  
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
         if (flags & IOMAP_DIO_COW) {
                 error = xfs_reflink_end_cow(ip, offset, size);
                 if (error)
-                       return error;
+                       goto out;
         }
  
         /*
@@ -407,8 +410,10 @@ xfs_dio_write_end_io(
          * earlier allows a racing dio read to find unwritten extents before
          * they are converted.
          */
-       if (flags & IOMAP_DIO_UNWRITTEN)
-               return xfs_iomap_write_unwritten(ip, offset, size, true);
+       if (flags & IOMAP_DIO_UNWRITTEN) {
+               error = xfs_iomap_write_unwritten(ip, offset, size, true);
+               goto out;
+       }
  
         /*
          * We need to update the in-core inode size here so that we don't end up
@@ -430,6 +435,8 @@ xfs_dio_write_end_io(
                 spin_unlock(&ip->i_flags_lock);
         }
  
+out:
+       memalloc_nofs_restore(nofs_flag);
         return error;
  }
  
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index 182501373af2dc429637b936c99542c5a2d90987..574a7a8b4736ba49b297478c0712f16cc8995c42 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -5,22 +5,19 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_alloc.h"
  #include "xfs_mru_cache.h"
-#include "xfs_filestream.h"
  #include "xfs_trace.h"
  #include "xfs_ag_resv.h"
  #include "xfs_trans.h"
-#include "xfs_shared.h"
  
  struct xfs_fstrm_item {
         struct xfs_mru_cache_elem       mru;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c

index 3d76a9e35870adad0a29b2a04e5ab4c8adf02dc9..5a8f9641562aa12903dfbe321846bb770a85a0c1 100644 (file)
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -9,16 +9,12 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_error.h"
  #include "xfs_btree.h"
  #include "xfs_rmap_btree.h"
  #include "xfs_trace.h"
-#include "xfs_log.h"
  #include "xfs_rmap.h"
  #include "xfs_alloc.h"
  #include "xfs_bit.h"
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 3d0e0570e3aa1b00bbdd5f9655a9dce8819ace6d..3e61d0cc23f8c6a2b2200a8d4fee04d627cc39fc 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -11,15 +11,11 @@
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_trans.h"
  #include "xfs_error.h"
-#include "xfs_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_fsops.h"
  #include "xfs_trans_space.h"
-#include "xfs_rtalloc.h"
-#include "xfs_trace.h"
  #include "xfs_log.h"
  #include "xfs_ag.h"
  #include "xfs_ag_resv.h"
@@ -251,9 +247,9 @@ xfs_growfs_data(
         if (mp->m_sb.sb_imax_pct) {
                 uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
                 do_div(icount, 100);
-               mp->m_maxicount = XFS_FSB_TO_INO(mp, icount);
+               M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
         } else
-               mp->m_maxicount = 0;
+               M_IGEO(mp)->maxicount = 0;
  
         /* Update secondary superblocks now the physical grow has completed */
         error = xfs_update_secondary_sbs(mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c

index d0d37738412009355957661fe7e47a18c7522de4..fa55ab8b8d80ef7a6b93e70f82f6a38f449b1ab1 100644 (file)
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -4,7 +4,6 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
-#include "xfs_sysctl.h"
  
  /*
   * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
@@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = {
  #else
         .bug_on_assert          =       false,  /* assert failures WARN() */
  #endif
+#ifdef DEBUG
+       .pwork_threads          =       -1,     /* automatic thread detection */
+#endif
  };
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c

index 4c4929f9e7bf382a537daea2586a11d429fa707e..8e0cb05a71424e557e065bf6a5ecfca4962bf041 100644 (file)
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -9,12 +9,8 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trace.h"
  #include "xfs_health.h"
@@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = {
  void
  xfs_bulkstat_health(
         struct xfs_inode                *ip,
-       struct xfs_bstat                *bs)
+       struct xfs_bulkstat             *bs)
  {
         const struct ioctl_sick_map     *m;
         unsigned int                    sick;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index a76b27565a1898f5a7572e2e871da4011304084b..0b0fd10a36d4da80870e3d3734c6908acf721fa5 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -5,13 +5,13 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_inode_item.h"
@@ -23,8 +23,6 @@
  #include "xfs_dquot.h"
  #include "xfs_reflink.h"
  
-#include <linux/kthread.h>
-#include <linux/freezer.h>
  #include <linux/iversion.h>
  
  /*
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c

index 8381d34cb102f8c6f7541d80173c98d456b7e622..d99a0a3e5f400e767ff6f76296f4596bff6f7f58 100644 (file)
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,14 +6,9 @@
  #include "xfs.h"
  #include "xfs_fs.h"
  #include "xfs_shared.h"
-#include "xfs_format.h"
  #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
-#include "xfs_error.h"
  #include "xfs_icreate_item.h"
  #include "xfs_log.h"
  
@@ -56,80 +51,18 @@ xfs_icreate_item_format(
                         sizeof(struct xfs_icreate_log));
  }
  
-
-/* Pinning has no meaning for the create item, so just return. */
  STATIC void
-xfs_icreate_item_pin(
+xfs_icreate_item_release(
         struct xfs_log_item     *lip)
  {
+       kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
  }
  
-
-/* pinning has no meaning for the create item, so just return. */
-STATIC void
-xfs_icreate_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
-STATIC void
-xfs_icreate_item_unlock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               kmem_zone_free(xfs_icreate_zone, icp);
-       return;
-}
-
-/*
- * Because we have ordered buffers being tracked in the AIL for the inode
- * creation, we don't need the create item after this. Hence we can free
- * the log item and return -1 to tell the caller we're done with the item.
- */
-STATIC xfs_lsn_t
-xfs_icreate_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
-       kmem_zone_free(xfs_icreate_zone, icp);
-       return (xfs_lsn_t)-1;
-}
-
-/* item can never get into the AIL */
-STATIC uint
-xfs_icreate_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       ASSERT(0);
-       return XFS_ITEM_SUCCESS;
-}
-
-/* Ordered buffers do the dependency tracking here, so this does nothing. */
-STATIC void
-xfs_icreate_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
  static const struct xfs_item_ops xfs_icreate_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
         .iop_size       = xfs_icreate_item_size,
         .iop_format     = xfs_icreate_item_format,
-       .iop_pin        = xfs_icreate_item_pin,
-       .iop_unpin      = xfs_icreate_item_unpin,
-       .iop_push       = xfs_icreate_item_push,
-       .iop_unlock     = xfs_icreate_item_unlock,
-       .iop_committed  = xfs_icreate_item_committed,
-       .iop_committing = xfs_icreate_item_committing,
+       .iop_release    = xfs_icreate_item_release,
  };
  
  
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 71d216cf6f875e01516f15cafa673e0b3e0dbb78..6467d5e1df2dd1508f39aa9f3dc6df6ad37c2e5b 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3,7 +3,6 @@
   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   * All Rights Reserved.
   */
-#include <linux/log2.h>
  #include <linux/iversion.h>
  
  #include "xfs.h"
@@ -16,10 +15,7 @@
  #include "xfs_mount.h"
  #include "xfs_defer.h"
  #include "xfs_inode.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
  #include "xfs_attr.h"
  #include "xfs_trans_space.h"
  #include "xfs_trans.h"
@@ -32,7 +28,6 @@
  #include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_filestream.h"
-#include "xfs_cksum.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_symlink.h"
@@ -40,7 +35,6 @@
  #include "xfs_log.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_reflink.h"
-#include "xfs_dir2_priv.h"
  
  kmem_zone_t *xfs_inode_zone;
  
@@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass)
   */
  static void
  xfs_lock_inodes(
-       xfs_inode_t     **ips,
-       int             inodes,
-       uint            lock_mode)
+       struct xfs_inode        **ips,
+       int                     inodes,
+       uint                    lock_mode)
  {
-       int             attempts = 0, i, j, try_lock;
-       xfs_log_item_t  *lp;
+       int                     attempts = 0, i, j, try_lock;
+       struct xfs_log_item     *lp;
  
         /*
          * Currently supports between 2 and 5 inodes with exclusive locking.  We
@@ -485,7 +479,7 @@ again:
                  */
                 if (!try_lock) {
                         for (j = (i - 1); j >= 0 && !try_lock; j--) {
-                               lp = (xfs_log_item_t *)ips[j]->i_itemp;
+                               lp = &ips[j]->i_itemp->ili_item;
                                 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
                                         try_lock++;
                         }
@@ -551,7 +545,7 @@ xfs_lock_two_inodes(
         struct xfs_inode        *temp;
         uint                    mode_temp;
         int                     attempts = 0;
-       xfs_log_item_t          *lp;
+       struct xfs_log_item     *lp;
  
         ASSERT(hweight32(ip0_mode) == 1);
         ASSERT(hweight32(ip1_mode) == 1);
@@ -585,7 +579,7 @@ xfs_lock_two_inodes(
          * the second lock. If we can't get it, we must release the first one
          * and try again.
          */
-       lp = (xfs_log_item_t *)ip0->i_itemp;
+       lp = &ip0->i_itemp->ili_item;
         if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
                 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
                         xfs_iunlock(ip0, ip0_mode);
@@ -2537,13 +2531,14 @@ xfs_ifree_cluster(
         xfs_inode_log_item_t    *iip;
         struct xfs_log_item     *lip;
         struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
         xfs_ino_t               inum;
  
         inum = xic->first_ino;
         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
-       nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster;
+       nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
  
-       for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) {
+       for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
                 /*
                  * The allocation bitmap tells us which inodes of the chunk were
                  * physically allocated. Skip the cluster if an inode falls into
@@ -2551,7 +2546,7 @@ xfs_ifree_cluster(
                  */
                 ioffset = inum - xic->first_ino;
                 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
-                       ASSERT(ioffset % mp->m_inodes_per_cluster == 0);
+                       ASSERT(ioffset % igeo->inodes_per_cluster == 0);
                         continue;
                 }
  
@@ -2567,7 +2562,7 @@ xfs_ifree_cluster(
                  * to mark all the active inodes on the buffer stale.
                  */
                 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-                                       mp->m_bsize * mp->m_blocks_per_cluster,
+                                       mp->m_bsize * igeo->blocks_per_cluster,
                                         XBF_UNMAPPED);
  
                 if (!bp)
@@ -2614,7 +2609,7 @@ xfs_ifree_cluster(
                  * transaction stale above, which means there is no point in
                  * even trying to lock them.
                  */
-               for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+               for (i = 0; i < igeo->inodes_per_cluster; i++) {
  retry:
                         rcu_read_lock();
                         ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -3472,28 +3467,27 @@ xfs_iflush_cluster(
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_perag        *pag;
         unsigned long           first_index, mask;
-       unsigned long           inodes_per_cluster;
         int                     cilist_size;
         struct xfs_inode        **cilist;
         struct xfs_inode        *cip;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
         int                     nr_found;
         int                     clcount = 0;
         int                     i;
  
         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
  
-       inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-       cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+       cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
         cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
         if (!cilist)
                 goto out_put;
  
-       mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
+       mask = ~(igeo->inodes_per_cluster - 1);
         first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
         rcu_read_lock();
         /* really need a gang lookup range call here */
         nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
-                                       first_index, inodes_per_cluster);
+                                       first_index, igeo->inodes_per_cluster);
         if (nr_found == 0)
                 goto out_free;
  
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index fa1c4fe2ffbfb1fcda3eaaaff3bd8a3b3b3d9b74..c9a502eed20415fd83907b64c2870c4d079cf24c 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -5,6 +5,7 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
@@ -12,7 +13,6 @@
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_inode_item.h"
-#include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_trans_priv.h"
  #include "xfs_buf_item.h"
@@ -565,7 +565,7 @@ out_unlock:
   * Unlock the inode associated with the inode log item.
   */
  STATIC void
-xfs_inode_item_unlock(
+xfs_inode_item_release(
         struct xfs_log_item     *lip)
  {
         struct xfs_inode_log_item *iip = INODE_ITEM(lip);
@@ -621,23 +621,21 @@ xfs_inode_item_committed(
  STATIC void
  xfs_inode_item_committing(
         struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+       xfs_lsn_t               commit_lsn)
  {
-       INODE_ITEM(lip)->ili_last_lsn = lsn;
+       INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
+       return xfs_inode_item_release(lip);
  }
  
-/*
- * This is the ops vector shared by all buf log items.
- */
  static const struct xfs_item_ops xfs_inode_item_ops = {
         .iop_size       = xfs_inode_item_size,
         .iop_format     = xfs_inode_item_format,
         .iop_pin        = xfs_inode_item_pin,
         .iop_unpin      = xfs_inode_item_unpin,
-       .iop_unlock     = xfs_inode_item_unlock,
+       .iop_release    = xfs_inode_item_release,
         .iop_committed  = xfs_inode_item_committed,
         .iop_push       = xfs_inode_item_push,
-       .iop_committing = xfs_inode_item_committing,
+       .iop_committing = xfs_inode_item_committing,
         .iop_error      = xfs_inode_item_error
  };
  
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h

index 27081eba220c95247a24a417e00ace8d574e3d44..07a60e74c39c80a07e0c43c793d75025635b22be 100644 (file)
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -14,7 +14,7 @@ struct xfs_inode;
  struct xfs_mount;
  
  typedef struct xfs_inode_log_item {
-       xfs_log_item_t          ili_item;          /* common portion */
+       struct xfs_log_item     ili_item;          /* common portion */
         struct xfs_inode        *ili_inode;        /* inode ptr */
         xfs_lsn_t               ili_flush_lsn;     /* lsn at last flush */
         xfs_lsn_t               ili_last_lsn;      /* lsn at last transaction */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index fe29aa61293c562131acc90dc540129484234184..6f7848cd5527bc8d32c840c44d4748a1422357fb 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -11,9 +11,8 @@
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_alloc.h"
  #include "xfs_rtalloc.h"
+#include "xfs_iwalk.h"
  #include "xfs_itable.h"
  #include "xfs_error.h"
  #include "xfs_attr.h"
@@ -25,7 +24,6 @@
  #include "xfs_export.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
-#include "xfs_symlink.h"
  #include "xfs_trans.h"
  #include "xfs_acl.h"
  #include "xfs_btree.h"
@@ -36,14 +34,8 @@
  #include "xfs_ag.h"
  #include "xfs_health.h"
  
-#include <linux/capability.h>
-#include <linux/cred.h>
-#include <linux/dcache.h>
  #include <linux/mount.h>
  #include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
  
  /*
   * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -721,16 +713,45 @@ out_unlock:
         return error;
  }
  
+/* Return -EFAULT if the copy-out fails, else the xfs_ibulk_advance() result */
+int
+xfs_fsbulkstat_one_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
+{
+       struct xfs_bstat                bs1;
+
+       xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
+       if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat));
+}
+
+int
+xfs_fsinumbers_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *igrp)
+{
+       struct xfs_inogrp               ig1;
+
+       xfs_inumbers_to_inogrp(&ig1, igrp);
+       if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp));
+}
+
  STATIC int
-xfs_ioc_bulkstat(
+xfs_ioc_fsbulkstat(
         xfs_mount_t             *mp,
         unsigned int            cmd,
         void                    __user *arg)
  {
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
+       struct xfs_fsop_bulkreq bulkreq;
+       struct xfs_ibulk        breq = {
+               .mp             = mp,
+               .ocount         = 0,
+       };
+       xfs_ino_t               lastino;
         int                     error;
  
         /* done = 1 if there are more stats to get and if bulkstat */
@@ -742,41 +763,243 @@ xfs_ioc_bulkstat(
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
-       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+       if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
                 return -EFAULT;
  
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+       if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
                 return -EFAULT;
  
-       if ((count = bulkreq.icount) <= 0)
+       if (bulkreq.icount <= 0)
                 return -EINVAL;
  
         if (bulkreq.ubuffer == NULL)
                 return -EINVAL;
  
-       if (cmd == XFS_IOC_FSINUMBERS)
-               error = xfs_inumbers(mp, &inlast, &count,
-                                       bulkreq.ubuffer, xfs_inumbers_fmt);
-       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-               error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
-                                       sizeof(xfs_bstat_t), NULL, &done);
-       else    /* XFS_IOC_FSBULKSTAT */
-               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
-                                    &done);
+       breq.ubuffer = bulkreq.ubuffer;
+       breq.icount = bulkreq.icount;
+
+       /*
+        * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+        * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
+        * that *lastip contains either zero or the number of the last inode to
+        * be examined by the previous call and return results starting with
+        * the next inode after that.  The new bulk request back end functions
+        * take the inode to start with, so we have to compute the startino
+        * parameter from lastino to maintain correct function.  lastino == 0
+        * is a special case because it has traditionally meant "first inode
+        * in filesystem".
+        */
+       if (cmd == XFS_IOC_FSINUMBERS) {
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_inumbers(&breq, xfs_fsinumbers_fmt);
+               lastino = breq.startino - 1;
+       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
+               breq.startino = lastino;
+               breq.icount = 1;
+               error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt);
+       } else {        /* XFS_IOC_FSBULKSTAT */
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt);
+               lastino = breq.startino - 1;
+       }
  
         if (error)
                 return error;
  
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -EFAULT;
+       if (bulkreq.lastip != NULL &&
+           copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+               return -EFAULT;
  
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -EFAULT;
+       if (bulkreq.ocount != NULL &&
+           copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/* Return -EFAULT if the copy-out fails, else the xfs_ibulk_advance() result */
+static int
+xfs_bulkstat_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
+{
+       if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat));
+}
+
+/*
+ * Check the incoming bulk request @hdr from userspace and initialize the
+ * internal @breq bulk request appropriately.  Returns 0 if the bulk request
+ * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
+ * negative error code.
+ */
+static int
+xfs_bulk_ireq_setup(
+       struct xfs_mount        *mp,
+       struct xfs_bulk_ireq    *hdr,
+       struct xfs_ibulk        *breq,
+       void __user             *ubuffer)
+{
+       if (hdr->icount == 0 ||
+           (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) ||
+           memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+               return -EINVAL;
+
+       breq->startino = hdr->ino;
+       breq->ubuffer = ubuffer;
+       breq->icount = hdr->icount;
+       breq->ocount = 0;
+       breq->flags = 0;
+
+       /*
+        * The @ino parameter is a special value, so we must look it up here
+        * and start the walk at the inode it names.  We're not allowed to have
+        * IREQ_AGNO, and we only return one inode worth of data.
+        */
+       if (hdr->flags & XFS_BULK_IREQ_SPECIAL) {
+               if (hdr->flags & XFS_BULK_IREQ_AGNO)
+                       return -EINVAL;
+
+               switch (hdr->ino) {
+               case XFS_BULK_IREQ_SPECIAL_ROOT:
+                       breq->startino = mp->m_sb.sb_rootino;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               breq->icount = 1;
         }
  
+       /*
+        * The IREQ_AGNO flag means that we only want results from a given AG.
+        * If @hdr->ino is zero, we start iterating in that AG.  If @hdr->ino is
+        * beyond the specified AG then we return no results.
+        */
+       if (hdr->flags & XFS_BULK_IREQ_AGNO) {
+               if (hdr->agno >= mp->m_sb.sb_agcount)
+                       return -EINVAL;
+
+               if (breq->startino == 0)
+                       breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0);
+               else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
+                       return -EINVAL;
+
+               breq->flags |= XFS_IBULK_SAME_AG;
+
+               /* Asking for an inode past the end of the AG?  We're done! */
+               if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
+                       return XFS_ITER_ABORT;
+       } else if (hdr->agno)
+               return -EINVAL;
+
+       /* Asking for an inode past the end of the FS?  We're done! */
+       if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
+               return XFS_ITER_ABORT;
+
+       return 0;
+}
+
+/*
+ * Update the userspace bulk request @hdr to reflect the end state of the
+ * internal bulk request @breq.
+ */
+static void
+xfs_bulk_ireq_teardown(
+       struct xfs_bulk_ireq    *hdr,
+       struct xfs_ibulk        *breq)
+{
+       hdr->ino = breq->startino;
+       hdr->ocount = breq->ocount;
+}
+
+/* Handle the v5 bulkstat ioctl. */
+STATIC int
+xfs_ioc_bulkstat(
+       struct xfs_mount                *mp,
+       unsigned int                    cmd,
+       struct xfs_bulkstat_req __user  *arg)
+{
+       struct xfs_bulk_ireq            hdr;
+       struct xfs_ibulk                breq = {
+               .mp                     = mp,
+       };
+       int                             error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
+       if (error == XFS_ITER_ABORT)
+               goto out_teardown;
+       if (error < 0)
+               return error;
+
+       error = xfs_bulkstat(&breq, xfs_bulkstat_fmt);
+       if (error)
+               return error;
+
+out_teardown:
+       xfs_bulk_ireq_teardown(&hdr, &breq);
+       if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *igrp)
+{
+       if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers));
+}
+
+/* Handle the v5 inumbers ioctl. */
+STATIC int
+xfs_ioc_inumbers(
+       struct xfs_mount                *mp,
+       unsigned int                    cmd,
+       struct xfs_inumbers_req __user  *arg)
+{
+       struct xfs_bulk_ireq            hdr;
+       struct xfs_ibulk                breq = {
+               .mp                     = mp,
+       };
+       int                             error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
+       if (error == XFS_ITER_ABORT)
+               goto out_teardown;
+       if (error < 0)
+               return error;
+
+       error = xfs_inumbers(&breq, xfs_inumbers_fmt);
+       if (error)
+               return error;
+
+out_teardown:
+       xfs_bulk_ireq_teardown(&hdr, &breq);
+       if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+               return -EFAULT;
+
         return 0;
  }
  
@@ -1926,7 +2149,12 @@ xfs_file_ioctl(
         case XFS_IOC_FSBULKSTAT_SINGLE:
         case XFS_IOC_FSBULKSTAT:
         case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_fsbulkstat(mp, cmd, arg);
+
+       case XFS_IOC_BULKSTAT:
                 return xfs_ioc_bulkstat(mp, cmd, arg);
+       case XFS_IOC_INUMBERS:
+               return xfs_ioc_inumbers(mp, cmd, arg);
  
         case XFS_IOC_FSGEOMETRY_V1:
                 return xfs_ioc_fsgeometry(mp, arg, 3);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h

index 4b17f67c888a057feabfba771e058e51fe791be3..654c0bb1bcf8981c1f863ec315a409c0906365c3 100644 (file)
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -77,4 +77,12 @@ xfs_set_dmattrs(
         uint                    evmask,
         uint16_t                state);
  
+struct xfs_ibulk;
+struct xfs_bulkstat;   /* record type taken by xfs_fsbulkstat_one_fmt() */
+struct xfs_inumbers;   /* record type taken by xfs_fsinumbers_fmt() */
+
+int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq,
+                          const struct xfs_bulkstat *bstat);
+int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp);
+
  #endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c

index 614fc6886d24553328d5a08496e2cfc802af421f..7fcf7569743f47a250f0bf2356f7c040b78cc59c 100644 (file)
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -3,23 +3,19 @@
   * Copyright (c) 2004-2005 Silicon Graphics, Inc.
   * All Rights Reserved.
   */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
  #include <linux/mount.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
  #include <linux/fsmap.h>
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
+#include "xfs_iwalk.h"
  #include "xfs_itable.h"
-#include "xfs_error.h"
  #include "xfs_fsops.h"
-#include "xfs_alloc.h"
  #include "xfs_rtalloc.h"
  #include "xfs_attr.h"
  #include "xfs_ioctl.h"
@@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin(
  }
  
  STATIC int
-xfs_inumbers_fmt_compat(
-       void                    __user *ubuffer,
-       const struct xfs_inogrp *buffer,
-       long                    count,
-       long                    *written)
+xfs_fsinumbers_fmt_compat(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *ig)
  {
-       compat_xfs_inogrp_t     __user *p32 = ubuffer;
-       long                    i;
+       struct compat_xfs_inogrp __user *p32 = breq->ubuffer;
+       struct xfs_inogrp               ig1;
+       struct xfs_inogrp               *igrp = &ig1;
  
-       for (i = 0; i < count; i++) {
-               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                       return -EFAULT;
-       }
-       *written = count * sizeof(*p32);
-       return 0;
+       xfs_inumbers_to_inogrp(&ig1, ig);
+
+       if (put_user(igrp->xi_startino,   &p32->xi_startino) ||
+           put_user(igrp->xi_alloccount, &p32->xi_alloccount) ||
+           put_user(igrp->xi_allocmask,  &p32->xi_allocmask))
+               return -EFAULT;
+
+       return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp));
  }
  
  #else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt
  #endif /* BROKEN_X86_ALIGNMENT */
  
  STATIC int
@@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin(
         return 0;
  }
  
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+/*
+ * struct xfs_bstat has differing alignment on intel, & bstime_t sizes
+ * everywhere
+ */
  STATIC int
  xfs_ioctl32_bstat_copyin(
-       xfs_bstat_t             *bstat,
-       compat_xfs_bstat_t      __user *bstat32)
+       struct xfs_bstat                *bstat,
+       struct compat_xfs_bstat __user  *bstat32)
  {
         if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
             get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
@@ -171,16 +169,15 @@ xfs_bstime_store_compat(
  
  /* Return 0 on success or positive error (to xfs_bulkstat()) */
  STATIC int
-xfs_bulkstat_one_fmt_compat(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
+xfs_fsbulkstat_one_fmt_compat(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
  {
-       compat_xfs_bstat_t      __user *p32 = ubuffer;
+       struct compat_xfs_bstat __user  *p32 = breq->ubuffer;
+       struct xfs_bstat                bs1;
+       struct xfs_bstat                *buffer = &bs1;
  
-       if (ubsize < sizeof(*p32))
-               return -ENOMEM;
+       xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
  
         if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
             put_user(buffer->bs_mode,     &p32->bs_mode)        ||
@@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat(
             put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
             put_user(buffer->bs_aextents, &p32->bs_aextents))
                 return -EFAULT;
-       if (ubused)
-               *ubused = sizeof(*p32);
-       return 0;
-}
  
-STATIC int
-xfs_bulkstat_one_compat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
-{
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt_compat,
-                                   ubused, stat);
+       return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat));
  }
  
  /* copied from xfs_ioctl.c */
  STATIC int
-xfs_compat_ioc_bulkstat(
+xfs_compat_ioc_fsbulkstat(
         xfs_mount_t               *mp,
         unsigned int              cmd,
-       compat_xfs_fsop_bulkreq_t __user *p32)
+       struct compat_xfs_fsop_bulkreq __user *p32)
  {
         u32                     addr;
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
+       struct xfs_fsop_bulkreq bulkreq;
+       struct xfs_ibulk        breq = {
+               .mp             = mp,
+               .ocount         = 0,
+       };
+       xfs_ino_t               lastino;
         int                     error;
  
         /*
@@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat(
          * to userpace memory via bulkreq.ubuffer.  Normally the compat
          * functions and structure size are the correct ones to use ...
          */
-       inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat;
-       bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat;
-       size_t bs_one_size = sizeof(struct compat_xfs_bstat);
+       inumbers_fmt_pf         inumbers_func = xfs_fsinumbers_fmt_compat;
+       bulkstat_one_fmt_pf     bs_one_func = xfs_fsbulkstat_one_fmt_compat;
  
  #ifdef CONFIG_X86_X32
         if (in_x32_syscall()) {
@@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat(
                  * the data written out in compat layout will not match what
                  * x32 userspace expects.
                  */
-               inumbers_func = xfs_inumbers_fmt;
-               bs_one_func = xfs_bulkstat_one;
-               bs_one_size = sizeof(struct xfs_bstat);
+               inumbers_func = xfs_fsinumbers_fmt;
+               bs_one_func = xfs_fsbulkstat_one_fmt;
         }
  #endif
  
@@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat(
                 return -EFAULT;
         bulkreq.ocount = compat_ptr(addr);
  
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+       if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
                 return -EFAULT;
  
-       if ((count = bulkreq.icount) <= 0)
+       if (bulkreq.icount <= 0)
                 return -EINVAL;
  
         if (bulkreq.ubuffer == NULL)
                 return -EINVAL;
  
+       breq.ubuffer = bulkreq.ubuffer;
+       breq.icount = bulkreq.icount;
+
+       /*
+        * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+        * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
+        * that *lastip contains either zero or the number of the last inode to
+        * be examined by the previous call and return results starting with
+        * the next inode after that.  The new bulk request back end functions
+        * take the inode to start with, so we have to compute the startino
+        * parameter from lastino to maintain correct function.  lastino == 0
+        * is a special case because it has traditionally meant "first inode
+        * in filesystem".
+        */
         if (cmd == XFS_IOC_FSINUMBERS_32) {
-               error = xfs_inumbers(mp, &inlast, &count,
-                               bulkreq.ubuffer, inumbers_func);
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_inumbers(&breq, inumbers_func);
+               lastino = breq.startino - 1;
         } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-               int res;
-
-               error = bs_one_func(mp, inlast, bulkreq.ubuffer,
-                               bs_one_size, NULL, &res);
+               breq.startino = lastino;
+               breq.icount = 1;
+               error = xfs_bulkstat_one(&breq, bs_one_func);
+               lastino = breq.startino;
         } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-               error = xfs_bulkstat(mp, &inlast, &count,
-                       bs_one_func, bs_one_size,
-                       bulkreq.ubuffer, &done);
-       } else
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_bulkstat(&breq, bs_one_func);
+               lastino = breq.startino - 1;
+       } else {
                 error = -EINVAL;
+       }
         if (error)
                 return error;
  
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -EFAULT;
+       if (bulkreq.lastip != NULL &&
+           copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+               return -EFAULT;
  
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -EFAULT;
-       }
+       if (bulkreq.ocount != NULL &&
+           copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+               return -EFAULT;
  
         return 0;
  }
@@ -577,6 +574,8 @@ xfs_file_compat_ioctl(
         case XFS_IOC_ERROR_CLEARALL:
         case FS_IOC_GETFSMAP:
         case XFS_IOC_SCRUB_METADATA:
+       case XFS_IOC_BULKSTAT:
+       case XFS_IOC_INUMBERS:
                 return xfs_file_ioctl(filp, cmd, p);
  #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
         /*
@@ -674,7 +673,7 @@ xfs_file_compat_ioctl(
         case XFS_IOC_FSBULKSTAT_32:
         case XFS_IOC_FSBULKSTAT_SINGLE_32:
         case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+               return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
         case XFS_IOC_FD_TO_HANDLE_32:
         case XFS_IOC_PATH_TO_HANDLE_32:
         case XFS_IOC_PATH_TO_FSHANDLE_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h

index d28fa824284aaf8198ad50404e86f4793bb70892..7985344d3aa619925a82312b4f387ee815e8f212 100644 (file)
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime {
         __s32           tv_nsec;        /* and nanoseconds      */
  } compat_xfs_bstime_t;
  
-typedef struct compat_xfs_bstat {
+struct compat_xfs_bstat {
         __u64           bs_ino;         /* inode number                 */
         __u16           bs_mode;        /* type and mode                */
         __u16           bs_nlink;       /* number of links              */
@@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat {
         __u32           bs_dmevmask;    /* DMIG event mask              */
         __u16           bs_dmstate;     /* DMIG state info              */
         __u16           bs_aextents;    /* attribute number of extents  */
-} __compat_packed compat_xfs_bstat_t;
+} __compat_packed;
  
-typedef struct compat_xfs_fsop_bulkreq {
+struct compat_xfs_fsop_bulkreq {
         compat_uptr_t   lastip;         /* last inode # pointer         */
         __s32           icount;         /* count of entries in buffer   */
         compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
         compat_uptr_t   ocount;         /* output count pointer         */
-} compat_xfs_fsop_bulkreq_t;
+};
  
  #define XFS_IOC_FSBULKSTAT_32 \
         _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
@@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext {
         xfs_off_t               sx_offset;      /* offset into file */
         xfs_off_t               sx_length;      /* leng from offset */
         char                    sx_pad[16];     /* pad space, unused */
-       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+       struct compat_xfs_bstat sx_stat;        /* stat of target b4 copy */
  } __compat_packed compat_xfs_swapext_t;
  
  #define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
@@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 {
  #define XFS_IOC_FSGEOMETRY_V1_32  \
         _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
  
-typedef struct compat_xfs_inogrp {
+struct compat_xfs_inogrp {
         __u64           xi_startino;    /* starting inode number        */
         __s32           xi_alloccount;  /* # bits set in allocmask      */
         __u64           xi_allocmask;   /* mask of allocated inodes     */
-} __attribute__((packed)) compat_xfs_inogrp_t;
+} __attribute__((packed));
  
  /* These growfs input structures have padding on the end, so must translate */
  typedef struct compat_xfs_growfs_data {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 63d323916bba9e42dc3f37d81359b16a6821784b..3a4310d7cb59d4901d7519002f7eae03e5170ab5 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,7 +4,6 @@
   * Copyright (c) 2016-2018 Christoph Hellwig.
   * All Rights Reserved.
   */
-#include <linux/iomap.h>
  #include "xfs.h"
  #include "xfs_fs.h"
  #include "xfs_shared.h"
@@ -12,7 +11,6 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_bmap_btree.h"
@@ -25,7 +23,6 @@
  #include "xfs_inode_item.h"
  #include "xfs_iomap.h"
  #include "xfs_trace.h"
-#include "xfs_icache.h"
  #include "xfs_quota.h"
  #include "xfs_dquot_item.h"
  #include "xfs_dquot.h"
@@ -779,7 +776,7 @@ xfs_iomap_write_unwritten(
                  * complete here and might deadlock on the iolock.
                  */
                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
-                               XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+                               XFS_TRANS_RESERVE, &tp);
                 if (error)
                         return error;
  
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 74047bd0c1aeb44709ceae3ef779921778c4be0e..ff3c1fae53571e79d139e9117efc8ccc4ec751bd 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -10,30 +10,20 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
  #include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_acl.h"
  #include "xfs_quota.h"
-#include "xfs_error.h"
  #include "xfs_attr.h"
  #include "xfs_trans.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_symlink.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
-#include "xfs_trans_space.h"
  #include "xfs_iomap.h"
-#include "xfs_defer.h"
  
-#include <linux/capability.h>
  #include <linux/xattr.h>
  #include <linux/posix_acl.h>
  #include <linux/security.h>
-#include <linux/iomap.h>
-#include <linux/slab.h>
  #include <linux/iversion.h>
  
  /*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index 1e1a0af1dd34d2dc96c7f4b7f974add392f41bd9..a8a06bb78ea8e3c942d3d6c45f25f5999ff0a6c3 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -14,46 +14,66 @@
  #include "xfs_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
  #include "xfs_itable.h"
  #include "xfs_error.h"
-#include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_health.h"
  
  /*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
+ * Bulk Stat
+ * =========
+ *
+ * Use the inode walking functions to fill out struct xfs_bulkstat for every
+ * allocated inode, then pass the stat information to some externally provided
+ * iteration function.
   */
-int
+
+struct xfs_bstat_chunk {
+       bulkstat_one_fmt_pf     formatter;
+       struct xfs_ibulk        *breq;
+       struct xfs_bulkstat     *buf;
+};
+
+/*
+ * Fill out the bulkstat info for a single inode and report it somewhere.
+ *
+ * bc->breq->startino is effectively the inode cursor as we walk through the
+ * filesystem.  Therefore, we update it any time we need to move the cursor
+ * forward, regardless of whether or not we're sending any bstat information
+ * back to userspace.  If the inode is internal metadata or has been freed
+ * out from under us, we simply keep going.
+ *
+ * However, if any other type of error happens we want to stop right where we
+ * are so that userspace will call back with the exact number of the bad inode
+ * and we can send back an error code.
+ *
+ * Note that if the formatter tells us there's no space left in the buffer we
+ * move the cursor forward and abort the walk.
+ */
+STATIC int
  xfs_bulkstat_one_int(
-       struct xfs_mount        *mp,            /* mount point for filesystem */
-       xfs_ino_t               ino,            /* inode to get data for */
-       void __user             *buffer,        /* buffer to place output in */
-       int                     ubsize,         /* size of buffer */
-       bulkstat_one_fmt_pf     formatter,      /* formatter, copy to user */
-       int                     *ubused,        /* bytes used by me */
-       int                     *stat)          /* BULKSTAT_RV_... */
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       struct xfs_bstat_chunk  *bc)
  {
         struct xfs_icdinode     *dic;           /* dinode core info pointer */
         struct xfs_inode        *ip;            /* incore inode pointer */
         struct inode            *inode;
-       struct xfs_bstat        *buf;           /* return buffer */
-       int                     error = 0;      /* error value */
+       struct xfs_bulkstat     *buf = bc->buf;
+       int                     error = -EINVAL;
  
-       *stat = BULKSTAT_RV_NOTHING;
+       if (xfs_internal_inum(mp, ino))
+               goto out_advance;
  
-       if (!buffer || xfs_internal_inum(mp, ino))
-               return -EINVAL;
-
-       buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
-       if (!buf)
-               return -ENOMEM;
-
-       error = xfs_iget(mp, NULL, ino,
+       error = xfs_iget(mp, tp, ino,
                          (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
                          XFS_ILOCK_SHARED, &ip);
+       if (error == -ENOENT || error == -EINVAL)
+               goto out_advance;
         if (error)
-               goto out_free;
+               goto out;
  
         ASSERT(ip != NULL);
         ASSERT(ip->i_imap.im_blkno != 0);
@@ -64,37 +84,35 @@ xfs_bulkstat_one_int(
         /* xfs_iget returns the following without needing
          * further change.
          */
-       buf->bs_projid_lo = dic->di_projid_lo;
-       buf->bs_projid_hi = dic->di_projid_hi;
+       buf->bs_projectid = xfs_get_projid(ip);
         buf->bs_ino = ino;
         buf->bs_uid = dic->di_uid;
         buf->bs_gid = dic->di_gid;
         buf->bs_size = dic->di_size;
  
         buf->bs_nlink = inode->i_nlink;
-       buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
-       buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
-       buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
-       buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
-       buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
-       buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_atime = inode->i_atime.tv_sec;
+       buf->bs_atime_nsec = inode->i_atime.tv_nsec;
+       buf->bs_mtime = inode->i_mtime.tv_sec;
+       buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
+       buf->bs_ctime = inode->i_ctime.tv_sec;
+       buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_btime = dic->di_crtime.t_sec;
+       buf->bs_btime_nsec = dic->di_crtime.t_nsec;
         buf->bs_gen = inode->i_generation;
         buf->bs_mode = inode->i_mode;
  
         buf->bs_xflags = xfs_ip2xflags(ip);
-       buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
+       buf->bs_extsize_blks = dic->di_extsize;
         buf->bs_extents = dic->di_nextents;
-       memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
         xfs_bulkstat_health(ip, buf);
-       buf->bs_dmevmask = dic->di_dmevmask;
-       buf->bs_dmstate = dic->di_dmstate;
         buf->bs_aextents = dic->di_anextents;
         buf->bs_forkoff = XFS_IFORK_BOFF(ip);
+       buf->bs_version = XFS_BULKSTAT_VERSION_V5;
  
         if (dic->di_version == 3) {
                 if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
-                       buf->bs_cowextsize = dic->di_cowextsize <<
-                                       mp->m_sb.sb_blocklog;
+                       buf->bs_cowextsize_blks = dic->di_cowextsize;
         }
  
         switch (dic->di_format) {
@@ -118,385 +136,121 @@ xfs_bulkstat_one_int(
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
         xfs_irele(ip);
  
-       error = formatter(buffer, ubsize, ubused, buf);
-       if (!error)
-               *stat = BULKSTAT_RV_DIDONE;
+       error = bc->formatter(bc->breq, buf);
+       if (error == XFS_IBULK_ABORT)
+               goto out_advance;
+       if (error)
+               goto out;
  
- out_free:
-       kmem_free(buf);
+out_advance:
+       /*
+        * Advance the cursor to the inode that comes after the one we just
+        * looked at.  We want the caller to move along if the bulkstat
+        * information was copied successfully; if we tried to grab the inode
+        * but it's no longer allocated; or if it's internal metadata.
+        */
+       bc->breq->startino = ino + 1;
+out:
         return error;
  }
  
-/* Return 0 on success or positive error */
-STATIC int
-xfs_bulkstat_one_fmt(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
-{
-       if (ubsize < sizeof(*buffer))
-               return -ENOMEM;
-       if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
-               return -EFAULT;
-       if (ubused)
-               *ubused = sizeof(*buffer);
-       return 0;
-}
-
+/* Bulkstat a single inode. */
  int
  xfs_bulkstat_one(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
+       struct xfs_ibulk        *breq,
+       bulkstat_one_fmt_pf     formatter)
  {
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt, ubused, stat);
-}
+       struct xfs_bstat_chunk  bc = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
+       int                     error;
  
-/*
- * Loop over all clusters in a chunk for a given incore inode allocation btree
- * record.  Do a readahead if there are any allocated inodes in that cluster.
- */
-STATIC void
-xfs_bulkstat_ichunk_ra(
-       struct xfs_mount                *mp,
-       xfs_agnumber_t                  agno,
-       struct xfs_inobt_rec_incore     *irec)
-{
-       xfs_agblock_t                   agbno;
-       struct blk_plug                 plug;
-       int                             i;      /* inode chunk index */
-
-       agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
-
-       blk_start_plug(&plug);
-       for (i = 0; i < XFS_INODES_PER_CHUNK;
-            i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
-               if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
-                   ~irec->ir_free) {
-                       xfs_btree_reada_bufs(mp, agno, agbno,
-                                       mp->m_blocks_per_cluster,
-                                       &xfs_inode_buf_ops);
-               }
-       }
-       blk_finish_plug(&plug);
-}
+       ASSERT(breq->icount == 1);
  
-/*
- * Lookup the inode chunk that the given inode lives in and then get the record
- * if we found the chunk.  If the inode was not the last in the chunk and there
- * are some left allocated, update the data for the pointed-to record as well as
- * return the count of grabbed inodes.
- */
-STATIC int
-xfs_bulkstat_grab_ichunk(
-       struct xfs_btree_cur            *cur,   /* btree cursor */
-       xfs_agino_t                     agino,  /* starting inode of chunk */
-       int                             *icount,/* return # of inodes grabbed */
-       struct xfs_inobt_rec_incore     *irec)  /* btree record */
-{
-       int                             idx;    /* index into inode chunk */
-       int                             stat;
-       int                             error = 0;
-
-       /* Lookup the inode chunk that this inode lives in */
-       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
-       if (error)
-               return error;
-       if (!stat) {
-               *icount = 0;
-               return error;
-       }
+       bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+                       KM_SLEEP | KM_MAYFAIL);
+       if (!bc.buf)
+               return -ENOMEM;
  
-       /* Get the record, should always work */
-       error = xfs_inobt_get_rec(cur, irec, &stat);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
+       error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
  
-       /* Check if the record contains the inode in request */
-       if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
-               *icount = 0;
-               return 0;
-       }
+       kmem_free(bc.buf);
  
-       idx = agino - irec->ir_startino + 1;
-       if (idx < XFS_INODES_PER_CHUNK &&
-           (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
-               int     i;
-
-               /* We got a right chunk with some left inodes allocated at it.
-                * Grab the chunk record.  Mark all the uninteresting inodes
-                * free -- because they're before our start point.
-                */
-               for (i = 0; i < idx; i++) {
-                       if (XFS_INOBT_MASK(i) & ~irec->ir_free)
-                               irec->ir_freecount++;
-               }
-
-               irec->ir_free |= xfs_inobt_maskn(0, idx);
-               *icount = irec->ir_count - irec->ir_freecount;
-       }
+       /*
+        * If we reported one inode to userspace then we abort because we hit
+        * the end of the buffer.  Don't leak that back to userspace.
+        */
+       if (error == XFS_IWALK_ABORT)
+               error = 0;
  
-       return 0;
+       return error;
  }
  
-#define XFS_BULKSTAT_UBLEFT(ubleft)    ((ubleft) >= statstruct_size)
-
-struct xfs_bulkstat_agichunk {
-       char            __user **ac_ubuffer;/* pointer into user's buffer */
-       int             ac_ubleft;      /* bytes left in user's buffer */
-       int             ac_ubelem;      /* spaces used in user's buffer */
-};
-
-/*
- * Process inodes in chunk with a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- */
  static int
-xfs_bulkstat_ag_ichunk(
-       struct xfs_mount                *mp,
-       xfs_agnumber_t                  agno,
-       struct xfs_inobt_rec_incore     *irbp,
-       bulkstat_one_pf                 formatter,
-       size_t                          statstruct_size,
-       struct xfs_bulkstat_agichunk    *acp,
-       xfs_agino_t                     *last_agino)
+xfs_bulkstat_iwalk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       void                    *data)
  {
-       char                            __user **ubufp = acp->ac_ubuffer;
-       int                             chunkidx;
-       int                             error = 0;
-       xfs_agino_t                     agino = irbp->ir_startino;
-
-       for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
-            chunkidx++, agino++) {
-               int             fmterror;
-               int             ubused;
-
-               /* inode won't fit in buffer, we are done */
-               if (acp->ac_ubleft < statstruct_size)
-                       break;
-
-               /* Skip if this inode is free */
-               if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
-                       continue;
-
-               /* Get the inode and fill in a single buffer */
-               ubused = statstruct_size;
-               error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
-                                 *ubufp, acp->ac_ubleft, &ubused, &fmterror);
-
-               if (fmterror == BULKSTAT_RV_GIVEUP ||
-                   (error && error != -ENOENT && error != -EINVAL)) {
-                       acp->ac_ubleft = 0;
-                       ASSERT(error);
-                       break;
-               }
-
-               /* be careful not to leak error if at end of chunk */
-               if (fmterror == BULKSTAT_RV_NOTHING || error) {
-                       error = 0;
-                       continue;
-               }
-
-               *ubufp += ubused;
-               acp->ac_ubleft -= ubused;
-               acp->ac_ubelem++;
-       }
-
-       /*
-        * Post-update *last_agino. At this point, agino will always point one
-        * inode past the last inode we processed successfully. Hence we
-        * substract that inode when setting the *last_agino cursor so that we
-        * return the correct cookie to userspace. On the next bulkstat call,
-        * the inode under the lastino cookie will be skipped as we have already
-        * processed it here.
-        */
-       *last_agino = agino - 1;
+       int                     error;
  
+       error = xfs_bulkstat_one_int(mp, tp, ino, data);
+       /* bulkstat just skips over missing inodes */
+       if (error == -ENOENT || error == -EINVAL)
+               return 0;
         return error;
  }
  
  /*
- * Return stat information in bulk (by-inode) for the filesystem.
+ * Check the incoming startino parameter.
+ *
+ * We allow any inode value that could map to physical space inside the
+ * filesystem because if there are no inodes there, bulkstat moves on to the
+ * next chunk.  In other words, the magic agino value of zero takes us to the
+ * first chunk in the AG, and an agino value past the end of the AG takes us to
+ * the first chunk in the next AG.
+ *
+ * Therefore we can end early if the requested inode is beyond the end of the
+ * filesystem or doesn't map properly.
   */
-int                                    /* error status */
-xfs_bulkstat(
-       xfs_mount_t             *mp,    /* mount point for filesystem */
-       xfs_ino_t               *lastinop, /* last inode returned */
-       int                     *ubcountp, /* size of buffer/count returned */
-       bulkstat_one_pf         formatter, /* func that'd fill a single buf */
-       size_t                  statstruct_size, /* sizeof struct filling */
-       char                    __user *ubuffer, /* buffer with inode stats */
-       int                     *done)  /* 1 if there are more stats to get */
+static inline bool
+xfs_bulkstat_already_done(
+       struct xfs_mount        *mp,
+       xfs_ino_t               startino)
  {
-       xfs_buf_t               *agbp;  /* agi header buffer */
-       xfs_agino_t             agino;  /* inode # in allocation group */
-       xfs_agnumber_t          agno;   /* allocation group number */
-       xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
-       xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
-       int                     nirbuf; /* size of irbuf */
-       int                     ubcount; /* size of user's buffer */
-       struct xfs_bulkstat_agichunk ac;
-       int                     error = 0;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, startino);
  
-       /*
-        * Get the last inode value, see if there's nothing to do.
-        */
-       agno = XFS_INO_TO_AGNO(mp, *lastinop);
-       agino = XFS_INO_TO_AGINO(mp, *lastinop);
-       if (agno >= mp->m_sb.sb_agcount ||
-           *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
-               *done = 1;
-               *ubcountp = 0;
-               return 0;
-       }
+       return agno >= mp->m_sb.sb_agcount ||
+              startino != XFS_AGINO_TO_INO(mp, agno, agino);
+}
  
-       ubcount = *ubcountp; /* statstruct's */
-       ac.ac_ubuffer = &ubuffer;
-       ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
-       ac.ac_ubelem = 0;
+/* Return stat information in bulk (by-inode) for the filesystem. */
+int
+xfs_bulkstat(
+       struct xfs_ibulk        *breq,
+       bulkstat_one_fmt_pf     formatter)
+{
+       struct xfs_bstat_chunk  bc = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
+       int                     error;
  
-       *ubcountp = 0;
-       *done = 0;
+       if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+               return 0;
  
-       irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
-       if (!irbuf)
+       bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+                       KM_SLEEP | KM_MAYFAIL);
+       if (!bc.buf)
                 return -ENOMEM;
-       nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
  
-       /*
-        * Loop over the allocation groups, starting from the last
-        * inode returned; 0 means start of the allocation group.
-        */
-       while (agno < mp->m_sb.sb_agcount) {
-               struct xfs_inobt_rec_incore     *irbp = irbuf;
-               struct xfs_inobt_rec_incore     *irbufend = irbuf + nirbuf;
-               bool                            end_of_ag = false;
-               int                             icount = 0;
-               int                             stat;
-
-               error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
-               if (error)
-                       break;
-               /*
-                * Allocate and initialize a btree cursor for ialloc btree.
-                */
-               cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
-                                           XFS_BTNUM_INO);
-               if (agino > 0) {
-                       /*
-                        * In the middle of an allocation group, we need to get
-                        * the remainder of the chunk we're in.
-                        */
-                       struct xfs_inobt_rec_incore     r;
-
-                       error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
-                       if (error)
-                               goto del_cursor;
-                       if (icount) {
-                               irbp->ir_startino = r.ir_startino;
-                               irbp->ir_holemask = r.ir_holemask;
-                               irbp->ir_count = r.ir_count;
-                               irbp->ir_freecount = r.ir_freecount;
-                               irbp->ir_free = r.ir_free;
-                               irbp++;
-                       }
-                       /* Increment to the next record */
-                       error = xfs_btree_increment(cur, 0, &stat);
-               } else {
-                       /* Start of ag.  Lookup the first inode chunk */
-                       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
-               }
-               if (error || stat == 0) {
-                       end_of_ag = true;
-                       goto del_cursor;
-               }
-
-               /*
-                * Loop through inode btree records in this ag,
-                * until we run out of inodes or space in the buffer.
-                */
-               while (irbp < irbufend && icount < ubcount) {
-                       struct xfs_inobt_rec_incore     r;
-
-                       error = xfs_inobt_get_rec(cur, &r, &stat);
-                       if (error || stat == 0) {
-                               end_of_ag = true;
-                               goto del_cursor;
-                       }
-
-                       /*
-                        * If this chunk has any allocated inodes, save it.
-                        * Also start read-ahead now for this chunk.
-                        */
-                       if (r.ir_freecount < r.ir_count) {
-                               xfs_bulkstat_ichunk_ra(mp, agno, &r);
-                               irbp->ir_startino = r.ir_startino;
-                               irbp->ir_holemask = r.ir_holemask;
-                               irbp->ir_count = r.ir_count;
-                               irbp->ir_freecount = r.ir_freecount;
-                               irbp->ir_free = r.ir_free;
-                               irbp++;
-                               icount += r.ir_count - r.ir_freecount;
-                       }
-                       error = xfs_btree_increment(cur, 0, &stat);
-                       if (error || stat == 0) {
-                               end_of_ag = true;
-                               goto del_cursor;
-                       }
-                       cond_resched();
-               }
-
-               /*
-                * Drop the btree buffers and the agi buffer as we can't hold any
-                * of the locks these represent when calling iget. If there is a
-                * pending error, then we are done.
-                */
-del_cursor:
-               xfs_btree_del_cursor(cur, error);
-               xfs_buf_relse(agbp);
-               if (error)
-                       break;
-               /*
-                * Now format all the good inodes into the user's buffer. The
-                * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
-                * for the next loop iteration.
-                */
-               irbufend = irbp;
-               for (irbp = irbuf;
-                    irbp < irbufend && ac.ac_ubleft >= statstruct_size;
-                    irbp++) {
-                       error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
-                                       formatter, statstruct_size, &ac,
-                                       &agino);
-                       if (error)
-                               break;
-
-                       cond_resched();
-               }
-
-               /*
-                * If we've run out of space or had a formatting error, we
-                * are now done
-                */
-               if (ac.ac_ubleft < statstruct_size || error)
-                       break;
-
-               if (end_of_ag) {
-                       agno++;
-                       agino = 0;
-               }
-       }
-       /*
-        * Done, we're either out of filesystem or space to put the data.
-        */
-       kmem_free(irbuf);
-       *ubcountp = ac.ac_ubelem;
+       error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
+                       xfs_bulkstat_iwalk, breq->icount, &bc);
+
+       kmem_free(bc.buf);
  
         /*
          * We found some inodes, so clear the error status and return them.
@@ -505,135 +259,136 @@ del_cursor:
          * triggered again and propagated to userspace as there will be no
          * formatted inodes in the buffer.
          */
-       if (ac.ac_ubelem)
+       if (breq->ocount > 0)
                 error = 0;
  
-       /*
-        * If we ran out of filesystem, lastino will point off the end of
-        * the filesystem so the next call will return immediately.
-        */
-       *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
-       if (agno >= mp->m_sb.sb_agcount)
-               *done = 1;
-
         return error;
  }
  
-int
-xfs_inumbers_fmt(
-       void                    __user *ubuffer, /* buffer to write to */
-       const struct xfs_inogrp *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written)       /* # of bytes written */
+/*
+ * Convert bulkstat (v5) to bstat (v1).
+ *
+ * @bs1 is zeroed and then filled in from @bstat.  The extent size hints are
+ * converted from filesystem blocks to bytes with XFS_FSB_TO_B() on @mp, and
+ * the project id is split into its low 16 bits and the bits above them.
+ */
+void
+xfs_bulkstat_to_bstat(
+       struct xfs_mount                *mp,
+       struct xfs_bstat                *bs1,
+       const struct xfs_bulkstat       *bstat)
  {
-       if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
-               return -EFAULT;
-       *written = count * sizeof(*buffer);
-       return 0;
+       /* Start from all zeroes so nothing in the output is left unset. */
+       memset(bs1, 0, sizeof(struct xfs_bstat));
+       bs1->bs_ino = bstat->bs_ino;
+       bs1->bs_mode = bstat->bs_mode;
+       bs1->bs_nlink = bstat->bs_nlink;
+       bs1->bs_uid = bstat->bs_uid;
+       bs1->bs_gid = bstat->bs_gid;
+       bs1->bs_rdev = bstat->bs_rdev;
+       bs1->bs_blksize = bstat->bs_blksize;
+       bs1->bs_size = bstat->bs_size;
+       /* v5 carries seconds and nanoseconds as separate scalar fields. */
+       bs1->bs_atime.tv_sec = bstat->bs_atime;
+       bs1->bs_mtime.tv_sec = bstat->bs_mtime;
+       bs1->bs_ctime.tv_sec = bstat->bs_ctime;
+       bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
+       bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
+       bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
+       bs1->bs_blocks = bstat->bs_blocks;
+       bs1->bs_xflags = bstat->bs_xflags;
+       /* v5 reports the hint in fs blocks; v1 wants it in bytes. */
+       bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
+       bs1->bs_extents = bstat->bs_extents;
+       bs1->bs_gen = bstat->bs_gen;
+       bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
+       bs1->bs_forkoff = bstat->bs_forkoff;
+       bs1->bs_projid_hi = bstat->bs_projectid >> 16;
+       bs1->bs_sick = bstat->bs_sick;
+       bs1->bs_checked = bstat->bs_checked;
+       bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
+       /* These two fields are always reported as zero. */
+       bs1->bs_dmevmask = 0;
+       bs1->bs_dmstate = 0;
+       bs1->bs_aextents = bstat->bs_aextents;
+}
+
+/*
+ * Context that xfs_inumbers hands to xfs_inumbers_walk through the walk's
+ * opaque data pointer: the formatter to call for each inode group and the
+ * request whose cursor and output buffer are being updated.
+ */
+struct xfs_inumbers_chunk {
+       inumbers_fmt_pf         formatter;
+       struct xfs_ibulk        *breq;
+};
+
+/*
+ * INUMBERS
+ * ========
+ * This is how we export inode btree records to userspace, so that XFS tools
+ * can figure out where inodes are allocated.
+ */
+
+/*
+ * Format the inode group structure and report it somewhere.
+ *
+ * Similar to xfs_bulkstat_one_int, breq->startino is the inode cursor as we
+ * walk through the filesystem so we move it forward unless there was a runtime
+ * error.  If the formatter tells us the buffer is now full (by returning
+ * XFS_IBULK_ABORT) we also move the cursor forward and abort the walk.
+ *
+ * @data is the struct xfs_inumbers_chunk set up by xfs_inumbers.  Returns
+ * whatever the formatter returned.
+ */
+STATIC int
+xfs_inumbers_walk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          agno,
+       const struct xfs_inobt_rec_incore *irec,
+       void                    *data)
+{
+       /* Members not named here are zero-initialized by the initializer. */
+       struct xfs_inumbers     inogrp = {
+               .xi_startino    = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
+               .xi_alloccount  = irec->ir_count - irec->ir_freecount,
+               .xi_allocmask   = ~irec->ir_free,
+               .xi_version     = XFS_INUMBERS_VERSION_V5,
+       };
+       struct xfs_inumbers_chunk *ic = data;
+       int                     error;
+
+       /* A real error leaves the cursor where it was. */
+       error = ic->formatter(ic->breq, &inogrp);
+       if (error && error != XFS_IBULK_ABORT)
+               return error;
+
+       /* Success or "buffer full": resume after this whole inode chunk. */
+       ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
+                       XFS_INODES_PER_CHUNK;
+       return error;
  }
  
  /*
   * Return inode number table for the filesystem.
   */
-int                                    /* error status */
+int
  xfs_inumbers(
-       struct xfs_mount        *mp,/* mount point for filesystem */
-       xfs_ino_t               *lastino,/* last inode returned */
-       int                     *count,/* size of buffer/count returned */
-       void                    __user *ubuffer,/* buffer with inode descriptions */
+       struct xfs_ibulk        *breq,
         inumbers_fmt_pf         formatter)
  {
-       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, *lastino);
-       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, *lastino);
-       struct xfs_btree_cur    *cur = NULL;
-       struct xfs_buf          *agbp = NULL;
-       struct xfs_inogrp       *buffer;
-       int                     bcount;
-       int                     left = *count;
-       int                     bufidx = 0;
+       struct xfs_inumbers_chunk ic = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
         int                     error = 0;
  
-       *count = 0;
-       if (agno >= mp->m_sb.sb_agcount ||
-           *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
-               return error;
+       if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+               return 0;
  
-       bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer)));
-       buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
-       do {
-               struct xfs_inobt_rec_incore     r;
-               int                             stat;
-
-               if (!agbp) {
-                       error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
-                       if (error)
-                               break;
-
-                       cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
-                                                   XFS_BTNUM_INO);
-                       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
-                                                &stat);
-                       if (error)
-                               break;
-                       if (!stat)
-                               goto next_ag;
-               }
-
-               error = xfs_inobt_get_rec(cur, &r, &stat);
-               if (error)
-                       break;
-               if (!stat)
-                       goto next_ag;
-
-               agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
-               buffer[bufidx].xi_startino =
-                       XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
-               buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
-               buffer[bufidx].xi_allocmask = ~r.ir_free;
-               if (++bufidx == bcount) {
-                       long    written;
-
-                       error = formatter(ubuffer, buffer, bufidx, &written);
-                       if (error)
-                               break;
-                       ubuffer += written;
-                       *count += bufidx;
-                       bufidx = 0;
-               }
-               if (!--left)
-                       break;
-
-               error = xfs_btree_increment(cur, 0, &stat);
-               if (error)
-                       break;
-               if (stat)
-                       continue;
-
-next_ag:
-               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-               cur = NULL;
-               xfs_buf_relse(agbp);
-               agbp = NULL;
-               agino = 0;
-               agno++;
-       } while (agno < mp->m_sb.sb_agcount);
-
-       if (!error) {
-               if (bufidx) {
-                       long    written;
-
-                       error = formatter(ubuffer, buffer, bufidx, &written);
-                       if (!error)
-                               *count += bufidx;
-               }
-               *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
-       }
+       error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
+                       xfs_inumbers_walk, breq->icount, &ic);
  
-       kmem_free(buffer);
-       if (cur)
-               xfs_btree_del_cursor(cur, error);
-       if (agbp)
-               xfs_buf_relse(agbp);
+       /*
+        * We found some inode groups, so clear the error status and return
+        * them.  xfs_inumbers_walk does not advance breq->startino past a
+        * record whose formatter call failed, so on the next call the error
+        * will be triggered again and propagated to userspace as there will be
+        * no formatted inode groups in the buffer.
+        */
+       if (breq->ocount > 0)
+               error = 0;
  
         return error;
  }
+
+/*
+ * Convert an inumbers (v5) struct to a inogrp (v1) struct.
+ *
+ * @ig1 is zeroed before the three members are copied over, the same way
+ * xfs_bulkstat_to_bstat zeroes its output.  Without this, any bytes of
+ * struct xfs_inogrp that are not covered by the assignments below (such as
+ * padding the compiler may insert between members) would keep whatever the
+ * caller's memory previously held and travel along when the structure is
+ * copied onward.
+ */
+void
+xfs_inumbers_to_inogrp(
+       struct xfs_inogrp               *ig1,
+       const struct xfs_inumbers       *ig)
+{
+       /* Clear everything first so no stale bytes survive the conversion. */
+       memset(ig1, 0, sizeof(struct xfs_inogrp));
+       ig1->xi_startino = ig->xi_startino;
+       ig1->xi_alloccount = ig->xi_alloccount;
+       ig1->xi_allocmask = ig->xi_allocmask;
+}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h

index 8a822285b6718f417d4ab3c04a0566d1dac3566a..e90c1fc5b981a7786d02c8112995fdf78eee8b47 100644 (file)
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -5,83 +5,55 @@
  #ifndef __XFS_ITABLE_H__
  #define        __XFS_ITABLE_H__
  
-/*
- * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat
- * structures (by the dmi library). This is a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c
- */
-typedef int (*bulkstat_one_pf)(struct xfs_mount        *mp,
-                              xfs_ino_t        ino,
-                              void             __user *buffer,
-                              int              ubsize,
-                              int              *ubused,
-                              int              *stat);
+/*
+ * In-memory representation of a userspace request for batch inode data.
+ *
+ * @ubuffer and @ocount are advanced one record at a time by
+ * xfs_ibulk_advance(); @startino doubles as the walk cursor.
+ */
+struct xfs_ibulk {
+       struct xfs_mount        *mp;
+       void __user             *ubuffer; /* user output buffer */
+       xfs_ino_t               startino; /* start with this inode */
+       unsigned int            icount;   /* number of elements in ubuffer */
+       unsigned int            ocount;   /* number of records returned */
+       unsigned int            flags;    /* see XFS_IBULK_SAME_AG */
+};
+
+/* Only iterate within the same AG as startino */
+#define XFS_IBULK_SAME_AG      (XFS_IWALK_SAME_AG)
+
+/* Return value that means we want to abort the walk. */
+#define XFS_IBULK_ABORT                (XFS_IWALK_ABORT)
  
  /*
- * Values for stat return value.
+ * Advance the user buffer pointer by one record of the given size and count
+ * the record as returned.  If the buffer is now full (ocount has reached
+ * icount), return XFS_IBULK_ABORT so that the walk stops; otherwise return 0.
   */
-#define BULKSTAT_RV_NOTHING    0
-#define BULKSTAT_RV_DIDONE     1
-#define BULKSTAT_RV_GIVEUP     2
+static inline int
+xfs_ibulk_advance(
+       struct xfs_ibulk        *breq,
+       size_t                  bytes)
+{
+       /* Go through a char pointer so the addition is done in bytes. */
+       char __user             *b = breq->ubuffer;
+
+       breq->ubuffer = b + bytes;
+       breq->ocount++;
+       return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0;
+}
  
  /*
   * Return stat information in bulk (by-inode) for the filesystem.
   */
-int                                    /* error status */
-xfs_bulkstat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       *lastino,       /* last inode returned */
-       int             *count,         /* size of buffer/count returned */
-       bulkstat_one_pf formatter,      /* func that'd fill a single buf */
-       size_t          statstruct_size,/* sizeof struct that we're filling */
-       char            __user *ubuffer,/* buffer with inode stats */
-       int             *done);         /* 1 if there are more stats to get */
-
-typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
-       void                    __user *ubuffer, /* buffer to write to */
-       int                     ubsize,          /* remaining user buffer sz */
-       int                     *ubused,         /* bytes used by formatter */
-       const xfs_bstat_t       *buffer);        /* buffer to read from */
-
-int
-xfs_bulkstat_one_int(
-       xfs_mount_t             *mp,
-       xfs_ino_t               ino,
-       void                    __user *buffer,
-       int                     ubsize,
-       bulkstat_one_fmt_pf     formatter,
-       int                     *ubused,
-       int                     *stat);
  
-int
-xfs_bulkstat_one(
-       xfs_mount_t             *mp,
-       xfs_ino_t               ino,
-       void                    __user *buffer,
-       int                     ubsize,
-       int                     *ubused,
-       int                     *stat);
+typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
+               const struct xfs_bulkstat *bstat);
  
-typedef int (*inumbers_fmt_pf)(
-       void                    __user *ubuffer, /* buffer to write to */
-       const xfs_inogrp_t      *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written);      /* # of bytes written */
+int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1,
+               const struct xfs_bulkstat *bstat);
  
-int
-xfs_inumbers_fmt(
-       void                    __user *ubuffer, /* buffer to write to */
-       const xfs_inogrp_t      *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written);      /* # of bytes written */
+typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq,
+               const struct xfs_inumbers *igrp);
  
-int                                    /* error status */
-xfs_inumbers(
-       xfs_mount_t             *mp,    /* mount point for filesystem */
-       xfs_ino_t               *last,  /* last inode returned */
-       int                     *count, /* size of buffer/count returned */
-       void                    __user *buffer, /* buffer with inode info */
-       inumbers_fmt_pf         formatter);
+int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter);
+void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1,
+               const struct xfs_inumbers *ig);
  
  #endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c

new file mode 100644 (file)

index 0000000..8c7d727
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_health.h"
+#include "xfs_trans.h"
+#include "xfs_pwork.h"
+
+/*
+ * Walking Inodes in the Filesystem
+ * ================================
+ *
+ * This iterator function walks a subset of filesystem inodes in increasing
+ * order from @startino until there are no more inodes.  For each allocated
+ * inode it finds, it calls a walk function with the relevant inode number and
+ * a pointer to caller-provided data.  The walk function can return the usual
+ * negative error code to stop the iteration; 0 to continue the iteration; or
+ * XFS_IWALK_ABORT to stop the iteration.  This return value is returned to the
+ * caller.
+ *
+ * Internally, we allow the walk function to do anything, which means that we
+ * cannot maintain the inobt cursor or our lock on the AGI buffer.  We
+ * therefore cache the inobt records in kernel memory and only call the walk
+ * function when our memory buffer is full.  @nr_recs is the number of records
+ * that we've cached, and @sz_recs is the size of our cache.
+ *
+ * It is the responsibility of the walk function to ensure it accesses
+ * allocated inodes, as the inobt records may be stale by the time they are
+ * acted upon.
+ */
+
+/* State for walking the inode btree records (and inodes) of a single AG. */
+struct xfs_iwalk_ag {
+       /*
+        * Parallel work control data.
+        * NOTE(review): this member is embedded by value and so cannot itself
+        * be null; presumably it is a pointer inside it that is null when the
+        * walk is single threaded -- confirm against xfs_pwork.h.
+        */
+       struct xfs_pwork                pwork;
+
+       struct xfs_mount                *mp;
+       struct xfs_trans                *tp;
+
+       /* Where do we start the traversal? */
+       xfs_ino_t                       startino;
+
+       /* Array of inobt records we cache. */
+       struct xfs_inobt_rec_incore     *recs;
+
+       /* Number of entries allocated for the @recs array. */
+       unsigned int                    sz_recs;
+
+       /* Number of entries in the @recs array that are in use. */
+       unsigned int                    nr_recs;
+
+       /*
+        * Inode walk function and data pointer.  Either function pointer may
+        * be NULL; xfs_iwalk_ag_recs only calls the ones that are set.
+        */
+       xfs_iwalk_fn                    iwalk_fn;
+       xfs_inobt_walk_fn               inobt_walk_fn;
+       void                            *data;
+
+       /*
+        * Make it look like the inodes up to startino are free so that
+        * bulkstat can start its inode iteration at the correct place without
+        * needing to special case everywhere.
+        */
+       unsigned int                    trim_start:1;
+
+       /* Skip empty inobt records? */
+       unsigned int                    skip_empty:1;
+};
+
+/*
+ * Loop over all clusters in a chunk for a given incore inode allocation btree
+ * record.  Do a readahead if there are any allocated inodes in that cluster.
+ * All readahead requests for the chunk are issued inside one block plug.
+ */
+STATIC void
+xfs_iwalk_ichunk_ra(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       struct xfs_inobt_rec_incore     *irec)
+{
+       struct xfs_ino_geometry         *igeo = M_IGEO(mp);
+       xfs_agblock_t                   agbno;
+       struct blk_plug                 plug;
+       int                             i;      /* inode chunk index */
+
+       /* AG block that holds the first inode of this chunk. */
+       agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+
+       blk_start_plug(&plug);
+       for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
+               xfs_inofree_t   imask;
+
+               /* Mask of the inodes that live in this cluster. */
+               imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
+               /* Read ahead only if at least one of them is not free. */
+               if (imask & ~irec->ir_free) {
+                       xfs_btree_reada_bufs(mp, agno, agbno,
+                                       igeo->blocks_per_cluster,
+                                       &xfs_inode_buf_ops);
+               }
+               agbno += igeo->blocks_per_cluster;
+       }
+       blk_finish_plug(&plug);
+}
+
+/*
+ * Set the bits in @irec's free mask that correspond to the inodes before
+ * @agino so that we skip them.  This is how we restart an inode walk that was
+ * interrupted in the middle of an inode record.
+ */
+STATIC void
+xfs_iwalk_adjust_start(
+       xfs_agino_t                     agino,  /* inode to resume the walk at */
+       struct xfs_inobt_rec_incore     *irec)  /* btree record */
+{
+       int                             idx;    /* index into inode chunk */
+       int                             i;
+
+       idx = agino - irec->ir_startino;
+
+       /*
+        * Every inode in this record that sits before @agino is uninteresting
+        * because it is before our start point.  Count each of those that is
+        * currently allocated as free, so that ir_freecount stays consistent
+        * with the free mask bits we set below.
+        */
+       for (i = 0; i < idx; i++) {
+               if (XFS_INOBT_MASK(i) & ~irec->ir_free)
+                       irec->ir_freecount++;
+       }
+
+       /* Mark inodes [0, idx) of the chunk as free. */
+       irec->ir_free |= xfs_inobt_maskn(0, idx);
+}
+
+/*
+ * Allocate memory for a walk.
+ *
+ * Allocates room for @iwag->sz_recs inobt records and resets nr_recs to zero.
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+STATIC int
+xfs_iwalk_alloc(
+       struct xfs_iwalk_ag     *iwag)
+{
+       size_t                  size;
+
+       /* The record cache must not already be allocated. */
+       ASSERT(iwag->recs == NULL);
+       iwag->nr_recs = 0;
+
+       /* Allocate a prefetch buffer for inobt records. */
+       size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
+       iwag->recs = kmem_alloc(size, KM_MAYFAIL);
+       if (iwag->recs == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/*
+ * Free memory we allocated for a walk.  The pointer is cleared afterwards,
+ * which is the state that the ASSERT in xfs_iwalk_alloc expects.
+ */
+STATIC void
+xfs_iwalk_free(
+       struct xfs_iwalk_ag     *iwag)
+{
+       kmem_free(iwag->recs);
+       iwag->recs = NULL;
+}
+
+/*
+ * For each inuse inode in each cached inobt record, call our function.
+ *
+ * For every cached record the inobt_walk_fn (if set) is called once with the
+ * record, and then the iwalk_fn (if set) is called for each inode whose bit
+ * is clear in ir_free.  Returns the first nonzero value returned by either
+ * callback, or 0 if all records were processed or xfs_pwork_want_abort()
+ * asked us to stop early.
+ */
+STATIC int
+xfs_iwalk_ag_recs(
+       struct xfs_iwalk_ag             *iwag)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       xfs_ino_t                       ino;
+       unsigned int                    i, j;
+       xfs_agnumber_t                  agno;
+       int                             error;
+
+       /* All cached records belong to the AG that startino is in. */
+       agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+       for (i = 0; i < iwag->nr_recs; i++) {
+               struct xfs_inobt_rec_incore     *irec = &iwag->recs[i];
+
+               trace_xfs_iwalk_ag_rec(mp, agno, irec);
+
+               if (xfs_pwork_want_abort(&iwag->pwork))
+                       return 0;
+
+               /* Per-record callback, if the caller asked for one. */
+               if (iwag->inobt_walk_fn) {
+                       error = iwag->inobt_walk_fn(mp, tp, agno, irec,
+                                       iwag->data);
+                       if (error)
+                               return error;
+               }
+
+               /* No per-inode callback: nothing more to do for this record. */
+               if (!iwag->iwalk_fn)
+                       continue;
+
+               for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                       if (xfs_pwork_want_abort(&iwag->pwork))
+                               return 0;
+
+                       /* Skip if this inode is free */
+                       if (XFS_INOBT_MASK(j) & irec->ir_free)
+                               continue;
+
+                       /* Otherwise call our function. */
+                       ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j);
+                       error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
+                       if (error)
+                               return error;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Delete cursor and let go of AGI.
+ *
+ * @error is passed through to xfs_btree_del_cursor().  Both *@curpp and
+ * *@agi_bpp are set to NULL once released, and NULL inputs are skipped, so
+ * calling this again on the same pointers does nothing.
+ */
+static inline void
+xfs_iwalk_del_inobt(
+       struct xfs_trans        *tp,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp,
+       int                     error)
+{
+       if (*curpp) {
+               xfs_btree_del_cursor(*curpp, error);
+               *curpp = NULL;
+       }
+       if (*agi_bpp) {
+               xfs_trans_brelse(tp, *agi_bpp);
+               *agi_bpp = NULL;
+       }
+}
+
+/*
+ * Set ourselves up for walking inobt records starting from a given point in
+ * the filesystem.
+ *
+ * If caller passed in a nonzero start inode number, load the record from the
+ * inobt and (when trim_start is set) make the record look like all the inodes
+ * before agino are free so that we skip them, and then move the cursor to the
+ * next inobt record.  This is how we support starting an iwalk in the middle
+ * of an inode chunk.
+ *
+ * If the caller passed in a start number of zero, move the cursor to the first
+ * inobt record.
+ *
+ * The caller is responsible for cleaning up the cursor and buffer pointer
+ * regardless of the error status.
+ */
+STATIC int
+xfs_iwalk_ag_start(
+       struct xfs_iwalk_ag     *iwag,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             agino,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp,
+       int                     *has_more)
+{
+       struct xfs_mount        *mp = iwag->mp;
+       struct xfs_trans        *tp = iwag->tp;
+       struct xfs_inobt_rec_incore *irec;
+       int                     error;
+
+       /* Set up a fresh cursor and empty the inobt cache. */
+       iwag->nr_recs = 0;
+       error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+       if (error)
+               return error;
+
+       /* Starting at the beginning of the AG?  That's easy! */
+       if (agino == 0)
+               return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
+
+       /*
+        * Otherwise, we have to grab the inobt record where we left off, stuff
+        * the record into our cache, and then see if there are more records.
+        * We require a lookup cache of at least two elements so that the
+        * caller doesn't have to deal with tearing down the cursor to walk the
+        * records.
+        */
+       error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
+       if (error)
+               return error;
+
+       /*
+        * If the LE lookup at @agino yields no records, jump ahead to the
+        * inobt cursor increment to see if there are more records to process.
+        */
+       if (!*has_more)
+               goto out_advance;
+
+       /* Get the record, should always work */
+       irec = &iwag->recs[iwag->nr_recs];
+       error = xfs_inobt_get_rec(*curpp, irec, has_more);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
+
+       /*
+        * If the LE lookup yielded an inobt record before the cursor position,
+        * skip it and see if there's another one after it.
+        */
+       if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+               goto out_advance;
+
+       /*
+        * If agino fell in the middle of the inode record, make it look like
+        * the inodes up to agino are free so that we don't return them again.
+        */
+       if (iwag->trim_start)
+               xfs_iwalk_adjust_start(agino, irec);
+
+       /*
+        * The prefetch calculation is supposed to give us a large enough inobt
+        * record cache that this function can stage a partial first record and
+        * the loop body can cache a record without having to check for cache
+        * space until after it reads an inobt record.
+        */
+       iwag->nr_recs++;
+       ASSERT(iwag->nr_recs < iwag->sz_recs);
+
+out_advance:
+       return xfs_btree_increment(*curpp, 0, has_more);
+}
+
+/*
+ * The inobt record cache is full, so preserve the inobt cursor state and
+ * run callbacks on the cached inobt records.  When we're done, restore the
+ * cursor state to wherever the cursor would have been had the cache not been
+ * full (and therefore we could've just incremented the cursor) if *@has_more
+ * is true.  On exit, *@has_more will indicate whether or not the caller should
+ * try for more inode records.
+ *
+ * The cursor and AGI buffer are released before the callbacks run.  If
+ * *@has_more is false on entry the cursor is not recreated, so *@curpp and
+ * *@agi_bpp are left NULL and *@has_more is left untouched.
+ *
+ * Returns 0, or the first nonzero value returned by xfs_iwalk_ag_recs,
+ * xfs_inobt_cur or xfs_inobt_lookup.
+ */
+STATIC int
+xfs_iwalk_run_callbacks(
+       struct xfs_iwalk_ag             *iwag,
+       xfs_agnumber_t                  agno,
+       struct xfs_btree_cur            **curpp,
+       struct xfs_buf                  **agi_bpp,
+       int                             *has_more)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       struct xfs_inobt_rec_incore     *irec;
+       xfs_agino_t                     restart;
+       int                             error;
+
+       ASSERT(iwag->nr_recs > 0);
+
+       /* Delete cursor but remember the last record we cached... */
+       xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
+       irec = &iwag->recs[iwag->nr_recs - 1];
+       restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+
+       error = xfs_iwalk_ag_recs(iwag);
+       if (error)
+               return error;
+
+       /* ...empty the cache... */
+       iwag->nr_recs = 0;
+
+       /*
+        * Test the value, not the pointer: the pointer is never NULL at the
+        * call sites, so checking it would make us re-read the AGI and redo a
+        * btree lookup even when the caller already knows the AG is finished.
+        */
+       if (!*has_more)
+               return 0;
+
+       /* ...and recreate the cursor just past where we left off. */
+       error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+       if (error)
+               return error;
+
+       return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+}
+
+/*
+ * Walk all inodes in a single AG, from @iwag->startino to the end of the AG.
+ *
+ * Records are read from the inobt into iwag->recs; whenever that cache fills
+ * up, xfs_iwalk_run_callbacks() runs the callbacks on the cached records.  Any
+ * records still cached when the loop ends are processed the same way.  The
+ * cursor and AGI buffer are always released before returning.  Returns 0 or
+ * the first nonzero value from the btree helpers or the callbacks.
+ */
+STATIC int
+xfs_iwalk_ag(
+       struct xfs_iwalk_ag             *iwag)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       struct xfs_buf                  *agi_bp = NULL;
+       struct xfs_btree_cur            *cur = NULL;
+       xfs_agnumber_t                  agno;
+       xfs_agino_t                     agino;
+       int                             has_more;
+       int                             error = 0;
+
+       /* Set up our cursor at the right place in the inode btree. */
+       agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+       agino = XFS_INO_TO_AGINO(mp, iwag->startino);
+       error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);
+
+       while (!error && has_more) {
+               struct xfs_inobt_rec_incore     *irec;
+
+               cond_resched();
+               /* On abort, leave without running the cached records. */
+               if (xfs_pwork_want_abort(&iwag->pwork))
+                       goto out;
+
+               /* Fetch the inobt record. */
+               irec = &iwag->recs[iwag->nr_recs];
+               error = xfs_inobt_get_rec(cur, irec, &has_more);
+               if (error || !has_more)
+                       break;
+
+               /* No allocated inodes in this chunk; skip it. */
+               if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
+                       error = xfs_btree_increment(cur, 0, &has_more);
+                       if (error)
+                               break;
+                       continue;
+               }
+
+               /*
+                * Start readahead for this inode chunk in anticipation of
+                * walking the inodes.
+                */
+               if (iwag->iwalk_fn)
+                       xfs_iwalk_ichunk_ra(mp, agno, irec);
+
+               /*
+                * If there's space in the buffer for more records, increment
+                * the btree cursor and grab more.
+                */
+               if (++iwag->nr_recs < iwag->sz_recs) {
+                       error = xfs_btree_increment(cur, 0, &has_more);
+                       if (error || !has_more)
+                               break;
+                       continue;
+               }
+
+               /*
+                * Otherwise, we need to save cursor state and run the callback
+                * function on the cached records.  The run_callbacks function
+                * is supposed to return a cursor pointing to the record where
+                * we would be if we had been able to increment like above.
+                */
+               ASSERT(has_more);
+               error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
+                               &has_more);
+       }
+
+       /* Nothing left in the cache, or we failed: skip the final flush. */
+       if (iwag->nr_recs == 0 || error)
+               goto out;
+
+       /* Walk the unprocessed records in the cache. */
+       error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);
+
+out:
+       xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
+       return error;
+}
+
+/*
+ * We experimentally determined that the reduction in ioctl call overhead
+ * diminishes when userspace asks for more than 2048 inodes, so we'll cap
+ * prefetch at this point.
+ */
+#define IWALK_MAX_INODE_PREFETCH       (2048U)
+
+/*
+ * Given the number of inodes to prefetch, set the number of inobt records that
+ * we cache in memory, which controls the number of inodes we try to read
+ * ahead.  Set the maximum if @inodes == 0.  The result is never less than 2.
+ */
+static inline unsigned int
+xfs_iwalk_prefetch(
+       unsigned int            inodes)
+{
+       unsigned int            inobt_records;
+
+       /*
+        * If the caller didn't tell us the number of inodes they wanted,
+        * assume the maximum prefetch possible for best performance.
+        * Otherwise, cap prefetch at that maximum so that we don't start an
+        * absurd amount of prefetch.
+        */
+       if (inodes == 0)
+               inodes = IWALK_MAX_INODE_PREFETCH;
+       inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);
+
+       /* Round the inode count up to a full chunk. */
+       inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
+
+       /*
+        * In order to convert the number of inodes to prefetch into an
+        * estimate of the number of inobt records to cache, we require a
+        * conversion factor that reflects our expectations of the average
+        * loading factor of an inode chunk.  Based on data gathered, most
+        * (but not all) filesystems manage to keep the inode chunks totally
+        * full, so we'll underestimate slightly so that our readahead will
+        * still deliver the performance we want on aging filesystems:
+        *
+        * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
+        *
+        * The code below is rearranged as (inodes * 5) / (4 * INODES_PER_CHUNK)
+        * because in integer arithmetic (4 / 5) would truncate to zero.
+        */
+       inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);
+
+       /*
+        * Allocate enough space to prefetch at least two inobt records so that
+        * we can cache both the record where the iwalk started and the next
+        * record.  This simplifies the AG inode walk loop setup code.
+        */
+       return max(inobt_records, 2U);
+}
+
+/*
+ * Walk all inodes in the filesystem starting from @startino.  The @iwalk_fn
+ * will be called for each allocated inode, being passed the inode's number and
+ * @data.  @inode_records is the number of inodes to prefetch (0 = maximum);
+ * XFS_IWALK_SAME_AG in @flags stops the walk after @startino's AG.
+ */
+int
+xfs_iwalk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_iwalk_fn            iwalk_fn,
+       unsigned int            inode_records,
+       void                    *data)
+{
+       struct xfs_iwalk_ag     iwag = {
+               .mp             = mp,
+               .tp             = tp,
+               .iwalk_fn       = iwalk_fn,
+               .data           = data,
+               .startino       = startino,
+               .sz_recs        = xfs_iwalk_prefetch(inode_records),
+               .trim_start     = 1,
+               .skip_empty     = 1,
+               .pwork          = XFS_PWORK_SINGLE_THREADED,
+       };
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+       error = xfs_iwalk_alloc(&iwag);
+       if (error)
+               return error;
+
+       for (; agno < mp->m_sb.sb_agcount; agno++) {
+               error = xfs_iwalk_ag(&iwag);
+               if (error)
+                       break;
+               iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); /* next AG */
+               if (flags & XFS_IWALK_SAME_AG)  /* same flag set as the ASSERT */
+                       break;
+       }
+
+       xfs_iwalk_free(&iwag);
+       return error;
+}
+
+/* Run per-thread iwalk work, then free the queued xfs_iwalk_ag. */
+static int
+xfs_iwalk_ag_work(
+       struct xfs_mount        *mp,
+       struct xfs_pwork        *pwork)
+{
+       struct xfs_iwalk_ag     *iwag;
+       int                     error = 0;
+
+       iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
+       if (xfs_pwork_want_abort(pwork))
+               goto out;       /* abort requested: skip the walk, return 0 */
+
+       error = xfs_iwalk_alloc(iwag);
+       if (error)
+               goto out;
+
+       error = xfs_iwalk_ag(iwag);
+       xfs_iwalk_free(iwag);   /* undo xfs_iwalk_alloc() only if it succeeded */
+out:
+       kmem_free(iwag);        /* the work item itself is freed on every path */
+       return error;
+}
+
+/*
+ * Walk the inodes in the filesystem starting from @startino, queueing one work
+ * item per AG for the worker threads.  @flags takes the XFS_IWALK_* flags.
+ */
+int
+xfs_iwalk_threaded(
+       struct xfs_mount        *mp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_iwalk_fn            iwalk_fn,
+       unsigned int            inode_records,
+       bool                    polled,
+       void                    *data)
+{
+       struct xfs_pwork_ctl    pctl;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       unsigned int            nr_threads;
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+       nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+       error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
+                       nr_threads);
+       if (error)
+               return error;
+
+       for (; agno < mp->m_sb.sb_agcount; agno++) {
+               struct xfs_iwalk_ag     *iwag;
+
+               if (xfs_pwork_ctl_want_abort(&pctl))
+                       break;
+
+               iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP);
+               iwag->mp = mp;
+               iwag->iwalk_fn = iwalk_fn;
+               iwag->data = data;
+               iwag->startino = startino;
+               iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+               xfs_pwork_queue(&pctl, &iwag->pwork);   /* worker frees iwag */
+               startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);   /* next AG */
+               if (flags & XFS_IWALK_SAME_AG)  /* same flag set as the ASSERT */
+                       break;
+       }
+
+       if (polled)
+               xfs_pwork_poll(&pctl);
+       return xfs_pwork_destroy(&pctl);
+}
+
+/*
+ * Allow callers to cache up to a page's worth of inobt records.  This reflects
+ * the existing inumbers prefetching behavior.  Since the inobt walk does not
+ * itself do anything with the inobt records, we can set a fairly high limit
+ * here.
+ */
+#define MAX_INOBT_WALK_PREFETCH        \
+       (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
+
+/*
+ * Given the number of records that the user wanted, return the number of inobt
+ * records that we buffer in memory.  @inobt_records == 0 selects the maximum.
+ */
+static inline unsigned int
+xfs_inobt_walk_prefetch(
+       unsigned int            inobt_records)
+{
+       /*
+        * If the caller didn't tell us the number of inobt records they
+        * wanted, assume the maximum prefetch possible for best performance.
+        */
+       if (inobt_records == 0)
+               inobt_records = MAX_INOBT_WALK_PREFETCH;
+
+       /*
+        * Allocate enough space to prefetch at least two inobt records so that
+        * we can cache both the record where the iwalk started and the next
+        * record.  This simplifies the AG inode walk loop setup code.
+        */
+       inobt_records = max(inobt_records, 2U);
+
+       /*
+        * Cap prefetch at that maximum so that we don't use an absurd amount
+        * of memory; min_t() casts the sizeof()-based limit to unsigned int.
+        */
+       return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
+}
+
+/*
+ * Walk all inode btree records in the filesystem starting from @startino.  The
+ * @inobt_walk_fn will be called for each btree record, being passed the incore
+ * record and @data.  @inobt_records controls how many inobt records we try to
+ * cache ahead of time (0 selects the maximum).
+ */
+int
+xfs_inobt_walk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_inobt_walk_fn       inobt_walk_fn,
+       unsigned int            inobt_records,
+       void                    *data)
+{
+       struct xfs_iwalk_ag     iwag = {
+               .mp             = mp,
+               .tp             = tp,
+               .inobt_walk_fn  = inobt_walk_fn,
+               .data           = data,
+               .startino       = startino,
+               .sz_recs        = xfs_inobt_walk_prefetch(inobt_records),
+               .pwork          = XFS_PWORK_SINGLE_THREADED,
+       };
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
+
+       error = xfs_iwalk_alloc(&iwag);
+       if (error)
+               return error;
+
+       for (; agno < mp->m_sb.sb_agcount; agno++) {
+               error = xfs_iwalk_ag(&iwag);
+               if (error)
+                       break;
+               iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); /* next AG */
+               if (flags & XFS_INOBT_WALK_SAME_AG)
+                       break;
+       }
+
+       xfs_iwalk_free(&iwag);  /* runs on both the success and error paths */
+       return error;
+}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h

new file mode 100644 (file)

index 0000000..6c960e1
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_IWALK_H__
+#define __XFS_IWALK_H__
+
+/* Walk all inodes in the filesystem starting from @startino. */
+typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+                           xfs_ino_t ino, void *data);
+/* Return values for xfs_iwalk_fn: keep walking, or abort the walk. */
+#define XFS_IWALK_CONTINUE     (XFS_ITER_CONTINUE)
+#define XFS_IWALK_ABORT                (XFS_ITER_ABORT)
+
+int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
+               unsigned int flags, xfs_iwalk_fn iwalk_fn,
+               unsigned int inode_records, void *data);
+int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
+               unsigned int flags, xfs_iwalk_fn iwalk_fn,
+               unsigned int inode_records, bool polled, void *data);
+
+/* Only iterate inodes within the same AG as @startino. */
+#define XFS_IWALK_SAME_AG      (0x1)
+
+#define XFS_IWALK_FLAGS_ALL    (XFS_IWALK_SAME_AG)
+
+/* Walk all inode btree records in the filesystem starting from @startino. */
+typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+                                xfs_agnumber_t agno,
+                                const struct xfs_inobt_rec_incore *irec,
+                                void *data);
+/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
+#define XFS_INOBT_WALK_ABORT   (XFS_IWALK_ABORT)
+
+int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
+               xfs_ino_t startino, unsigned int flags,
+               xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
+               void *data);
+
+/* Only iterate inobt records within the same AG as @startino. */
+#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
+
+#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
+
+#endif /* __XFS_IWALK_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h

index edbd5a210df22144ab810a67d88dac5f479b39f7..ca15105681cacb7c3677b2397baf729ca846d47c 100644 (file)
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -110,8 +110,6 @@ typedef __u32                       xfs_nlink_t;
  #define current_restore_flags_nested(sp, f)    \
                 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
  
-#define spinlock_destroy(lock)
-
  #define NBBY           8               /* number of bits per byte */
  
  /*
@@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
         return x;
  }
  
+int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
+               char *data, unsigned int op);
+
  #define ASSERT_ALWAYS(expr)    \
         (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
  
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index 2466b0f5b6c43f175c27e4952e1aed7a0698c607..00e9f5c388d366031fd8c5b713655a718c0a9287 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -16,11 +16,7 @@
  #include "xfs_trans_priv.h"
  #include "xfs_log.h"
  #include "xfs_log_priv.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode.h"
  #include "xfs_trace.h"
-#include "xfs_fsops.h"
-#include "xfs_cksum.h"
  #include "xfs_sysfs.h"
  #include "xfs_sb.h"
  #include "xfs_health.h"
@@ -45,21 +41,14 @@ STATIC int
  xlog_space_left(
         struct xlog             *log,
         atomic64_t              *head);
-STATIC int
-xlog_sync(
-       struct xlog             *log,
-       struct xlog_in_core     *iclog);
  STATIC void
  xlog_dealloc_log(
         struct xlog             *log);
  
  /* local state machine functions */
-STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void
-xlog_state_do_callback(
-       struct xlog             *log,
-       int                     aborted,
-       struct xlog_in_core     *iclog);
+STATIC void xlog_state_done_syncing(
+       struct xlog_in_core     *iclog,
+       bool                    aborted);
  STATIC int
  xlog_state_get_iclog_space(
         struct xlog             *log,
@@ -107,8 +96,7 @@ STATIC void
  xlog_verify_iclog(
         struct xlog             *log,
         struct xlog_in_core     *iclog,
-       int                     count,
-       bool                    syncing);
+       int                     count);
  STATIC void
  xlog_verify_tail_lsn(
         struct xlog             *log,
@@ -117,7 +105,7 @@ xlog_verify_tail_lsn(
  #else
  #define xlog_verify_dest_ptr(a,b)
  #define xlog_verify_grant_tail(a)
-#define xlog_verify_iclog(a,b,c,d)
+#define xlog_verify_iclog(a,b,c)
  #define xlog_verify_tail_lsn(a,b,c)
  #endif
  
@@ -541,32 +529,6 @@ xfs_log_done(
         return lsn;
  }
  
-/*
- * Attaches a new iclog I/O completion callback routine during
- * transaction commit.  If the log is in error state, a non-zero
- * return code is handed back and the caller is responsible for
- * executing the callback at an appropriate time.
- */
-int
-xfs_log_notify(
-       struct xlog_in_core     *iclog,
-       xfs_log_callback_t      *cb)
-{
-       int     abortflg;
-
-       spin_lock(&iclog->ic_callback_lock);
-       abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
-       if (!abortflg) {
-               ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
-                             (iclog->ic_state == XLOG_STATE_WANT_SYNC));
-               cb->cb_next = NULL;
-               *(iclog->ic_callback_tail) = cb;
-               iclog->ic_callback_tail = &(cb->cb_next);
-       }
-       spin_unlock(&iclog->ic_callback_lock);
-       return abortflg;
-}
-
  int
  xfs_log_release_iclog(
         struct xfs_mount        *mp,
@@ -807,16 +769,12 @@ xfs_log_mount_finish(
   * The mount has failed. Cancel the recovery if it hasn't completed and destroy
   * the log.
   */
-int
+void
  xfs_log_mount_cancel(
         struct xfs_mount        *mp)
  {
-       int                     error;
-
-       error = xlog_recover_cancel(mp->m_log);
+       xlog_recover_cancel(mp->m_log);
         xfs_log_unmount(mp);
-
-       return error;
  }
  
  /*
@@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
          * Or, if we are doing a forced umount (typically because of IO errors).
          */
         if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
-           xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+           xfs_readonly_buftarg(log->l_targ)) {
                 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
                 return 0;
         }
@@ -1244,53 +1202,49 @@ xlog_space_left(
  }
  
  
-/*
- * Log function which is called when an io completes.
- *
- * The log manager needs its own routine, in order to control what
- * happens with the buffer after the write completes.
- */
  static void
-xlog_iodone(xfs_buf_t *bp)
+xlog_ioend_work(
+       struct work_struct      *work)
  {
-       struct xlog_in_core     *iclog = bp->b_log_item;
-       struct xlog             *l = iclog->ic_log;
-       int                     aborted = 0;
+       struct xlog_in_core     *iclog =
+               container_of(work, struct xlog_in_core, ic_end_io_work);
+       struct xlog             *log = iclog->ic_log;
+       bool                    aborted = false;
+       int                     error;
+
+       error = blk_status_to_errno(iclog->ic_bio.bi_status);
+#ifdef DEBUG
+       /* treat writes with injected CRC errors as failed */
+       if (iclog->ic_fail_crc)
+               error = -EIO;
+#endif
  
         /*
-        * Race to shutdown the filesystem if we see an error or the iclog is in
-        * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
-        * CRC errors into log recovery.
+        * Race to shutdown the filesystem if we see an error.
          */
-       if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
-           iclog->ic_state & XLOG_STATE_IOABORT) {
-               if (iclog->ic_state & XLOG_STATE_IOABORT)
-                       iclog->ic_state &= ~XLOG_STATE_IOABORT;
-
-               xfs_buf_ioerror_alert(bp, __func__);
-               xfs_buf_stale(bp);
-               xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
+       if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+               xfs_alert(log->l_mp, "log I/O error %d", error);
+               xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
                 /*
                  * This flag will be propagated to the trans-committed
                  * callback routines to let them know that the log-commit
                  * didn't succeed.
                  */
-               aborted = XFS_LI_ABORTED;
+               aborted = true;
         } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
-               aborted = XFS_LI_ABORTED;
+               aborted = true;
         }
  
-       /* log I/O is always issued ASYNC */
-       ASSERT(bp->b_flags & XBF_ASYNC);
         xlog_state_done_syncing(iclog, aborted);
+       bio_uninit(&iclog->ic_bio);
  
         /*
-        * drop the buffer lock now that we are done. Nothing references
-        * the buffer after this, so an unmount waiting on this lock can now
-        * tear it down safely. As such, it is unsafe to reference the buffer
-        * (bp) after the unlock as we could race with it being freed.
+        * Drop the lock to signal that we are done. Nothing references the
+        * iclog after this, so an unmount waiting on this lock can now tear it
+        * down safely. As such, it is unsafe to reference the iclog after the
+        * unlock as we could race with it being freed.
          */
-       xfs_buf_unlock(bp);
+       up(&iclog->ic_sema);
  }
  
  /*
@@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp)
   * If the filesystem blocksize is too large, we may need to choose a
   * larger size since the directory code currently logs entire blocks.
   */
-
  STATIC void
  xlog_get_iclog_buffer_size(
         struct xfs_mount        *mp,
         struct xlog             *log)
  {
-       int size;
-       int xhdrs;
-
         if (mp->m_logbufs <= 0)
-               log->l_iclog_bufs = XLOG_MAX_ICLOGS;
-       else
-               log->l_iclog_bufs = mp->m_logbufs;
+               mp->m_logbufs = XLOG_MAX_ICLOGS;
+       if (mp->m_logbsize <= 0)
+               mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
+
+       log->l_iclog_bufs = mp->m_logbufs;
+       log->l_iclog_size = mp->m_logbsize;
  
         /*
-        * Buffer size passed in from mount system call.
+        * # headers = size / 32k - one header holds cycles from 32k of data.
          */
-       if (mp->m_logbsize > 0) {
-               size = log->l_iclog_size = mp->m_logbsize;
-               log->l_iclog_size_log = 0;
-               while (size != 1) {
-                       log->l_iclog_size_log++;
-                       size >>= 1;
-               }
-
-               if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-                       /* # headers = size / 32k
-                        * one header holds cycles from 32k of data
-                        */
-
-                       xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
-                       if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
-                               xhdrs++;
-                       log->l_iclog_hsize = xhdrs << BBSHIFT;
-                       log->l_iclog_heads = xhdrs;
-               } else {
-                       ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
-                       log->l_iclog_hsize = BBSIZE;
-                       log->l_iclog_heads = 1;
-               }
-               goto done;
-       }
-
-       /* All machines use 32kB buffers by default. */
-       log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
-       log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-
-       /* the default log size is 16k or 32k which is one header sector */
-       log->l_iclog_hsize = BBSIZE;
-       log->l_iclog_heads = 1;
-
-done:
-       /* are we being asked to make the sizes selected above visible? */
-       if (mp->m_logbufs == 0)
-               mp->m_logbufs = log->l_iclog_bufs;
-       if (mp->m_logbsize == 0)
-               mp->m_logbsize = log->l_iclog_size;
-}      /* xlog_get_iclog_buffer_size */
-
+       log->l_iclog_heads =
+               DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
+       log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
+}
  
  void
  xfs_log_work_queue(
@@ -1422,7 +1337,6 @@ xlog_alloc_log(
         xlog_rec_header_t       *head;
         xlog_in_core_t          **iclogp;
         xlog_in_core_t          *iclog, *prev_iclog=NULL;
-       xfs_buf_t               *bp;
         int                     i;
         int                     error = -ENOMEM;
         uint                    log2_size = 0;
@@ -1480,30 +1394,6 @@ xlog_alloc_log(
  
         xlog_get_iclog_buffer_size(mp, log);
  
-       /*
-        * Use a NULL block for the extra log buffer used during splits so that
-        * it will trigger errors if we ever try to do IO on it without first
-        * having set it up properly.
-        */
-       error = -ENOMEM;
-       bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
-                          BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
-       if (!bp)
-               goto out_free_log;
-
-       /*
-        * The iclogbuf buffer locks are held over IO but we are not going to do
-        * IO yet.  Hence unlock the buffer so that the log IO path can grab it
-        * when appropriately.
-        */
-       ASSERT(xfs_buf_islocked(bp));
-       xfs_buf_unlock(bp);
-
-       /* use high priority wq for log I/O completion */
-       bp->b_ioend_wq = mp->m_log_workqueue;
-       bp->b_iodone = xlog_iodone;
-       log->l_xbuf = bp;
-
         spin_lock_init(&log->l_icloglock);
         init_waitqueue_head(&log->l_flush_wait);
  
@@ -1516,29 +1406,22 @@ xlog_alloc_log(
          * xlog_in_core_t in xfs_log_priv.h for details.
          */
         ASSERT(log->l_iclog_size >= 4096);
-       for (i=0; i < log->l_iclog_bufs; i++) {
-               *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
-               if (!*iclogp)
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
+                               sizeof(struct bio_vec);
+
+               iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
+               if (!iclog)
                         goto out_free_iclog;
  
-               iclog = *iclogp;
+               *iclogp = iclog;
                 iclog->ic_prev = prev_iclog;
                 prev_iclog = iclog;
  
-               bp = xfs_buf_get_uncached(mp->m_logdev_targp,
-                                         BTOBB(log->l_iclog_size),
-                                         XBF_NO_IOACCT);
-               if (!bp)
+               iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
+                               KM_MAYFAIL);
+               if (!iclog->ic_data)
                         goto out_free_iclog;
-
-               ASSERT(xfs_buf_islocked(bp));
-               xfs_buf_unlock(bp);
-
-               /* use high priority wq for log I/O completion */
-               bp->b_ioend_wq = mp->m_log_workqueue;
-               bp->b_iodone = xlog_iodone;
-               iclog->ic_bp = bp;
-               iclog->ic_data = bp->b_addr;
  #ifdef DEBUG
                 log->l_iclog_bak[i] = &iclog->ic_header;
  #endif
@@ -1552,36 +1435,43 @@ xlog_alloc_log(
                 head->h_fmt = cpu_to_be32(XLOG_FMT);
                 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
  
-               iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
+               iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
                 iclog->ic_state = XLOG_STATE_ACTIVE;
                 iclog->ic_log = log;
                 atomic_set(&iclog->ic_refcnt, 0);
                 spin_lock_init(&iclog->ic_callback_lock);
-               iclog->ic_callback_tail = &(iclog->ic_callback);
+               INIT_LIST_HEAD(&iclog->ic_callbacks);
                 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
  
                 init_waitqueue_head(&iclog->ic_force_wait);
                 init_waitqueue_head(&iclog->ic_write_wait);
+               INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
+               sema_init(&iclog->ic_sema, 1);
  
                 iclogp = &iclog->ic_next;
         }
         *iclogp = log->l_iclog;                 /* complete ring */
         log->l_iclog->ic_prev = prev_iclog;     /* re-write 1st prev ptr */
  
+       log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
+                       WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
+                       mp->m_fsname);
+       if (!log->l_ioend_workqueue)
+               goto out_free_iclog;
+
         error = xlog_cil_init(log);
         if (error)
-               goto out_free_iclog;
+               goto out_destroy_workqueue;
         return log;
  
+out_destroy_workqueue:
+       destroy_workqueue(log->l_ioend_workqueue);
  out_free_iclog:
         for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
                 prev_iclog = iclog->ic_next;
-               if (iclog->ic_bp)
-                       xfs_buf_free(iclog->ic_bp);
+               kmem_free(iclog->ic_data);
                 kmem_free(iclog);
         }
-       spinlock_destroy(&log->l_icloglock);
-       xfs_buf_free(log->l_xbuf);
  out_free_log:
         kmem_free(log);
  out:
@@ -1766,42 +1656,155 @@ xlog_cksum(
         return xfs_end_cksum(crc);
  }
  
-/*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- *
- * We lock the iclogbufs here so that we can serialise against IO completion
- * during unmount. We might be processing a shutdown triggered during unmount,
- * and that can occur asynchronously to the unmount thread, and hence we need to
- * ensure that completes before tearing down the iclogbufs. Hence we need to
- * hold the buffer lock across the log IO to acheive that.
- */
-STATIC int
-xlog_bdstrat(
-       struct xfs_buf          *bp)
+static void
+xlog_bio_end_io(
+       struct bio              *bio)
  {
-       struct xlog_in_core     *iclog = bp->b_log_item;
+       struct xlog_in_core     *iclog = bio->bi_private;
  
-       xfs_buf_lock(bp);
-       if (iclog->ic_state & XLOG_STATE_IOERROR) {
-               xfs_buf_ioerror(bp, -EIO);
-               xfs_buf_stale(bp);
-               xfs_buf_ioend(bp);
+       queue_work(iclog->ic_log->l_ioend_workqueue,
+                  &iclog->ic_end_io_work);
+}
+
+static void
+xlog_map_iclog_data(
+       struct bio              *bio,
+       void                    *data,
+       size_t                  count)
+{
+       do {    /* add @count bytes at @data to @bio, one page-bounded piece at a time */
+               struct page     *page = kmem_to_page(data);
+               unsigned int    off = offset_in_page(data);
+               size_t          len = min_t(size_t, count, PAGE_SIZE - off);
+
+               /* warn once if the bio did not take the whole piece */
+               WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
+
+               data += len;
+               count -= len;
+       } while (count);
+}
+
+STATIC void
+xlog_write_iclog(
+       struct xlog             *log,
+       struct xlog_in_core     *iclog,
+       uint64_t                bno,
+       unsigned int            count,
+       bool                    need_flush)
+{
+       ASSERT(bno < log->l_logBBsize);
+
+       /*
+        * We lock the iclogbufs here so that we can serialise against I/O
+        * completion during unmount.  We might be processing a shutdown
+        * triggered during unmount, and that can occur asynchronously to the
+        * unmount thread, and hence we need to ensure that completes before
+        * tearing down the iclogbufs.  Hence we need to hold the buffer lock
+        * across the log IO to achieve that.
+        */
+       down(&iclog->ic_sema);
+       if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
                 /*
                  * It would seem logical to return EIO here, but we rely on
                  * the log state machine to propagate I/O errors instead of
-                * doing it here. Similarly, IO completion will unlock the
-                * buffer, so we don't do it here.
+                * doing it here.  We kick off the state machine and unlock
+                * the buffer manually, the code needs to be kept in sync
+                * with the I/O completion path.
                  */
-               return 0;
+               xlog_state_done_syncing(iclog, true);   /* bool aborted */
+               up(&iclog->ic_sema);
+               return;
         }
  
-       xfs_buf_submit(bp);
-       return 0;
+       iclog->ic_io_size = count;
+
+       bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
+       bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
+       iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+       iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+       iclog->ic_bio.bi_private = iclog;
+       iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+       if (need_flush)
+               iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+
+       xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+       if (is_vmalloc_addr(iclog->ic_data))
+               flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+
+       /*
+        * If this log buffer would straddle the end of the log we will have
+        * to split it up into two bios, so that we can continue at the start.
+        */
+       if (bno + BTOBB(count) > log->l_logBBsize) {
+               struct bio *split;
+
+               split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
+                                 GFP_NOIO, &fs_bio_set);
+               bio_chain(split, &iclog->ic_bio);
+               submit_bio(split);
+
+               /* restart at logical offset zero for the remainder */
+               iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
+       }
+
+       submit_bio(&iclog->ic_bio);
+}
+
+/*
+ * Bump the cycle number stored at the start of each BBSIZE-byte block in the
+ * part of the iclog that is written to the start of the log.  If the bumped
+ * cycle equals the header magic number, bump it once more to skip that value.
+ */
+static void
+xlog_split_iclog(
+       struct xlog             *log,
+       void                    *data,
+       uint64_t                bno,
+       unsigned int            count)
+{
+       unsigned int            split_offset = BBTOB(log->l_logBBsize - bno);
+       unsigned int            i;
+
+       for (i = split_offset; i < count; i += BBSIZE) { /* wrapped part only */
+               uint32_t cycle = get_unaligned_be32(data + i);
+
+               if (++cycle == XLOG_HEADER_MAGIC_NUM)
+                       cycle++;
+               put_unaligned_be32(cycle, data + i);
+       }
+}
+
+static int
+xlog_calc_iclog_size(
+       struct xlog             *log,
+       struct xlog_in_core     *iclog,
+       uint32_t                *roundoff)
+{
+       uint32_t                count_init, count;
+       bool                    use_lsunit;
+
+       use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
+                       log->l_mp->m_sb.sb_logsunit > 1;
+
+       /* Unrounded size: the LR header plus iclog->ic_offset bytes */
+       count_init = log->l_iclog_hsize + iclog->ic_offset;
+
+       /* Round the log write size up to a stripe unit or a basic block */
+       if (use_lsunit) {
+               /* we have a v2 stripe unit to use */
+               count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
+       } else {
+               count = BBTOB(BTOBB(count_init));
+       }
+
+       ASSERT(count >= count_init);
+       *roundoff = count - count_init; /* padding added by the rounding */
+
+       if (use_lsunit)
+               ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
+       else
+               ASSERT(*roundoff < BBTOB(1));
+       return count;   /* rounded byte count of the write */
  }
  
  /*
@@ -1824,46 +1827,23 @@ xlog_bdstrat(
   * log will require grabbing the lock though.
   *
   * The entire log manager uses a logical block numbering scheme.  Only
- * log_sync (and then only bwrite()) know about the fact that the log may
- * not start with block zero on a given device.  The log block start offset
- * is added immediately before calling bwrite().
+ * xlog_write_iclog knows about the fact that the log may not start with
+ * block zero on a given device.
   */
-
-STATIC int
+STATIC void
  xlog_sync(
         struct xlog             *log,
         struct xlog_in_core     *iclog)
  {
-       xfs_buf_t       *bp;
-       int             i;
-       uint            count;          /* byte count of bwrite */
-       uint            count_init;     /* initial count before roundup */
-       int             roundoff;       /* roundoff to BB or stripe */
-       int             split = 0;      /* split write into two regions */
-       int             error;
-       int             v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
-       int             size;
+       unsigned int            count;          /* byte count of bwrite */
+       unsigned int            roundoff;       /* roundoff to BB or stripe */
+       uint64_t                bno;
+       unsigned int            size;
+       bool                    need_flush = true, split = false;
  
-       XFS_STATS_INC(log->l_mp, xs_log_writes);
         ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
  
-       /* Add for LR header */
-       count_init = log->l_iclog_hsize + iclog->ic_offset;
-
-       /* Round out the log write size */
-       if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
-               /* we have a v2 stripe unit to use */
-               count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
-       } else {
-               count = BBTOB(BTOBB(count_init));
-       }
-       roundoff = count - count_init;
-       ASSERT(roundoff >= 0);
-       ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && 
-                roundoff < log->l_mp->m_sb.sb_logsunit)
-               || 
-               (log->l_mp->m_sb.sb_logsunit <= 1 && 
-                roundoff < BBTOB(1)));
+       count = xlog_calc_iclog_size(log, iclog, &roundoff);
  
         /* move grant heads by roundoff in sync */
         xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
@@ -1874,41 +1854,19 @@ xlog_sync(
  
         /* real byte length */
         size = iclog->ic_offset;
-       if (v2)
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
                 size += roundoff;
         iclog->ic_header.h_len = cpu_to_be32(size);
  
-       bp = iclog->ic_bp;
-       XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
-
+       XFS_STATS_INC(log->l_mp, xs_log_writes);
         XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
  
-       /* Do we need to split this write into 2 parts? */
-       if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
-               char            *dptr;
-
-               split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
-               count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
-               iclog->ic_bwritecnt = 2;
+       bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
  
-               /*
-                * Bump the cycle numbers at the start of each block in the
-                * part of the iclog that ends up in the buffer that gets
-                * written to the start of the log.
-                *
-                * Watch out for the header magic number case, though.
-                */
-               dptr = (char *)&iclog->ic_header + count;
-               for (i = 0; i < split; i += BBSIZE) {
-                       uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
-                       if (++cycle == XLOG_HEADER_MAGIC_NUM)
-                               cycle++;
-                       *(__be32 *)dptr = cpu_to_be32(cycle);
-
-                       dptr += BBSIZE;
-               }
-       } else {
-               iclog->ic_bwritecnt = 1;
+       /* Do we need to split this write into 2 parts? */
+       if (bno + BTOBB(count) > log->l_logBBsize) {
+               xlog_split_iclog(log, &iclog->ic_header, bno, count);
+               split = true;
         }
  
         /* calculcate the checksum */
@@ -1921,18 +1879,15 @@ xlog_sync(
          * write on I/O completion and shutdown the fs. The subsequent mount
          * detects the bad CRC and attempts to recover.
          */
+#ifdef DEBUG
         if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
                 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
-               iclog->ic_state |= XLOG_STATE_IOABORT;
+               iclog->ic_fail_crc = true;
                 xfs_warn(log->l_mp,
         "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
                          be64_to_cpu(iclog->ic_header.h_lsn));
         }
-
-       bp->b_io_length = BTOBB(count);
-       bp->b_log_item = iclog;
-       bp->b_flags &= ~XBF_FLUSH;
-       bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
+#endif
  
         /*
          * Flush the data device before flushing the log to make sure all meta
@@ -1942,50 +1897,14 @@ xlog_sync(
          * synchronously here; for an internal log we can simply use the block
          * layer state machine for preflushes.
          */
-       if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+       if (log->l_targ != log->l_mp->m_ddev_targp || split) {
                 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
-       else
-               bp->b_flags |= XBF_FLUSH;
-
-       ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
-       ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
-       xlog_verify_iclog(log, iclog, count, true);
-
-       /* account for log which doesn't start at block #0 */
-       XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-
-       /*
-        * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
-        * is shutting down.
-        */
-       error = xlog_bdstrat(bp);
-       if (error) {
-               xfs_buf_ioerror_alert(bp, "xlog_sync");
-               return error;
+               need_flush = false;
         }
-       if (split) {
-               bp = iclog->ic_log->l_xbuf;
-               XFS_BUF_SET_ADDR(bp, 0);             /* logical 0 */
-               xfs_buf_associate_memory(bp,
-                               (char *)&iclog->ic_header + count, split);
-               bp->b_log_item = iclog;
-               bp->b_flags &= ~XBF_FLUSH;
-               bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
-
-               ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
-               ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
-               /* account for internal log which doesn't start at block #0 */
-               XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-               error = xlog_bdstrat(bp);
-               if (error) {
-                       xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
-                       return error;
-               }
-       }
-       return 0;
-}      /* xlog_sync */
+
+       xlog_verify_iclog(log, iclog, count);
+       xlog_write_iclog(log, iclog, bno, count, need_flush);
+}
  
  /*
   * Deallocate a log structure
@@ -2005,31 +1924,21 @@ xlog_dealloc_log(
          */
         iclog = log->l_iclog;
         for (i = 0; i < log->l_iclog_bufs; i++) {
-               xfs_buf_lock(iclog->ic_bp);
-               xfs_buf_unlock(iclog->ic_bp);
+               down(&iclog->ic_sema);
+               up(&iclog->ic_sema);
                 iclog = iclog->ic_next;
         }
  
-       /*
-        * Always need to ensure that the extra buffer does not point to memory
-        * owned by another log buffer before we free it. Also, cycle the lock
-        * first to ensure we've completed IO on it.
-        */
-       xfs_buf_lock(log->l_xbuf);
-       xfs_buf_unlock(log->l_xbuf);
-       xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
-       xfs_buf_free(log->l_xbuf);
-
         iclog = log->l_iclog;
         for (i = 0; i < log->l_iclog_bufs; i++) {
-               xfs_buf_free(iclog->ic_bp);
                 next_iclog = iclog->ic_next;
+               kmem_free(iclog->ic_data);
                 kmem_free(iclog);
                 iclog = next_iclog;
         }
-       spinlock_destroy(&log->l_icloglock);
  
         log->l_mp->m_log = NULL;
+       destroy_workqueue(log->l_ioend_workqueue);
         kmem_free(log);
  }      /* xlog_dealloc_log */
  
@@ -2610,7 +2519,7 @@ xlog_state_clean_log(
                 if (iclog->ic_state == XLOG_STATE_DIRTY) {
                         iclog->ic_state = XLOG_STATE_ACTIVE;
                         iclog->ic_offset       = 0;
-                       ASSERT(iclog->ic_callback == NULL);
+                       ASSERT(list_empty_careful(&iclog->ic_callbacks));
                         /*
                          * If the number of ops in this iclog indicate it just
                          * contains the dummy transaction, we can
@@ -2680,37 +2589,32 @@ xlog_state_clean_log(
  
  STATIC xfs_lsn_t
  xlog_get_lowest_lsn(
-       struct xlog     *log)
+       struct xlog             *log)
  {
-       xlog_in_core_t  *lsn_log;
-       xfs_lsn_t       lowest_lsn, lsn;
+       struct xlog_in_core     *iclog = log->l_iclog;
+       xfs_lsn_t               lowest_lsn = 0, lsn;
  
-       lsn_log = log->l_iclog;
-       lowest_lsn = 0;
         do {
-           if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) {
-               lsn = be64_to_cpu(lsn_log->ic_header.h_lsn);
-               if ((lsn && !lowest_lsn) ||
-                   (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) {
+               if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+                       continue;
+
+               lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+               if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
                         lowest_lsn = lsn;
-               }
-           }
-           lsn_log = lsn_log->ic_next;
-       } while (lsn_log != log->l_iclog);
+       } while ((iclog = iclog->ic_next) != log->l_iclog);
+
         return lowest_lsn;
  }
  
-
  STATIC void
  xlog_state_do_callback(
         struct xlog             *log,
-       int                     aborted,
+       bool                    aborted,
         struct xlog_in_core     *ciclog)
  {
         xlog_in_core_t     *iclog;
         xlog_in_core_t     *first_iclog;        /* used to know when we've
                                                  * processed all iclogs once */
-       xfs_log_callback_t *cb, *cb_next;
         int                flushcnt = 0;
         xfs_lsn_t          lowest_lsn;
         int                ioerrors;    /* counter: iclogs with errors */
@@ -2821,7 +2725,7 @@ xlog_state_do_callback(
                                  */
                                 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
                                         be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
-                               if (iclog->ic_callback)
+                               if (!list_empty_careful(&iclog->ic_callbacks))
                                         atomic64_set(&log->l_last_sync_lsn,
                                                 be64_to_cpu(iclog->ic_header.h_lsn));
  
@@ -2838,26 +2742,20 @@ xlog_state_do_callback(
                          * callbacks being added.
                          */
                         spin_lock(&iclog->ic_callback_lock);
-                       cb = iclog->ic_callback;
-                       while (cb) {
-                               iclog->ic_callback_tail = &(iclog->ic_callback);
-                               iclog->ic_callback = NULL;
-                               spin_unlock(&iclog->ic_callback_lock);
+                       while (!list_empty(&iclog->ic_callbacks)) {
+                               LIST_HEAD(tmp);
  
-                               /* perform callbacks in the order given */
-                               for (; cb; cb = cb_next) {
-                                       cb_next = cb->cb_next;
-                                       cb->cb_func(cb->cb_arg, aborted);
-                               }
+                               list_splice_init(&iclog->ic_callbacks, &tmp);
+
+                               spin_unlock(&iclog->ic_callback_lock);
+                               xlog_cil_process_committed(&tmp, aborted);
                                 spin_lock(&iclog->ic_callback_lock);
-                               cb = iclog->ic_callback;
                         }
  
                         loopdidcallbacks++;
                         funcdidcallbacks++;
  
                         spin_lock(&log->l_icloglock);
-                       ASSERT(iclog->ic_callback == NULL);
                         spin_unlock(&iclog->ic_callback_lock);
                         if (!(iclog->ic_state & XLOG_STATE_IOERROR))
                                 iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2943,18 +2841,16 @@ xlog_state_do_callback(
   */
  STATIC void
  xlog_state_done_syncing(
-       xlog_in_core_t  *iclog,
-       int             aborted)
+       struct xlog_in_core     *iclog,
+       bool                    aborted)
  {
-       struct xlog        *log = iclog->ic_log;
+       struct xlog             *log = iclog->ic_log;
  
         spin_lock(&log->l_icloglock);
  
         ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
                iclog->ic_state == XLOG_STATE_IOERROR);
         ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
-       ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
-
  
         /*
          * If we got an error, either on the first buffer, or in the case of
@@ -2962,13 +2858,8 @@ xlog_state_done_syncing(
          * and none should ever be attempted to be written to disk
          * again.
          */
-       if (iclog->ic_state != XLOG_STATE_IOERROR) {
-               if (--iclog->ic_bwritecnt == 1) {
-                       spin_unlock(&log->l_icloglock);
-                       return;
-               }
+       if (iclog->ic_state != XLOG_STATE_IOERROR)
                 iclog->ic_state = XLOG_STATE_DONE_SYNC;
-       }
  
         /*
          * Someone could be sleeping prior to writing out the next
@@ -3237,7 +3128,7 @@ xlog_state_release_iclog(
          * flags after this point.
          */
         if (sync)
-               return xlog_sync(log, iclog);
+               xlog_sync(log, iclog);
         return 0;
  }      /* xlog_state_release_iclog */
  
@@ -3828,8 +3719,7 @@ STATIC void
  xlog_verify_iclog(
         struct xlog             *log,
         struct xlog_in_core     *iclog,
-       int                     count,
-       bool                    syncing)
+       int                     count)
  {
         xlog_op_header_t        *ophead;
         xlog_in_core_t          *icptr;
@@ -3873,7 +3763,7 @@ xlog_verify_iclog(
                 /* clientid is only 1 byte */
                 p = &ophead->oh_clientid;
                 field_offset = p - base_ptr;
-               if (!syncing || (field_offset & 0x1ff)) {
+               if (field_offset & 0x1ff) {
                         clientid = ophead->oh_clientid;
                 } else {
                         idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
@@ -3896,7 +3786,7 @@ xlog_verify_iclog(
                 /* check length */
                 p = &ophead->oh_len;
                 field_offset = p - base_ptr;
-               if (!syncing || (field_offset & 0x1ff)) {
+               if (field_offset & 0x1ff) {
                         op_len = be32_to_cpu(ophead->oh_len);
                 } else {
                         idx = BTOBBT((uintptr_t)&ophead->oh_len -
@@ -4033,7 +3923,7 @@ xfs_log_force_umount(
          * avoid races.
          */
         wake_up_all(&log->l_cilp->xc_commit_wait);
-       xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
+       xlog_state_do_callback(log, true, NULL);
  
  #ifdef XFSERRORDEBUG
         {
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h

index 73a64bf32f6f4a9214bebf2080d4a556ea634d1e..84e06805160f88d6d490153b3138e31a0c5e91b8 100644 (file)
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,6 +6,8 @@
  #ifndef        __XFS_LOG_H__
  #define __XFS_LOG_H__
  
+struct xfs_cil_ctx;
+
  struct xfs_log_vec {
         struct xfs_log_vec      *lv_next;       /* next lv in build list */
         int                     lv_niovecs;     /* number of iovecs in lv */
@@ -71,16 +73,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
         return buf;
  }
  
-/*
- * Structure used to pass callback function and the function's argument
- * to the log manager.
- */
-typedef struct xfs_log_callback {
-       struct xfs_log_callback *cb_next;
-       void                    (*cb_func)(void *, int);
-       void                    *cb_arg;
-} xfs_log_callback_t;
-
  /*
   * By comparing each component, we don't have to worry about extra
   * endian issues in treating two 32 bit numbers as one 64 bit number
@@ -125,12 +117,10 @@ int         xfs_log_mount(struct xfs_mount        *mp,
                         xfs_daddr_t             start_block,
                         int                     num_bblocks);
  int      xfs_log_mount_finish(struct xfs_mount *mp);
-int    xfs_log_mount_cancel(struct xfs_mount *);
+void   xfs_log_mount_cancel(struct xfs_mount *);
  xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
  xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
  void     xfs_log_space_wake(struct xfs_mount *mp);
-int      xfs_log_notify(struct xlog_in_core    *iclog,
-                        struct xfs_log_callback *callback_entry);
  int      xfs_log_release_iclog(struct xfs_mount *mp,
                          struct xlog_in_core     *iclog);
  int      xfs_log_reserve(struct xfs_mount *mp,
@@ -148,6 +138,7 @@ void          xfs_log_ticket_put(struct xlog_ticket *ticket);
  
  void   xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                 xfs_lsn_t *commit_lsn, bool regrant);
+void   xlog_cil_process_committed(struct list_head *list, bool aborted);
  bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
  
  void   xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 5e595948bc5a6ffabad05fbfda64ef5413d36623..fa5602d0fd7f6567bd1091f81d134144c5abbcd9 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -10,10 +10,7 @@
  #include "xfs_shared.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_error.h"
-#include "xfs_alloc.h"
  #include "xfs_extent_busy.h"
-#include "xfs_discard.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_log.h"
@@ -246,7 +243,8 @@ xfs_cil_prepare_item(
          * shadow buffer, so update the the pointer to it appropriately.
          */
         if (!old_lv) {
-               lv->lv_item->li_ops->iop_pin(lv->lv_item);
+               if (lv->lv_item->li_ops->iop_pin)
+                       lv->lv_item->li_ops->iop_pin(lv->lv_item);
                 lv->lv_item->li_lv_shadow = NULL;
         } else if (old_lv != lv) {
                 ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,10 +574,9 @@ xlog_discard_busy_extents(
   */
  static void
  xlog_cil_committed(
-       void    *args,
-       int     abort)
+       struct xfs_cil_ctx      *ctx,
+       bool                    abort)
  {
-       struct xfs_cil_ctx      *ctx = args;
         struct xfs_mount        *mp = ctx->cil->xc_log->l_mp;
  
         /*
@@ -614,6 +611,20 @@ xlog_cil_committed(
                 kmem_free(ctx);
  }
  
+void
+xlog_cil_process_committed(
+       struct list_head        *list,
+       bool                    aborted)
+{
+       struct xfs_cil_ctx      *ctx;
+
+       while ((ctx = list_first_entry_or_null(list,
+                       struct xfs_cil_ctx, iclog_entry))) {
+               list_del(&ctx->iclog_entry);
+               xlog_cil_committed(ctx, aborted);
+       }
+}
+
  /*
   * Push the Committed Item List to the log. If @push_seq flag is zero, then it
   * is a background flush and so we can chose to ignore it. Otherwise, if the
@@ -835,12 +846,15 @@ restart:
         if (commit_lsn == -1)
                 goto out_abort;
  
-       /* attach all the transactions w/ busy extents to iclog */
-       ctx->log_cb.cb_func = xlog_cil_committed;
-       ctx->log_cb.cb_arg = ctx;
-       error = xfs_log_notify(commit_iclog, &ctx->log_cb);
-       if (error)
+       spin_lock(&commit_iclog->ic_callback_lock);
+       if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
+               spin_unlock(&commit_iclog->ic_callback_lock);
                 goto out_abort;
+       }
+       ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
+                     commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
+       list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
+       spin_unlock(&commit_iclog->ic_callback_lock);
  
         /*
          * now the checkpoint commit is complete and we've attached the
@@ -864,7 +878,7 @@ out_skip:
  out_abort_free_ticket:
         xfs_log_ticket_put(tic);
  out_abort:
-       xlog_cil_committed(ctx, XFS_LI_ABORTED);
+       xlog_cil_committed(ctx, true);
         return -EIO;
  }
  
@@ -984,6 +998,7 @@ xfs_log_commit_cil(
  {
         struct xlog             *log = mp->m_log;
         struct xfs_cil          *cil = log->l_cilp;
+       struct xfs_log_item     *lip, *next;
         xfs_lsn_t               xc_commit_lsn;
  
         /*
@@ -1008,7 +1023,7 @@ xfs_log_commit_cil(
  
         /*
          * Once all the items of the transaction have been copied to the CIL,
-        * the items can be unlocked and freed.
+        * the items can be unlocked and possibly freed.
          *
          * This needs to be done before we drop the CIL context lock because we
          * have to update state in the log items and unlock them before they go
@@ -1017,8 +1032,12 @@ xfs_log_commit_cil(
          * the log items. This affects (at least) processing of stale buffers,
          * inodes and EFIs.
          */
-       xfs_trans_free_items(tp, xc_commit_lsn, false);
-
+       trace_xfs_trans_commit_items(tp, _RET_IP_);
+       list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+               xfs_trans_del_item(lip);
+               if (lip->li_ops->iop_committing)
+                       lip->li_ops->iop_committing(lip, xc_commit_lsn);
+       }
         xlog_cil_push_background(log);
  
         up_read(&cil->xc_ctx_lock);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index b5f82cb362020c7451b823c2ff7d1e23840e12fd..b880c23cb6e4ffd78324ff26a2890c0010f67d64 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -10,7 +10,6 @@ struct xfs_buf;
  struct xlog;
  struct xlog_ticket;
  struct xfs_mount;
-struct xfs_log_callback;
  
  /*
   * Flags for log structure
@@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i)
  #define XLOG_STATE_CALLBACK  0x0020 /* Callback functions now */
  #define XLOG_STATE_DIRTY     0x0040 /* Dirty IC log, not ready for ACTIVE status*/
  #define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
-#define XLOG_STATE_IOABORT   0x0100 /* force abort on I/O completion (debug) */
  #define XLOG_STATE_ALL      0x7FFF /* All possible valid flags */
  #define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
  
@@ -179,11 +177,10 @@ typedef struct xlog_ticket {
   *     the iclog.
   * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
   * - ic_next is the pointer to the next iclog in the ring.
- * - ic_bp is a pointer to the buffer used to write this incore log to disk.
   * - ic_log is a pointer back to the global log structure.
- * - ic_callback is a linked list of callback function/argument pairs to be
- *     called after an iclog finishes writing.
- * - ic_size is the full size of the header plus data.
+ * - ic_size is the full size of the log buffer, minus the cycle headers.
+ * - ic_io_size is the size of the currently pending log buffer write, which
+ *     might be smaller than ic_size.
   * - ic_offset is the current number of bytes written to in this iclog.
   * - ic_refcnt is bumped when someone is writing to the log.
   * - ic_state is the state of the iclog.
@@ -193,7 +190,7 @@ typedef struct xlog_ticket {
   * structure cacheline aligned. The following fields can be contended on
   * by independent processes:
   *
- *     - ic_callback_*
+ *     - ic_callbacks
   *     - ic_refcnt
   *     - fields protected by the global l_icloglock
   *
@@ -206,23 +203,28 @@ typedef struct xlog_in_core {
         wait_queue_head_t       ic_write_wait;
         struct xlog_in_core     *ic_next;
         struct xlog_in_core     *ic_prev;
-       struct xfs_buf          *ic_bp;
         struct xlog             *ic_log;
-       int                     ic_size;
-       int                     ic_offset;
-       int                     ic_bwritecnt;
+       u32                     ic_size;
+       u32                     ic_io_size;
+       u32                     ic_offset;
         unsigned short          ic_state;
         char                    *ic_datap;      /* pointer to iclog data */
  
         /* Callback structures need their own cacheline */
         spinlock_t              ic_callback_lock ____cacheline_aligned_in_smp;
-       struct xfs_log_callback *ic_callback;
-       struct xfs_log_callback **ic_callback_tail;
+       struct list_head        ic_callbacks;
  
         /* reference counts need their own cacheline */
         atomic_t                ic_refcnt ____cacheline_aligned_in_smp;
         xlog_in_core_2_t        *ic_data;
  #define ic_header      ic_data->hic_header
+#ifdef DEBUG
+       bool                    ic_fail_crc : 1;
+#endif
+       struct semaphore        ic_sema;
+       struct work_struct      ic_end_io_work;
+       struct bio              ic_bio;
+       struct bio_vec          ic_bvec[];
  } xlog_in_core_t;
  
  /*
@@ -243,7 +245,7 @@ struct xfs_cil_ctx {
         int                     space_used;     /* aggregate size of regions */
         struct list_head        busy_extents;   /* busy extents in chkpt */
         struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
-       struct xfs_log_callback log_cb;         /* completion callback hook. */
+       struct list_head        iclog_entry;
         struct list_head        committing;     /* ctx committing list */
         struct work_struct      discard_endio_work;
  };
@@ -350,9 +352,8 @@ struct xlog {
         struct xfs_mount        *l_mp;          /* mount point */
         struct xfs_ail          *l_ailp;        /* AIL log is working with */
         struct xfs_cil          *l_cilp;        /* CIL log is working with */
-       struct xfs_buf          *l_xbuf;        /* extra buffer for log
-                                                * wrapping */
         struct xfs_buftarg      *l_targ;        /* buftarg of log */
+       struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */
         struct delayed_work     l_work;         /* background flush work */
         uint                    l_flags;
         uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
@@ -361,7 +362,6 @@ struct xlog {
         int                     l_iclog_heads;  /* # of iclog header sectors */
         uint                    l_sectBBsize;   /* sector size in BBs (2^n) */
         int                     l_iclog_size;   /* size of log in bytes */
-       int                     l_iclog_size_log; /* log power size of log */
         int                     l_iclog_bufs;   /* number of iclog buffers */
         xfs_daddr_t             l_logBBstart;   /* start block of log */
         int                     l_logsize;      /* size of log in bytes */
@@ -418,7 +418,7 @@ xlog_recover(
  extern int
  xlog_recover_finish(
         struct xlog             *log);
-extern int
+extern void
  xlog_recover_cancel(struct xlog *);
  
  extern __le32   xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 9329f5adbfbef28648f169ce216b034a64b7d3fc..13d1d3e95b888fb2630c784869bf932092a51666 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -13,8 +13,6 @@
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_log.h"
@@ -26,7 +24,6 @@
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_quota.h"
-#include "xfs_cksum.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_bmap_btree.h"
@@ -79,7 +76,7 @@ struct xfs_buf_cancel {
   * are valid, false otherwise.
   */
  static inline bool
-xlog_verify_bp(
+xlog_verify_bno(
         struct xlog     *log,
         xfs_daddr_t     blk_no,
         int             bbcount)
@@ -92,22 +89,19 @@ xlog_verify_bp(
  }
  
  /*
- * Allocate a buffer to hold log data.  The buffer needs to be able
- * to map to a range of nbblks basic blocks at any valid (basic
- * block) offset within the log.
+ * Allocate a buffer to hold log data.  The buffer needs to be able to map to
+ * a range of nbblks basic blocks at any valid offset within the log.
   */
-STATIC xfs_buf_t *
-xlog_get_bp(
+static char *
+xlog_alloc_buffer(
         struct xlog     *log,
         int             nbblks)
  {
-       struct xfs_buf  *bp;
-
         /*
          * Pass log block 0 since we don't have an addr yet, buffer will be
          * verified on read.
          */
-       if (!xlog_verify_bp(log, 0, nbblks)) {
+       if (!xlog_verify_bno(log, 0, nbblks)) {
                 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
                         nbblks);
                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -115,69 +109,48 @@ xlog_get_bp(
         }
  
         /*
-        * We do log I/O in units of log sectors (a power-of-2
-        * multiple of the basic block size), so we round up the
-        * requested size to accommodate the basic blocks required
-        * for complete log sectors.
+        * We do log I/O in units of log sectors (a power-of-2 multiple of the
+        * basic block size), so we round up the requested size to accommodate
+        * the basic blocks required for complete log sectors.
          *
-        * In addition, the buffer may be used for a non-sector-
-        * aligned block offset, in which case an I/O of the
-        * requested size could extend beyond the end of the
-        * buffer.  If the requested size is only 1 basic block it
-        * will never straddle a sector boundary, so this won't be
-        * an issue.  Nor will this be a problem if the log I/O is
-        * done in basic blocks (sector size 1).  But otherwise we
-        * extend the buffer by one extra log sector to ensure
-        * there's space to accommodate this possibility.
+        * In addition, the buffer may be used for a non-sector-aligned block
+        * offset, in which case an I/O of the requested size could extend
+        * beyond the end of the buffer.  If the requested size is only 1 basic
+        * block it will never straddle a sector boundary, so this won't be an
+        * issue.  Nor will this be a problem if the log I/O is done in basic
+        * blocks (sector size 1).  But otherwise we extend the buffer by one
+        * extra log sector to ensure there's space to accommodate this
+        * possibility.
          */
         if (nbblks > 1 && log->l_sectBBsize > 1)
                 nbblks += log->l_sectBBsize;
         nbblks = round_up(nbblks, log->l_sectBBsize);
-
-       bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
-       if (bp)
-               xfs_buf_unlock(bp);
-       return bp;
-}
-
-STATIC void
-xlog_put_bp(
-       xfs_buf_t       *bp)
-{
-       xfs_buf_free(bp);
+       return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
  }
  
  /*
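- * Return the address of the start of the given block number's data
- * in a log buffer.  The buffer covers a log sector-aligned region.
+ * Return the byte offset of the start of the given block number's data
+ * within a log buffer.  The buffer covers a log sector-aligned region.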
   */
-STATIC char *
+static inline unsigned int
  xlog_align(
         struct xlog     *log,
-       xfs_daddr_t     blk_no,
-       int             nbblks,
-       struct xfs_buf  *bp)
+       xfs_daddr_t     blk_no)
  {
-       xfs_daddr_t     offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
-
-       ASSERT(offset + nbblks <= bp->b_length);
-       return bp->b_addr + BBTOB(offset);
+       return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
  }
  
-
-/*
- * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
- */
-STATIC int
-xlog_bread_noalign(
-       struct xlog     *log,
-       xfs_daddr_t     blk_no,
-       int             nbblks,
-       struct xfs_buf  *bp)
+static int
+xlog_do_io(
+       struct xlog             *log,
+       xfs_daddr_t             blk_no,
+       unsigned int            nbblks,
+       char                    *data,
+       unsigned int            op)
  {
-       int             error;
+       int                     error;
  
-       if (!xlog_verify_bp(log, blk_no, nbblks)) {
+       if (!xlog_verify_bno(log, blk_no, nbblks)) {
                 xfs_warn(log->l_mp,
                          "Invalid log block/length (0x%llx, 0x%x) for buffer",
                          blk_no, nbblks);
@@ -187,107 +160,53 @@ xlog_bread_noalign(
  
         blk_no = round_down(blk_no, log->l_sectBBsize);
         nbblks = round_up(nbblks, log->l_sectBBsize);
-
         ASSERT(nbblks > 0);
-       ASSERT(nbblks <= bp->b_length);
-
-       XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       bp->b_flags |= XBF_READ;
-       bp->b_io_length = nbblks;
-       bp->b_error = 0;
  
-       error = xfs_buf_submit(bp);
-       if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
-               xfs_buf_ioerror_alert(bp, __func__);
+       error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
+                       BBTOB(nbblks), data, op);
+       if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
+               xfs_alert(log->l_mp,
+                         "log recovery %s I/O error at daddr 0x%llx len %d error %d",
+                         op == REQ_OP_WRITE ? "write" : "read",
+                         blk_no, nbblks, error);
+       }
         return error;
  }
  
  STATIC int
-xlog_bread(
+xlog_bread_noalign(
         struct xlog     *log,
         xfs_daddr_t     blk_no,
         int             nbblks,
-       struct xfs_buf  *bp,
-       char            **offset)
+       char            *data)
  {
-       int             error;
-
-       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
-       if (error)
-               return error;
-
-       *offset = xlog_align(log, blk_no, nbblks, bp);
-       return 0;
+       return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
  }
  
-/*
- * Read at an offset into the buffer. Returns with the buffer in it's original
- * state regardless of the result of the read.
- */
  STATIC int
-xlog_bread_offset(
+xlog_bread(
         struct xlog     *log,
-       xfs_daddr_t     blk_no,         /* block to read from */
-       int             nbblks,         /* blocks to read */
-       struct xfs_buf  *bp,
-       char            *offset)
+       xfs_daddr_t     blk_no,
+       int             nbblks,
+       char            *data,
+       char            **offset)
  {
-       char            *orig_offset = bp->b_addr;
-       int             orig_len = BBTOB(bp->b_length);
-       int             error, error2;
-
-       error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
-       if (error)
-               return error;
-
-       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+       int             error;
  
-       /* must reset buffer pointer even on error */
-       error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
-       if (error)
-               return error;
-       return error2;
+       error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
+       if (!error)
+               *offset = data + xlog_align(log, blk_no);
+       return error;
  }
  
-/*
- * Write out the buffer at the given block for the given number of blocks.
- * The buffer is kept locked across the write and is returned locked.
- * This can only be used for synchronous log writes.
- */
  STATIC int
  xlog_bwrite(
         struct xlog     *log,
         xfs_daddr_t     blk_no,
         int             nbblks,
-       struct xfs_buf  *bp)
+       char            *data)
  {
-       int             error;
-
-       if (!xlog_verify_bp(log, blk_no, nbblks)) {
-               xfs_warn(log->l_mp,
-                        "Invalid log block/length (0x%llx, 0x%x) for buffer",
-                        blk_no, nbblks);
-               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
-               return -EFSCORRUPTED;
-       }
-
-       blk_no = round_down(blk_no, log->l_sectBBsize);
-       nbblks = round_up(nbblks, log->l_sectBBsize);
-
-       ASSERT(nbblks > 0);
-       ASSERT(nbblks <= bp->b_length);
-
-       XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       xfs_buf_hold(bp);
-       xfs_buf_lock(bp);
-       bp->b_io_length = nbblks;
-       bp->b_error = 0;
-
-       error = xfs_bwrite(bp);
-       if (error)
-               xfs_buf_ioerror_alert(bp, __func__);
-       xfs_buf_relse(bp);
-       return error;
+       return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
  }
  
  #ifdef DEBUG
@@ -377,10 +296,9 @@ xlog_recover_iodone(
                  * We're not going to bother about retrying
                  * this during recovery. One strike!
                  */
-               if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+               if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                         xfs_buf_ioerror_alert(bp, __func__);
-                       xfs_force_shutdown(bp->b_target->bt_mount,
-                                               SHUTDOWN_META_IO_ERROR);
+                       xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
                 }
         }
  
@@ -405,7 +323,7 @@ xlog_recover_iodone(
  STATIC int
  xlog_find_cycle_start(
         struct xlog     *log,
-       struct xfs_buf  *bp,
+       char            *buffer,
         xfs_daddr_t     first_blk,
         xfs_daddr_t     *last_blk,
         uint            cycle)
@@ -419,7 +337,7 @@ xlog_find_cycle_start(
         end_blk = *last_blk;
         mid_blk = BLK_AVG(first_blk, end_blk);
         while (mid_blk != first_blk && mid_blk != end_blk) {
-               error = xlog_bread(log, mid_blk, 1, bp, &offset);
+               error = xlog_bread(log, mid_blk, 1, buffer, &offset);
                 if (error)
                         return error;
                 mid_cycle = xlog_get_cycle(offset);
@@ -455,7 +373,7 @@ xlog_find_verify_cycle(
  {
         xfs_daddr_t     i, j;
         uint            cycle;
-       xfs_buf_t       *bp;
+       char            *buffer;
         xfs_daddr_t     bufblks;
         char            *buf = NULL;
         int             error = 0;
@@ -469,7 +387,7 @@ xlog_find_verify_cycle(
         bufblks = 1 << ffs(nbblks);
         while (bufblks > log->l_logBBsize)
                 bufblks >>= 1;
-       while (!(bp = xlog_get_bp(log, bufblks))) {
+       while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
                 bufblks >>= 1;
                 if (bufblks < log->l_sectBBsize)
                         return -ENOMEM;
@@ -480,7 +398,7 @@ xlog_find_verify_cycle(
  
                 bcount = min(bufblks, (start_blk + nbblks - i));
  
-               error = xlog_bread(log, i, bcount, bp, &buf);
+               error = xlog_bread(log, i, bcount, buffer, &buf);
                 if (error)
                         goto out;
  
@@ -498,7 +416,7 @@ xlog_find_verify_cycle(
         *new_blk = -1;
  
  out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
         return error;
  }
  
@@ -522,7 +440,7 @@ xlog_find_verify_log_record(
         int                     extra_bblks)
  {
         xfs_daddr_t             i;
-       xfs_buf_t               *bp;
+       char                    *buffer;
         char                    *offset = NULL;
         xlog_rec_header_t       *head = NULL;
         int                     error = 0;
@@ -532,12 +450,14 @@ xlog_find_verify_log_record(
  
         ASSERT(start_blk != 0 || *last_blk != start_blk);
  
-       if (!(bp = xlog_get_bp(log, num_blks))) {
-               if (!(bp = xlog_get_bp(log, 1)))
+       buffer = xlog_alloc_buffer(log, num_blks);
+       if (!buffer) {
+               buffer = xlog_alloc_buffer(log, 1);
+               if (!buffer)
                         return -ENOMEM;
                 smallmem = 1;
         } else {
-               error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+               error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
                 if (error)
                         goto out;
                 offset += ((num_blks - 1) << BBSHIFT);
@@ -554,7 +474,7 @@ xlog_find_verify_log_record(
                 }
  
                 if (smallmem) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                         if (error)
                                 goto out;
                 }
@@ -607,7 +527,7 @@ xlog_find_verify_log_record(
                 *last_blk = i;
  
  out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
         return error;
  }
  
@@ -629,7 +549,7 @@ xlog_find_head(
         struct xlog     *log,
         xfs_daddr_t     *return_head_blk)
  {
-       xfs_buf_t       *bp;
+       char            *buffer;
         char            *offset;
         xfs_daddr_t     new_blk, first_blk, start_blk, last_blk, head_blk;
         int             num_scan_bblks;
@@ -659,20 +579,20 @@ xlog_find_head(
         }
  
         first_blk = 0;                  /* get cycle # of 1st block */
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                 return -ENOMEM;
  
-       error = xlog_bread(log, 0, 1, bp, &offset);
+       error = xlog_bread(log, 0, 1, buffer, &offset);
         if (error)
-               goto bp_err;
+               goto out_free_buffer;
  
         first_half_cycle = xlog_get_cycle(offset);
  
         last_blk = head_blk = log_bbnum - 1;    /* get cycle # of last block */
-       error = xlog_bread(log, last_blk, 1, bp, &offset);
+       error = xlog_bread(log, last_blk, 1, buffer, &offset);
         if (error)
-               goto bp_err;
+               goto out_free_buffer;
  
         last_half_cycle = xlog_get_cycle(offset);
         ASSERT(last_half_cycle != 0);
@@ -740,9 +660,10 @@ xlog_find_head(
                  *                           ^ we want to locate this spot
                  */
                 stop_on_cycle = last_half_cycle;
-               if ((error = xlog_find_cycle_start(log, bp, first_blk,
-                                               &head_blk, last_half_cycle)))
-                       goto bp_err;
+               error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
+                               last_half_cycle);
+               if (error)
+                       goto out_free_buffer;
         }
  
         /*
@@ -762,7 +683,7 @@ xlog_find_head(
                 if ((error = xlog_find_verify_cycle(log,
                                                 start_blk, num_scan_bblks,
                                                 stop_on_cycle, &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                 if (new_blk != -1)
                         head_blk = new_blk;
         } else {                /* need to read 2 parts of log */
@@ -799,7 +720,7 @@ xlog_find_head(
                 if ((error = xlog_find_verify_cycle(log, start_blk,
                                         num_scan_bblks - (int)head_blk,
                                         (stop_on_cycle - 1), &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                 if (new_blk != -1) {
                         head_blk = new_blk;
                         goto validate_head;
@@ -815,7 +736,7 @@ xlog_find_head(
                 if ((error = xlog_find_verify_cycle(log,
                                         start_blk, (int)head_blk,
                                         stop_on_cycle, &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                 if (new_blk != -1)
                         head_blk = new_blk;
         }
@@ -834,13 +755,13 @@ validate_head:
                 if (error == 1)
                         error = -EIO;
                 if (error)
-                       goto bp_err;
+                       goto out_free_buffer;
         } else {
                 start_blk = 0;
                 ASSERT(head_blk <= INT_MAX);
                 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
                 if (error < 0)
-                       goto bp_err;
+                       goto out_free_buffer;
                 if (error == 1) {
                         /* We hit the beginning of the log during our search */
                         start_blk = log_bbnum - (num_scan_bblks - head_blk);
@@ -853,14 +774,14 @@ validate_head:
                         if (error == 1)
                                 error = -EIO;
                         if (error)
-                               goto bp_err;
+                               goto out_free_buffer;
                         if (new_blk != log_bbnum)
                                 head_blk = new_blk;
                 } else if (error)
-                       goto bp_err;
+                       goto out_free_buffer;
         }
  
-       xlog_put_bp(bp);
+       kmem_free(buffer);
         if (head_blk == log_bbnum)
                 *return_head_blk = 0;
         else
@@ -873,9 +794,8 @@ validate_head:
          */
         return 0;
  
- bp_err:
-       xlog_put_bp(bp);
-
+out_free_buffer:
+       kmem_free(buffer);
         if (error)
                 xfs_warn(log->l_mp, "failed to find log head");
         return error;
@@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr(
         xfs_daddr_t             head_blk,
         xfs_daddr_t             tail_blk,
         int                     count,
-       struct xfs_buf          *bp,
+       char                    *buffer,
         xfs_daddr_t             *rblk,
         struct xlog_rec_header  **rhead,
         bool                    *wrapped)
@@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr(
          */
         end_blk = head_blk > tail_blk ? tail_blk : 0;
         for (i = (int) head_blk - 1; i >= end_blk; i--) {
-               error = xlog_bread(log, i, 1, bp, &offset);
+               error = xlog_bread(log, i, 1, buffer, &offset);
                 if (error)
                         goto out_error;
  
@@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr(
          */
         if (tail_blk >= head_blk && found != count) {
                 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                         if (error)
                                 goto out_error;
  
@@ -969,7 +889,7 @@ xlog_seek_logrec_hdr(
         xfs_daddr_t             head_blk,
         xfs_daddr_t             tail_blk,
         int                     count,
-       struct xfs_buf          *bp,
+       char                    *buffer,
         xfs_daddr_t             *rblk,
         struct xlog_rec_header  **rhead,
         bool                    *wrapped)
@@ -988,7 +908,7 @@ xlog_seek_logrec_hdr(
          */
         end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
         for (i = (int) tail_blk; i <= end_blk; i++) {
-               error = xlog_bread(log, i, 1, bp, &offset);
+               error = xlog_bread(log, i, 1, buffer, &offset);
                 if (error)
                         goto out_error;
  
@@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr(
          */
         if (tail_blk > head_blk && found != count) {
                 for (i = 0; i < (int) head_blk; i++) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                         if (error)
                                 goto out_error;
  
@@ -1069,22 +989,22 @@ xlog_verify_tail(
         int                     hsize)
  {
         struct xlog_rec_header  *thead;
-       struct xfs_buf          *bp;
+       char                    *buffer;
         xfs_daddr_t             first_bad;
         int                     error = 0;
         bool                    wrapped;
         xfs_daddr_t             tmp_tail;
         xfs_daddr_t             orig_tail = *tail_blk;
  
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                 return -ENOMEM;
  
         /*
          * Make sure the tail points to a record (returns positive count on
          * success).
          */
-       error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+       error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
                         &tmp_tail, &thead, &wrapped);
         if (error < 0)
                 goto out;
@@ -1113,8 +1033,8 @@ xlog_verify_tail(
                         break;
  
                 /* skip to the next record; returns positive count on success */
-               error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
-                               &tmp_tail, &thead, &wrapped);
+               error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
+                               buffer, &tmp_tail, &thead, &wrapped);
                 if (error < 0)
                         goto out;
  
@@ -1129,7 +1049,7 @@ xlog_verify_tail(
                 "Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
                          orig_tail, *tail_blk);
  out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
         return error;
  }
  
@@ -1151,13 +1071,13 @@ xlog_verify_head(
         struct xlog             *log,
         xfs_daddr_t             *head_blk,      /* in/out: unverified head */
         xfs_daddr_t             *tail_blk,      /* out: tail block */
-       struct xfs_buf          *bp,
+       char                    *buffer,
         xfs_daddr_t             *rhead_blk,     /* start blk of last record */
         struct xlog_rec_header  **rhead,        /* ptr to last record */
         bool                    *wrapped)       /* last rec. wraps phys. log */
  {
         struct xlog_rec_header  *tmp_rhead;
-       struct xfs_buf          *tmp_bp;
+       char                    *tmp_buffer;
         xfs_daddr_t             first_bad;
         xfs_daddr_t             tmp_rhead_blk;
         int                     found;
@@ -1168,15 +1088,15 @@ xlog_verify_head(
          * Check the head of the log for torn writes. Search backwards from the
          * head until we hit the tail or the maximum number of log record I/Os
          * that could have been in flight at one time. Use a temporary buffer so
-        * we don't trash the rhead/bp pointers from the caller.
+        * we don't trash the rhead/buffer pointers from the caller.
          */
-       tmp_bp = xlog_get_bp(log, 1);
-       if (!tmp_bp)
+       tmp_buffer = xlog_alloc_buffer(log, 1);
+       if (!tmp_buffer)
                 return -ENOMEM;
         error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
-                                     XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
-                                     &tmp_rhead, &tmp_wrapped);
-       xlog_put_bp(tmp_bp);
+                                     XLOG_MAX_ICLOGS, tmp_buffer,
+                                     &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
+       kmem_free(tmp_buffer);
         if (error < 0)
                 return error;
  
@@ -1205,8 +1125,8 @@ xlog_verify_head(
                  * (i.e., the records with invalid CRC) if the cycle number
-                * matches the the current cycle.
+                * matches the current cycle.
                  */
-               found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
-                                             rhead_blk, rhead, wrapped);
+               found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
+                               buffer, rhead_blk, rhead, wrapped);
                 if (found < 0)
                         return found;
                 if (found == 0)         /* XXX: right thing to do here? */
@@ -1266,7 +1186,7 @@ xlog_check_unmount_rec(
         xfs_daddr_t             *tail_blk,
         struct xlog_rec_header  *rhead,
         xfs_daddr_t             rhead_blk,
-       struct xfs_buf          *bp,
+       char                    *buffer,
         bool                    *clean)
  {
         struct xlog_op_header   *op_head;
@@ -1309,7 +1229,7 @@ xlog_check_unmount_rec(
         if (*head_blk == after_umount_blk &&
             be32_to_cpu(rhead->h_num_logops) == 1) {
                 umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
-               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
                 if (error)
                         return error;
  
@@ -1388,7 +1308,7 @@ xlog_find_tail(
  {
         xlog_rec_header_t       *rhead;
         char                    *offset = NULL;
-       xfs_buf_t               *bp;
+       char                    *buffer;
         int                     error;
         xfs_daddr_t             rhead_blk;
         xfs_lsn_t               tail_lsn;
@@ -1402,11 +1322,11 @@ xlog_find_tail(
                 return error;
         ASSERT(*head_blk < INT_MAX);
  
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                 return -ENOMEM;
         if (*head_blk == 0) {                           /* special case */
-               error = xlog_bread(log, 0, 1, bp, &offset);
+               error = xlog_bread(log, 0, 1, buffer, &offset);
                 if (error)
                         goto done;
  
@@ -1422,7 +1342,7 @@ xlog_find_tail(
          * block. This wraps all the way back around to the head so something is
          * seriously wrong if we can't find it.
          */
-       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
                                       &rhead_blk, &rhead, &wrapped);
         if (error < 0)
                 return error;
@@ -1443,7 +1363,7 @@ xlog_find_tail(
          * state to determine whether recovery is necessary.
          */
         error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
-                                      rhead_blk, bp, &clean);
+                                      rhead_blk, buffer, &clean);
         if (error)
                 goto done;
  
@@ -1460,7 +1380,7 @@ xlog_find_tail(
         if (!clean) {
                 xfs_daddr_t     orig_head = *head_blk;
  
-               error = xlog_verify_head(log, head_blk, tail_blk, bp,
+               error = xlog_verify_head(log, head_blk, tail_blk, buffer,
                                          &rhead_blk, &rhead, &wrapped);
                 if (error)
                         goto done;
@@ -1471,7 +1391,7 @@ xlog_find_tail(
                                        wrapped);
                         tail_lsn = atomic64_read(&log->l_tail_lsn);
                         error = xlog_check_unmount_rec(log, head_blk, tail_blk,
-                                                      rhead, rhead_blk, bp,
+                                                      rhead, rhead_blk, buffer,
                                                        &clean);
                         if (error)
                                 goto done;
@@ -1505,11 +1425,11 @@ xlog_find_tail(
          * But... if the -device- itself is readonly, just skip this.
          * We can't recover this device anyway, so it won't matter.
          */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
+       if (!xfs_readonly_buftarg(log->l_targ))
                 error = xlog_clear_stale_blocks(log, tail_lsn);
  
  done:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
  
         if (error)
                 xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1537,7 +1457,7 @@ xlog_find_zeroed(
         struct xlog     *log,
         xfs_daddr_t     *blk_no)
  {
-       xfs_buf_t       *bp;
+       char            *buffer;
         char            *offset;
         uint            first_cycle, last_cycle;
         xfs_daddr_t     new_blk, last_blk, start_blk;
@@ -1547,35 +1467,36 @@ xlog_find_zeroed(
         *blk_no = 0;
  
         /* check totally zeroed log */
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                 return -ENOMEM;
-       error = xlog_bread(log, 0, 1, bp, &offset);
+       error = xlog_bread(log, 0, 1, buffer, &offset);
         if (error)
-               goto bp_err;
+               goto out_free_buffer;
  
         first_cycle = xlog_get_cycle(offset);
         if (first_cycle == 0) {         /* completely zeroed log */
                 *blk_no = 0;
-               xlog_put_bp(bp);
+               kmem_free(buffer);
                 return 1;
         }
  
         /* check partially zeroed log */
-       error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+       error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
         if (error)
-               goto bp_err;
+               goto out_free_buffer;
  
         last_cycle = xlog_get_cycle(offset);
         if (last_cycle != 0) {          /* log completely written to */
-               xlog_put_bp(bp);
+               kmem_free(buffer);
                 return 0;
         }
  
         /* we have a partially zeroed log */
         last_blk = log_bbnum-1;
-       if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
-               goto bp_err;
+       error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
+       if (error)
+               goto out_free_buffer;
  
         /*
          * Validate the answer.  Because there is no way to guarantee that
@@ -1598,7 +1519,7 @@ xlog_find_zeroed(
          */
         if ((error = xlog_find_verify_cycle(log, start_blk,
                                          (int)num_scan_bblks, 0, &new_blk)))
-               goto bp_err;
+               goto out_free_buffer;
         if (new_blk != -1)
                 last_blk = new_blk;
  
@@ -1610,11 +1531,11 @@ xlog_find_zeroed(
         if (error == 1)
                 error = -EIO;
         if (error)
-               goto bp_err;
+               goto out_free_buffer;
  
         *blk_no = last_blk;
-bp_err:
-       xlog_put_bp(bp);
+out_free_buffer:
+       kmem_free(buffer);
         if (error)
                 return error;
         return 1;
@@ -1657,7 +1578,7 @@ xlog_write_log_records(
         int             tail_block)
  {
         char            *offset;
-       xfs_buf_t       *bp;
+       char            *buffer;
         int             balign, ealign;
         int             sectbb = log->l_sectBBsize;
         int             end_block = start_block + blocks;
@@ -1674,7 +1595,7 @@ xlog_write_log_records(
         bufblks = 1 << ffs(blocks);
         while (bufblks > log->l_logBBsize)
                 bufblks >>= 1;
-       while (!(bp = xlog_get_bp(log, bufblks))) {
+       while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
                 bufblks >>= 1;
                 if (bufblks < sectbb)
                         return -ENOMEM;
@@ -1686,9 +1607,9 @@ xlog_write_log_records(
          */
         balign = round_down(start_block, sectbb);
         if (balign != start_block) {
-               error = xlog_bread_noalign(log, start_block, 1, bp);
+               error = xlog_bread_noalign(log, start_block, 1, buffer);
                 if (error)
-                       goto out_put_bp;
+                       goto out_free_buffer;
  
                 j = start_block - balign;
         }
@@ -1705,29 +1626,28 @@ xlog_write_log_records(
                  */
                 ealign = round_down(end_block, sectbb);
                 if (j == 0 && (start_block + endcount > ealign)) {
-                       offset = bp->b_addr + BBTOB(ealign - start_block);
-                       error = xlog_bread_offset(log, ealign, sectbb,
-                                                       bp, offset);
+                       error = xlog_bread_noalign(log, ealign, sectbb,
+                                       buffer + BBTOB(ealign - start_block));
                         if (error)
                                 break;
  
                 }
  
-               offset = xlog_align(log, start_block, endcount, bp);
+               offset = buffer + xlog_align(log, start_block);
                 for (; j < endcount; j++) {
                         xlog_add_record(log, offset, cycle, i+j,
                                         tail_cycle, tail_block);
                         offset += BBSIZE;
                 }
-               error = xlog_bwrite(log, start_block, endcount, bp);
+               error = xlog_bwrite(log, start_block, endcount, buffer);
                 if (error)
                         break;
                 start_block += endcount;
                 j = 0;
         }
  
- out_put_bp:
-       xlog_put_bp(bp);
+out_free_buffer:
+       kmem_free(buffer);
         return error;
  }
  
@@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer(
         if (xfs_sb_version_hascrc(&mp->m_sb))
                 bp->b_ops = &xfs_inode_buf_ops;
  
-       inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
+       inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
         for (i = 0; i < inodes_per_buf; i++) {
                 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
                         offsetof(xfs_dinode_t, di_next_unlinked);
@@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer(
  
                 ASSERT(item->ri_buf[item_index].i_addr != NULL);
                 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
-               ASSERT((reg_buf_offset + reg_buf_bytes) <=
-                                                       BBTOB(bp->b_io_length));
+               ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
  
                 /*
                  * The current logged region contains a copy of the
@@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer(
                 ASSERT(nbits > 0);
                 ASSERT(item->ri_buf[i].i_addr != NULL);
                 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
-               ASSERT(BBTOB(bp->b_io_length) >=
+               ASSERT(BBTOB(bp->b_length) >=
                        ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
  
                 /*
@@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2(
          *
          * Also make sure that only inode buffers with good sizes stay in
          * the buffer cache.  The kernel moves inodes in buffers of 1 block
-        * or mp->m_inode_cluster_size bytes, whichever is bigger.  The inode
+        * or inode_cluster_size bytes, whichever is bigger.  The inode
          * buffers in the log can be a different size if the log was generated
          * by an older kernel using unclustered inode buffers or a newer kernel
          * running with a different inode cluster size.  Regardless, if the
-        * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size)
-        * for *our* value of mp->m_inode_cluster_size, then we need to keep
+        * inode buffer size isn't max(blocksize, inode_cluster_size)
+        * for *our* value of inode_cluster_size, then we need to keep
          * the buffer out of the buffer cache so that the buffer won't
          * overlap with future reads of those inodes.
          */
         if (XFS_DINODE_MAGIC ==
             be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
-           (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize,
-                       (uint32_t)log->l_mp->m_inode_cluster_size))) {
+           (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
                 xfs_buf_stale(bp);
                 error = xfs_bwrite(bp);
         } else {
-               ASSERT(bp->b_target->bt_mount == mp);
+               ASSERT(bp->b_mount == mp);
                 bp->b_iodone = xlog_recover_iodone;
                 xfs_buf_delwri_queue(bp, buffer_list);
         }
@@ -3260,7 +3178,7 @@ out_owner_change:
         /* re-generate the checksum. */
         xfs_dinode_calc_crc(log->l_mp, dip);
  
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
         bp->b_iodone = xlog_recover_iodone;
         xfs_buf_delwri_queue(bp, buffer_list);
  
@@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2(
         }
  
         ASSERT(dq_f->qlf_size == 2);
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
         bp->b_iodone = xlog_recover_iodone;
         xfs_buf_delwri_queue(bp, buffer_list);
  
@@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2(
  {
         xfs_efd_log_format_t    *efd_formatp;
         xfs_efi_log_item_t      *efip = NULL;
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
         uint64_t                efi_id;
         struct xfs_ail_cursor   cur;
         struct xfs_ail          *ailp = log->l_ailp;
@@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2(
  {
         struct xfs_mount        *mp = log->l_mp;
         struct xfs_icreate_log  *icl;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
         xfs_agnumber_t          agno;
         xfs_agblock_t           agbno;
         unsigned int            count;
@@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2(
  
         /*
          * The inode chunk is either full or sparse and we only support
-        * m_ialloc_min_blks sized sparse allocations at this time.
+        * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
          */
-       if (length != mp->m_ialloc_blks &&
-           length != mp->m_ialloc_min_blks) {
+       if (length != igeo->ialloc_blks &&
+           length != igeo->ialloc_min_blks) {
                 xfs_warn(log->l_mp,
                          "%s: unsupported chunk length", __FUNCTION__);
                 return -EINVAL;
@@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2(
          * buffers for cancellation so we don't overwrite anything written after
          * a cancellation.
          */
-       bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
-       nbufs = length / mp->m_blocks_per_cluster;
+       bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+       nbufs = length / igeo->blocks_per_cluster;
         for (i = 0, cancel_count = 0; i < nbufs; i++) {
                 xfs_daddr_t     daddr;
  
                 daddr = XFS_AGB_TO_DADDR(mp, agno,
-                                        agbno + i * mp->m_blocks_per_cluster);
+                               agbno + i * igeo->blocks_per_cluster);
                 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
                         cancel_count++;
         }
@@ -4956,12 +4875,11 @@ out:
   * A cancel occurs when the mount has failed and we're bailing out.
   * Release all pending log intent items so they don't pin the AIL.
   */
-STATIC int
+STATIC void
  xlog_recover_cancel_intents(
         struct xlog             *log)
  {
         struct xfs_log_item     *lip;
-       int                     error = 0;
         struct xfs_ail_cursor   cur;
         struct xfs_ail          *ailp;
  
@@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents(
  
         xfs_trans_ail_cursor_done(&cur);
         spin_unlock(&ailp->ail_lock);
-       return error;
  }
  
  /*
@@ -5307,7 +5224,7 @@ xlog_do_recovery_pass(
         xfs_daddr_t             blk_no, rblk_no;
         xfs_daddr_t             rhead_blk;
         char                    *offset;
-       xfs_buf_t               *hbp, *dbp;
+       char                    *hbp, *dbp;
         int                     error = 0, h_size, h_len;
         int                     error2 = 0;
         int                     bblks, split_bblks;
@@ -5332,7 +5249,7 @@ xlog_do_recovery_pass(
                  * iclog header and extract the header size from it.  Get a
                  * new hbp that is the correct size.
                  */
-               hbp = xlog_get_bp(log, 1);
+               hbp = xlog_alloc_buffer(log, 1);
                 if (!hbp)
                         return -ENOMEM;
  
@@ -5374,23 +5291,23 @@ xlog_do_recovery_pass(
                         hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
                         if (h_size % XLOG_HEADER_CYCLE_SIZE)
                                 hblks++;
-                       xlog_put_bp(hbp);
-                       hbp = xlog_get_bp(log, hblks);
+                       kmem_free(hbp);
+                       hbp = xlog_alloc_buffer(log, hblks);
                 } else {
                         hblks = 1;
                 }
         } else {
                 ASSERT(log->l_sectBBsize == 1);
                 hblks = 1;
-               hbp = xlog_get_bp(log, 1);
+               hbp = xlog_alloc_buffer(log, 1);
                 h_size = XLOG_BIG_RECORD_BSIZE;
         }
  
         if (!hbp)
                 return -ENOMEM;
-       dbp = xlog_get_bp(log, BTOBB(h_size));
+       dbp = xlog_alloc_buffer(log, BTOBB(h_size));
         if (!dbp) {
-               xlog_put_bp(hbp);
+               kmem_free(hbp);
                 return -ENOMEM;
         }
  
@@ -5405,7 +5322,7 @@ xlog_do_recovery_pass(
                         /*
                          * Check for header wrapping around physical end-of-log
                          */
-                       offset = hbp->b_addr;
+                       offset = hbp;
                         split_hblks = 0;
                         wrapped_hblks = 0;
                         if (blk_no + hblks <= log->l_logBBsize) {
@@ -5441,8 +5358,8 @@ xlog_do_recovery_pass(
                                  *   - order is important.
                                  */
                                 wrapped_hblks = hblks - split_hblks;
-                               error = xlog_bread_offset(log, 0,
-                                               wrapped_hblks, hbp,
+                               error = xlog_bread_noalign(log, 0,
+                                               wrapped_hblks,
                                                 offset + BBTOB(split_hblks));
                                 if (error)
                                         goto bread_err2;
@@ -5473,7 +5390,7 @@ xlog_do_recovery_pass(
                         } else {
                                 /* This log record is split across the
                                  * physical end of log */
-                               offset = dbp->b_addr;
+                               offset = dbp;
                                 split_bblks = 0;
                                 if (blk_no != log->l_logBBsize) {
                                         /* some data is before the physical
@@ -5502,8 +5419,8 @@ xlog_do_recovery_pass(
                                  *   _first_, then the log start (LR header end)
                                  *   - order is important.
                                  */
-                               error = xlog_bread_offset(log, 0,
-                                               bblks - split_bblks, dbp,
+                               error = xlog_bread_noalign(log, 0,
+                                               bblks - split_bblks,
                                                 offset + BBTOB(split_bblks));
                                 if (error)
                                         goto bread_err2;
@@ -5551,9 +5468,9 @@ xlog_do_recovery_pass(
         }
  
   bread_err2:
-       xlog_put_bp(dbp);
+       kmem_free(dbp);
   bread_err1:
-       xlog_put_bp(hbp);
+       kmem_free(hbp);
  
         /*
          * Submit buffers that have been added from the last record processed,
@@ -5687,7 +5604,7 @@ xlog_do_recover(
          * Now that we've finished replaying all buffer and inode
          * updates, re-read in the superblock and reverify it.
          */
-       bp = xfs_getsb(mp, 0);
+       bp = xfs_getsb(mp);
         bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
         ASSERT(!(bp->b_flags & XBF_WRITE));
         bp->b_flags |= XBF_READ;
@@ -5860,16 +5777,12 @@ xlog_recover_finish(
         return 0;
  }
  
-int
+void
  xlog_recover_cancel(
         struct xlog     *log)
  {
-       int             error = 0;
-
         if (log->l_flags & XLOG_RECOVERY_NEEDED)
-               error = xlog_recover_cancel_intents(log);
-
-       return error;
+               xlog_recover_cancel_intents(log);
  }
  
  #if defined(DEBUG)
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c

index 6b736ea58d35402eb7e7975067a4303131cf3d83..9804efe525a9314e4b78a52e7c41c1f88296ffd2 100644 (file)
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -6,8 +6,8 @@
  #include "xfs.h"
  #include "xfs_fs.h"
  #include "xfs_error.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
-#include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 6b2bfe81dc51be79b5b3d736dc3b8f0f459f09ec..322da69092909078fb16222877ddcf7ad9c1a62d 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_dir2.h"
  #include "xfs_ialloc.h"
@@ -27,7 +24,6 @@
  #include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_fsops.h"
-#include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_sysfs.h"
  #include "xfs_rmap_btree.h"
@@ -429,30 +425,6 @@ xfs_update_alignment(xfs_mount_t *mp)
         return 0;
  }
  
-/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
-{
-       xfs_sb_t        *sbp = &(mp->m_sb);
-       uint64_t        icount;
-
-       if (sbp->sb_imax_pct) {
-               /*
-                * Make sure the maximum inode count is a multiple
-                * of the units we allocate inodes in.
-                */
-               icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-               do_div(icount, 100);
-               do_div(icount, mp->m_ialloc_blks);
-               mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
-                                  sbp->sb_inopblog;
-       } else {
-               mp->m_maxicount = 0;
-       }
-}
-
  /*
   * Set the default minimum read and write sizes unless
   * already specified in a mount option.
@@ -509,29 +481,6 @@ xfs_set_low_space_thresholds(
         }
  }
  
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
-       if (xfs_sb_version_hasalign(&mp->m_sb) &&
-               mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-               mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
-       else
-               mp->m_inoalign_mask = 0;
-       /*
-        * If we are using stripe alignment, check whether
-        * the stripe unit is a multiple of the inode alignment
-        */
-       if (mp->m_dalign && mp->m_inoalign_mask &&
-           !(mp->m_dalign & mp->m_inoalign_mask))
-               mp->m_sinoalign = mp->m_dalign;
-       else
-               mp->m_sinoalign = 0;
-}
-
  /*
   * Check that the data (and log if separate) is an ok size.
   */
@@ -683,6 +632,7 @@ xfs_mountfs(
  {
         struct xfs_sb           *sbp = &(mp->m_sb);
         struct xfs_inode        *rip;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
         uint64_t                resblks;
         uint                    quotamount = 0;
         uint                    quotaflags = 0;
@@ -749,12 +699,10 @@ xfs_mountfs(
         xfs_alloc_compute_maxlevels(mp);
         xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
         xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-       xfs_ialloc_compute_maxlevels(mp);
+       xfs_ialloc_setup_geometry(mp);
         xfs_rmapbt_compute_maxlevels(mp);
         xfs_refcountbt_compute_maxlevels(mp);
  
-       xfs_set_maxicount(mp);
-
         /* enable fail_at_unmount as default */
         mp->m_fail_unmount = true;
  
@@ -787,29 +735,6 @@ xfs_mountfs(
         /* set the low space thresholds for dynamic preallocation */
         xfs_set_low_space_thresholds(mp);
  
-       /*
-        * Set the inode cluster size.
-        * This may still be overridden by the file system
-        * block size if it is larger than the chosen cluster size.
-        *
-        * For v5 filesystems, scale the cluster size with the inode size to
-        * keep a constant ratio of inode per cluster buffer, but only if mkfs
-        * has set the inode alignment value appropriately for larger cluster
-        * sizes.
-        */
-       mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               int     new_size = mp->m_inode_cluster_size;
-
-               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
-               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
-                       mp->m_inode_cluster_size = new_size;
-       }
-       mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
-       mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
-       mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
-       mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
-
         /*
          * If enabled, sparse inode chunk alignment is expected to match the
          * cluster size. Full inode chunk alignment must match the chunk size,
@@ -817,20 +742,15 @@ xfs_mountfs(
          */
         if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
             mp->m_sb.sb_spino_align !=
-                       XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+                       XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
                 xfs_warn(mp,
         "Sparse inode block alignment (%u) must match cluster size (%llu).",
                          mp->m_sb.sb_spino_align,
-                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+                        XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
                 error = -EINVAL;
                 goto out_remove_uuid;
         }
  
-       /*
-        * Set inode alignment fields
-        */
-       xfs_set_inoalignment(mp);
-
         /*
          * Check that the data (and log if separate) is an ok size.
          */
@@ -1385,24 +1305,14 @@ xfs_mod_frextents(
   * xfs_getsb() is called to obtain the buffer for the superblock.
   * The buffer is returned locked and read in from disk.
   * The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
   */
  struct xfs_buf *
  xfs_getsb(
-       struct xfs_mount        *mp,
-       int                     flags)
+       struct xfs_mount        *mp)
  {
         struct xfs_buf          *bp = mp->m_sb_bp;
  
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK)
-                       return NULL;
-               xfs_buf_lock(bp);
-       }
-
+       xfs_buf_lock(bp);
         xfs_buf_hold(bp);
         ASSERT(bp->b_flags & XBF_DONE);
         return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index c81a5cd7c2288014da00c994d58f9d9fa2caede3..4adb6837439ac38fa600f0bd1f373634cef5bb78 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -105,6 +105,7 @@ typedef struct xfs_mount {
         struct xfs_da_geometry  *m_dir_geo;     /* directory block geometry */
         struct xfs_da_geometry  *m_attr_geo;    /* attribute block geometry */
         struct xlog             *m_log;         /* log specific stuff */
+       struct xfs_ino_geometry m_ino_geo;      /* inode geometry */
         int                     m_logbufs;      /* number of log buffers */
         int                     m_logbsize;     /* size of each log buffer */
         uint                    m_rsumlevels;   /* rt summary levels */
@@ -126,12 +127,6 @@ typedef struct xfs_mount {
         uint8_t                 m_blkbit_log;   /* blocklog + NBBY */
         uint8_t                 m_blkbb_log;    /* blocklog - BBSHIFT */
         uint8_t                 m_agno_log;     /* log #ag's */
-       uint8_t                 m_agino_log;    /* #bits for agino in inum */
-       uint                    m_inode_cluster_size;/* min inode buf size */
-       unsigned int            m_inodes_per_cluster;
-       unsigned int            m_blocks_per_cluster;
-       unsigned int            m_cluster_align;
-       unsigned int            m_cluster_align_inodes;
         uint                    m_blockmask;    /* sb_blocksize-1 */
         uint                    m_blockwsize;   /* sb_blocksize in words */
         uint                    m_blockwmask;   /* blockwsize-1 */
@@ -139,15 +134,12 @@ typedef struct xfs_mount {
         uint                    m_alloc_mnr[2]; /* min alloc btree records */
         uint                    m_bmap_dmxr[2]; /* max bmap btree records */
         uint                    m_bmap_dmnr[2]; /* min bmap btree records */
-       uint                    m_inobt_mxr[2]; /* max inobt btree records */
-       uint                    m_inobt_mnr[2]; /* min inobt btree records */
         uint                    m_rmap_mxr[2];  /* max rmap btree records */
         uint                    m_rmap_mnr[2];  /* min rmap btree records */
         uint                    m_refc_mxr[2];  /* max refc btree records */
         uint                    m_refc_mnr[2];  /* min refc btree records */
         uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
         uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
-       uint                    m_in_maxlevels; /* max inobt btree levels. */
         uint                    m_rmap_maxlevels; /* max rmap btree levels */
         uint                    m_refc_maxlevels; /* max refcount btree level */
         xfs_extlen_t            m_ag_prealloc_blocks; /* reserved ag blocks */
@@ -159,20 +151,13 @@ typedef struct xfs_mount {
         int                     m_fixedfsid[2]; /* unchanged for life of FS */
         uint64_t                m_flags;        /* global mount flags */
         bool                    m_finobt_nores; /* no per-AG finobt resv. */
-       int                     m_ialloc_inos;  /* inodes in inode allocation */
-       int                     m_ialloc_blks;  /* blocks in inode allocation */
-       int                     m_ialloc_min_blks;/* min blocks in sparse inode
-                                                  * allocation */
-       int                     m_inoalign_mask;/* mask sb_inoalignmt if used */
         uint                    m_qflags;       /* quota status flags */
         struct xfs_trans_resv   m_resv;         /* precomputed res values */
-       uint64_t                m_maxicount;    /* maximum inode count */
         uint64_t                m_resblks;      /* total reserved blocks */
         uint64_t                m_resblks_avail;/* available reserved blocks */
         uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
         int                     m_dalign;       /* stripe unit */
         int                     m_swidth;       /* stripe width */
-       int                     m_sinoalign;    /* stripe unit inode alignment */
         uint8_t                 m_sectbb_log;   /* sectlog - BBSHIFT */
         const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
         const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
@@ -198,7 +183,6 @@ typedef struct xfs_mount {
         struct workqueue_struct *m_unwritten_workqueue;
         struct workqueue_struct *m_cil_workqueue;
         struct workqueue_struct *m_reclaim_workqueue;
-       struct workqueue_struct *m_log_workqueue;
         struct workqueue_struct *m_eofblocks_workqueue;
         struct workqueue_struct *m_sync_workqueue;
  
@@ -226,6 +210,8 @@ typedef struct xfs_mount {
  #endif
  } xfs_mount_t;
  
+#define M_IGEO(mp)             (&(mp)->m_ino_geo)
+
  /*
   * Flags for m_flags.
   */
@@ -465,7 +451,7 @@ extern int  xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
                                  bool reserved);
  extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
  
-extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
  extern int     xfs_readsb(xfs_mount_t *, int);
  extern void    xfs_freesb(xfs_mount_t *);
  extern bool    xfs_fs_writable(struct xfs_mount *mp, int level);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h

index c8ba98fae30aefa7013ebfa168fed652a955f3e7..b6701b4f59a9b5bc44cd66e69a4eb7d5b69a547b 100644 (file)
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void)
         XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic,   0);
         XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic,   0);
         XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
+
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat,              192);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers,              24);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req,          64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req,          64);
  }
  
  #endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c

index bde2c9f56a46ab883fdfd5cb932d958838c867eb..0c954cad74493cf6fb9002b93101baeb7717e3f3 100644 (file)
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -2,23 +2,16 @@
  /*
   * Copyright (c) 2014 Christoph Hellwig.
   */
-#include <linux/iomap.h>
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_log.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
  #include "xfs_iomap.h"
-#include "xfs_shared.h"
-#include "xfs_bit.h"
-#include "xfs_pnfs.h"
  
  /*
   * Ensure that we do not have any outstanding pNFS layouts that can be used by
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c

new file mode 100644 (file)

index 0000000..4bcc3e6
--- /dev/null
+++ b/fs/xfs/xfs_pwork.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+#include "xfs_sysctl.h"
+#include "xfs_pwork.h"
+#include <linux/nmi.h>
+
+/*
+ * Parallel Work Queue
+ * ===================
+ *
+ * Abstract away the details of running a large and "obviously" parallelizable
+ * task across multiple CPUs.  Callers initialize the pwork control object with
+ * a desired level of parallelization and a work function.  Next, they embed
+ * struct xfs_pwork in whatever structure they use to pass work context to a
+ * worker thread and queue that pwork.  The work function will be passed the
+ * pwork item when it is run (from process context) and any returned error will
+ * be recorded in xfs_pwork_ctl.error.  Work functions should check for errors
+ * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
+ * stop workqueue item processing.
+ *
+ * This is the rough equivalent of the xfsprogs workqueue code, though we can't
+ * reuse that name here.
+ */
+
+/* Run work_fn on one item; keep the first error; wake poller when none left. */
+static void
+xfs_pwork_work(
+       struct work_struct      *work)
+{
+       struct xfs_pwork        *pwork;
+       struct xfs_pwork_ctl    *pctl;
+       int                     error;
+
+       pwork = container_of(work, struct xfs_pwork, work);
+       pctl = pwork->pctl;
+       error = pctl->work_fn(pctl->mp, pwork);
+       if (error && !pctl->error)      /* never overwrite an earlier error */
+               pctl->error = error;
+       if (atomic_dec_and_test(&pctl->nr_work))        /* last item done */
+               wake_up(&pctl->poll_wait);
+}
+
+/*
+ * Set up control data for parallel work.  @work_fn is the function called for
+ * each item.  @tag plus the caller's pid names the workqueue.  @nr_threads is
+ * the level of parallelism desired, or 0 for no limit.  Returns 0 or -ENOMEM.
+ */
+int
+xfs_pwork_init(
+       struct xfs_mount        *mp,
+       struct xfs_pwork_ctl    *pctl,
+       xfs_pwork_work_fn       work_fn,
+       const char              *tag,
+       unsigned int            nr_threads)
+{
+#ifdef DEBUG   /* a non-negative debug knob overrides the caller's request */
+       if (xfs_globals.pwork_threads >= 0)
+               nr_threads = xfs_globals.pwork_threads;
+#endif
+       trace_xfs_pwork_init(mp, nr_threads, current->pid);
+
+       pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
+                       current->pid);
+       if (!pctl->wq)
+               return -ENOMEM;
+       pctl->work_fn = work_fn;
+       pctl->error = 0;
+       pctl->mp = mp;
+       atomic_set(&pctl->nr_work, 0);
+       init_waitqueue_head(&pctl->poll_wait);
+
+       return 0;
+}
+
+/* Queue @pwork on @pctl's workqueue; nr_work counts it until it completes. */
+void
+xfs_pwork_queue(
+       struct xfs_pwork_ctl    *pctl,
+       struct xfs_pwork        *pwork)
+{
+       INIT_WORK(&pwork->work, xfs_pwork_work);
+       pwork->pctl = pctl;
+       atomic_inc(&pctl->nr_work);     /* dropped again in xfs_pwork_work */
+       queue_work(pctl->wq, &pwork->work);
+}
+
+/* Wait for work to finish, tear down the workqueue, return the saved error. */
+int
+xfs_pwork_destroy(
+       struct xfs_pwork_ctl    *pctl)
+{
+       destroy_workqueue(pctl->wq);
+       pctl->wq = NULL;
+       return pctl->error;
+}
+
+/*
+ * Wait for nr_work to reach zero, waking after each HZ timeout to touch the
+ * soft lockup watchdog.  This is for callers such as mount which hold locks.
+ */
+void
+xfs_pwork_poll(
+       struct xfs_pwork_ctl    *pctl)
+{
+       while (wait_event_timeout(pctl->poll_wait,
+                               atomic_read(&pctl->nr_work) == 0, HZ) == 0)
+               touch_softlockup_watchdog();
+}
+
+/*
+ * Return the amount of parallelism that the data device can handle; 0 would
+ * mean no limit, though the current heuristic only ever returns 1 or 2.
+ */
+unsigned int
+xfs_pwork_guess_datadev_parallelism(
+       struct xfs_mount        *mp)
+{
+       struct xfs_buftarg      *btp = mp->m_ddev_targp;
+
+       /*
+        * For now we'll go with the most conservative setting possible,
+        * which is two threads for a non-rotational queue and one otherwise.
+        */
+       return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h

new file mode 100644 (file)

index 0000000..8133124
--- /dev/null
+++ b/fs/xfs/xfs_pwork.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_PWORK_H__
+#define __XFS_PWORK_H__
+
+struct xfs_pwork;
+struct xfs_mount;
+
+typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork);
+
+/*
+ * Parallel work coordination structure.
+ */
+struct xfs_pwork_ctl {
+       struct workqueue_struct *wq;            /* queue the items run on */
+       struct xfs_mount        *mp;            /* handed to work_fn */
+       xfs_pwork_work_fn       work_fn;        /* called once per item */
+       struct wait_queue_head  poll_wait;      /* woken when nr_work hits 0 */
+       atomic_t                nr_work;        /* items queued, not yet done */
+       int                     error;          /* first error from work_fn */
+};
+
+/*
+ * Embed this parallel work control item inside your own work structure,
+ * then queue work with it.
+ */
+struct xfs_pwork {
+       struct work_struct      work;   /* initialised by xfs_pwork_queue */
+       struct xfs_pwork_ctl    *pctl;  /* set by xfs_pwork_queue */
+};
+
+#define XFS_PWORK_SINGLE_THREADED      { .pctl = NULL } /* no ctl: never aborts */
+
+/* Has any work item recorded an error?  A NULL @pctl never wants an abort. */
+static inline bool
+xfs_pwork_ctl_want_abort(
+       struct xfs_pwork_ctl    *pctl)
+{
+       return pctl && pctl->error;
+}
+
+/* Should this work item stop early because its ctl has recorded an error? */
+static inline bool
+xfs_pwork_want_abort(
+       struct xfs_pwork        *pwork)
+{
+       return xfs_pwork_ctl_want_abort(pwork->pctl);
+}
+
+int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
+               xfs_pwork_work_fn work_fn, const char *tag,
+               unsigned int nr_threads);
+void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
+int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
+void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
+unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+
+#endif /* __XFS_PWORK_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index aa6b6db3db0ec953096bf9fb6ae166d9cc392238..5e7a37f0cf84856663e93e40a605c9c169f0ddb3 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -13,19 +13,15 @@
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
+#include "xfs_iwalk.h"
  #include "xfs_quota.h"
-#include "xfs_error.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
  #include "xfs_bmap_util.h"
  #include "xfs_trans.h"
  #include "xfs_trans_space.h"
  #include "xfs_qm.h"
  #include "xfs_trace.h"
  #include "xfs_icache.h"
-#include "xfs_cksum.h"
  
  /*
   * The global quota manager. There is only one of these for the entire
@@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust(
  /* ARGSUSED */
  STATIC int
  xfs_qm_dqusage_adjust(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
-       int             *res)           /* result code value */
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       void                    *data)
  {
-       xfs_inode_t     *ip;
-       xfs_qcnt_t      nblks;
-       xfs_filblks_t   rtblks = 0;     /* total rt blks */
-       int             error;
+       struct xfs_inode        *ip;
+       xfs_qcnt_t              nblks;
+       xfs_filblks_t           rtblks = 0;     /* total rt blks */
+       int                     error;
  
         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
  
@@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust(
          * rootino must have its resources accounted for, not so with the quota
          * inodes.
          */
-       if (xfs_is_quota_inode(&mp->m_sb, ino)) {
-               *res = BULKSTAT_RV_NOTHING;
-               return -EINVAL;
-       }
+       if (xfs_is_quota_inode(&mp->m_sb, ino))
+               return 0;
  
         /*
          * We don't _need_ to take the ilock EXCL here because quotacheck runs
          * at mount time and therefore nobody will be racing chown/chproj.
          */
-       error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip);
-       if (error) {
-               *res = BULKSTAT_RV_NOTHING;
+       error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
+       if (error == -EINVAL || error == -ENOENT)
+               return 0;
+       if (error)
                 return error;
-       }
  
         ASSERT(ip->i_delayed_blks == 0);
  
@@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust(
                 struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
  
                 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-                       error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+                       error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
                         if (error)
                                 goto error0;
                 }
@@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust(
                         goto error0;
         }
  
-       xfs_irele(ip);
-       *res = BULKSTAT_RV_DIDONE;
-       return 0;
-
  error0:
         xfs_irele(ip);
-       *res = BULKSTAT_RV_GIVEUP;
         return error;
  }
  
@@ -1270,18 +1257,13 @@ STATIC int
  xfs_qm_quotacheck(
         xfs_mount_t     *mp)
  {
-       int                     done, count, error, error2;
-       xfs_ino_t               lastino;
-       size_t                  structsz;
+       int                     error, error2;
         uint                    flags;
         LIST_HEAD               (buffer_list);
         struct xfs_inode        *uip = mp->m_quotainfo->qi_uquotaip;
         struct xfs_inode        *gip = mp->m_quotainfo->qi_gquotaip;
         struct xfs_inode        *pip = mp->m_quotainfo->qi_pquotaip;
  
-       count = INT_MAX;
-       structsz = 1;
-       lastino = 0;
         flags = 0;
  
         ASSERT(uip || gip || pip);
@@ -1318,18 +1300,10 @@ xfs_qm_quotacheck(
                 flags |= XFS_PQUOTA_CHKD;
         }
  
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
+       error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+                       NULL);
+       if (error)
+               goto error_return;
  
         /*
          * We've made all the changes that we need to make incore.  Flush them
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c

index 3091e4bc04efe1e6f4d9aa88ed7987a221f9bd78..5d72e88598b41a16316b09f0c50819b48ad4871b 100644 (file)
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -5,13 +5,13 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_quota.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_qm.h"
  
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c

index b3190890f096d5f8717f8762e53be74edfdaa9d9..da7ad0383037bfb5994393c1f595bad55b1b6c0b 100644 (file)
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -4,7 +4,6 @@
   * All Rights Reserved.
   */
  
-#include <linux/capability.h>
  
  #include "xfs.h"
  #include "xfs_fs.h"
@@ -12,17 +11,13 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_qm.h"
-#include "xfs_trace.h"
  #include "xfs_icache.h"
-#include "xfs_defer.h"
  
  STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
  STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c

index a7c0c657dfaf943037bcb6fa753996c32956413b..cd6c7210a37366a9e144f85f5e042cea4aa20968 100644 (file)
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -4,6 +4,7 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
@@ -11,10 +12,8 @@
  #include "xfs_inode.h"
  #include "xfs_quota.h"
  #include "xfs_trans.h"
-#include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_qm.h"
-#include <linux/quota.h>
  
  
  static void
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c

index fce38b56b962cf07ea5ce82b205db30dfe7dc2c8..d8288aa0670ad61a5353452f0aaabae8663f4a4f 100644 (file)
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -14,7 +14,6 @@
  #include "xfs_defer.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
  #include "xfs_refcount_item.h"
  #include "xfs_log.h"
  #include "xfs_refcount.h"
@@ -94,15 +93,6 @@ xfs_cui_item_format(
                         xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
  }
  
-/*
- * Pinning has no meaning for an cui item, so just return.
- */
-STATIC void
-xfs_cui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
  /*
   * The unpin operation is the last place an CUI is manipulated in the log. It is
   * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -121,72 +111,23 @@ xfs_cui_item_unpin(
         xfs_cui_release(cuip);
  }
  
-/*
- * CUI items have no locking or pushing.  However, since CUIs are pulled from
- * the AIL when their corresponding CUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the CUI out of
- * the AIL.
- */
-STATIC uint
-xfs_cui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
  /*
   * The CUI has been either committed or aborted if the transaction has been
   * cancelled. If the transaction was cancelled, an CUD isn't going to be
   * constructed and thus we free the CUI here directly.
   */
  STATIC void
-xfs_cui_item_unlock(
+xfs_cui_item_release(
         struct xfs_log_item     *lip)
  {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_cui_release(CUI_ITEM(lip));
+       xfs_cui_release(CUI_ITEM(lip));
  }
  
-/*
- * The CUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_cui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The CUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all cui log items.
- */
  static const struct xfs_item_ops xfs_cui_item_ops = {
         .iop_size       = xfs_cui_item_size,
         .iop_format     = xfs_cui_item_format,
-       .iop_pin        = xfs_cui_item_pin,
         .iop_unpin      = xfs_cui_item_unpin,
-       .iop_unlock     = xfs_cui_item_unlock,
-       .iop_committed  = xfs_cui_item_committed,
-       .iop_push       = xfs_cui_item_push,
-       .iop_committing = xfs_cui_item_committing,
+       .iop_release    = xfs_cui_item_release,
  };
  
  /*
@@ -254,126 +195,250 @@ xfs_cud_item_format(
  }
  
  /*
- * Pinning has no meaning for an cud item, so just return.
+ * The CUD is either committed or aborted if the transaction is cancelled.
+ * Whichever way we get here, drop our reference to the CUI and free the
+ * CUD.
   */
  STATIC void
-xfs_cud_item_pin(
+xfs_cud_item_release(
         struct xfs_log_item     *lip)
  {
+       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+
+       xfs_cui_release(cudp->cud_cuip);
+       kmem_zone_free(xfs_cud_zone, cudp);
  }
  
-/*
- * Since pinning has no meaning for an cud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_cud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_cud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_cud_item_size,
+       .iop_format     = xfs_cud_item_format,
+       .iop_release    = xfs_cud_item_release,
+};
+
+static struct xfs_cud_log_item *
+xfs_trans_get_cud(
+       struct xfs_trans                *tp,
+       struct xfs_cui_log_item         *cuip)
  {
+       struct xfs_cud_log_item         *cudp;
+
+       cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
+       xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
+                         &xfs_cud_item_ops);
+       cudp->cud_cuip = cuip;
+       cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+
+       xfs_trans_add_item(tp, &cudp->cud_item);
+       return cudp;
  }
  
  /*
- * There isn't much you can do to push on an cud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish a refcount update and log it to the CUD. Note that the
+ * transaction is marked dirty regardless of whether the refcount
+ * update succeeds or fails to support the CUI/CUD lifecycle rules.
   */
-STATIC uint
-xfs_cud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_log_finish_refcount_update(
+       struct xfs_trans                *tp,
+       struct xfs_cud_log_item         *cudp,
+       enum xfs_refcount_intent_type   type,
+       xfs_fsblock_t                   startblock,
+       xfs_extlen_t                    blockcount,
+       xfs_fsblock_t                   *new_fsb,
+       xfs_extlen_t                    *new_len,
+       struct xfs_btree_cur            **pcur)
  {
-       return XFS_ITEM_PINNED;
+       int                             error;
+
+       error = xfs_refcount_finish_one(tp, type, startblock,
+                       blockcount, new_fsb, new_len, pcur);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the CUI and frees the CUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+       return error;
  }
  
-/*
- * The CUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the CUI and free the
- * CUD.
- */
-STATIC void
-xfs_cud_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort refcount intents by AG. */
+static int
+xfs_refcount_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
  {
-       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_refcount_intent      *ra;
+       struct xfs_refcount_intent      *rb;
+
+       ra = container_of(a, struct xfs_refcount_intent, ri_list);
+       rb = container_of(b, struct xfs_refcount_intent, ri_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
+}
  
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_cui_release(cudp->cud_cuip);
-               kmem_zone_free(xfs_cud_zone, cudp);
+/* Get a CUI. */
+STATIC void *
+xfs_refcount_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_cui_log_item         *cuip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       cuip = xfs_cui_init(tp->t_mountp, count);
+       ASSERT(cuip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &cuip->cui_item);
+       return cuip;
+}
+
+/* Set the phys extent flags for this refcount update. */
+static void
+xfs_trans_set_refcount_flags(
+       struct xfs_phys_extent          *refc,
+       enum xfs_refcount_intent_type   type)
+{
+       refc->pe_flags = 0;
+       switch (type) {
+       case XFS_REFCOUNT_INCREASE:
+       case XFS_REFCOUNT_DECREASE:
+       case XFS_REFCOUNT_ALLOC_COW:
+       case XFS_REFCOUNT_FREE_COW:
+               refc->pe_flags |= type;
+               break;
+       default:
+               ASSERT(0);
         }
  }
  
-/*
- * When the cud item is committed to disk, all we need to do is delete our
- * reference to our partner cui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_cud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Log refcount updates in the intent item. */
+STATIC void
+xfs_refcount_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
  {
-       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+       struct xfs_cui_log_item         *cuip = intent;
+       struct xfs_refcount_intent      *refc;
+       uint                            next_extent;
+       struct xfs_phys_extent          *ext;
+
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
  
         /*
-        * Drop the CUI reference regardless of whether the CUD has been
-        * aborted. Once the CUD transaction is constructed, it is the sole
-        * responsibility of the CUD to release the CUI (even if the CUI is
-        * aborted due to log I/O error).
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
          */
-       xfs_cui_release(cudp->cud_cuip);
-       kmem_zone_free(xfs_cud_zone, cudp);
+       next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
+       ASSERT(next_extent < cuip->cui_format.cui_nextents);
+       ext = &cuip->cui_format.cui_extents[next_extent];
+       ext->pe_startblock = refc->ri_startblock;
+       ext->pe_len = refc->ri_blockcount;
+       xfs_trans_set_refcount_flags(ext, refc->ri_type);
+}
  
-       return (xfs_lsn_t)-1;
+/* Get a CUD so we can process all the deferred refcount updates. */
+STATIC void *
+xfs_refcount_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_cud(tp, intent);
  }
  
-/*
- * The CUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Process a deferred refcount update. */
+STATIC int
+xfs_refcount_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
  {
+       struct xfs_refcount_intent      *refc;
+       xfs_fsblock_t                   new_fsb;
+       xfs_extlen_t                    new_aglen;
+       int                             error;
+
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+       error = xfs_trans_log_finish_refcount_update(tp, done_item,
+                       refc->ri_type,
+                       refc->ri_startblock,
+                       refc->ri_blockcount,
+                       &new_fsb, &new_aglen,
+                       (struct xfs_btree_cur **)state);
+       /* Did we run out of reservation?  Requeue what we didn't finish. */
+       if (!error && new_aglen > 0) {
+               ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
+                      refc->ri_type == XFS_REFCOUNT_DECREASE);
+               refc->ri_startblock = new_fsb;
+               refc->ri_blockcount = new_aglen;
+               return -EAGAIN;
+       }
+       kmem_free(refc);
+       return error;
  }
  
-/*
- * This is the ops vector shared by all cud log items.
- */
-static const struct xfs_item_ops xfs_cud_item_ops = {
-       .iop_size       = xfs_cud_item_size,
-       .iop_format     = xfs_cud_item_format,
-       .iop_pin        = xfs_cud_item_pin,
-       .iop_unpin      = xfs_cud_item_unpin,
-       .iop_unlock     = xfs_cud_item_unlock,
-       .iop_committed  = xfs_cud_item_committed,
-       .iop_push       = xfs_cud_item_push,
-       .iop_committing = xfs_cud_item_committing,
-};
+/* Clean up after processing deferred refcounts. */
+STATIC void
+xfs_refcount_update_finish_cleanup(
+       struct xfs_trans        *tp,
+       void                    *state,
+       int                     error)
+{
+       struct xfs_btree_cur    *rcur = state;
  
-/*
- * Allocate and initialize an cud item with the given number of extents.
- */
-struct xfs_cud_log_item *
-xfs_cud_init(
-       struct xfs_mount                *mp,
-       struct xfs_cui_log_item         *cuip)
+       xfs_refcount_finish_one_cleanup(tp, rcur, error);
+}
  
+/* Abort all pending CUIs. */
+STATIC void
+xfs_refcount_update_abort_intent(
+       void                            *intent)
  {
-       struct xfs_cud_log_item *cudp;
+       xfs_cui_release(intent);
+}
  
-       cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
-       cudp->cud_cuip = cuip;
-       cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+/* Cancel a deferred refcount update. */
+STATIC void
+xfs_refcount_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_refcount_intent      *refc;
  
-       return cudp;
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+       kmem_free(refc);
  }
  
+const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
+       .max_items      = XFS_CUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_refcount_update_diff_items,
+       .create_intent  = xfs_refcount_update_create_intent,
+       .abort_intent   = xfs_refcount_update_abort_intent,
+       .log_item       = xfs_refcount_update_log_item,
+       .create_done    = xfs_refcount_update_create_done,
+       .finish_item    = xfs_refcount_update_finish_item,
+       .finish_cleanup = xfs_refcount_update_finish_cleanup,
+       .cancel_item    = xfs_refcount_update_cancel_item,
+};
+
  /*
   * Process a refcount update intent item that was recovered from the log.
   * We need to update the refcountbt.
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h

index 3896dcc2368f8e19b206ef49abc6bab0f12852b5..e47530f30489deb04dcacba6aacf98c90e813b6e 100644 (file)
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone       *xfs_cui_zone;
  extern struct kmem_zone        *xfs_cud_zone;
  
  struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
-               struct xfs_cui_log_item *);
  void xfs_cui_item_free(struct xfs_cui_log_item *);
  void xfs_cui_release(struct xfs_cui_log_item *);
  int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index 680ae7662a78ef260fd4897b244b69898a239c5a..c4ec7afd1170a7550df8704b2bb71ae450c927f5 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -11,21 +11,12 @@
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
-#include "xfs_inode_item.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
-#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_ioctl.h"
  #include "xfs_trace.h"
-#include "xfs_log.h"
  #include "xfs_icache.h"
-#include "xfs_pnfs.h"
  #include "xfs_btree.h"
  #include "xfs_refcount_btree.h"
  #include "xfs_refcount.h"
@@ -33,11 +24,9 @@
  #include "xfs_trans_space.h"
  #include "xfs_bit.h"
  #include "xfs_alloc.h"
-#include "xfs_quota_defs.h"
  #include "xfs_quota.h"
  #include "xfs_reflink.h"
  #include "xfs_iomap.h"
-#include "xfs_rmap_btree.h"
  #include "xfs_sb.h"
  #include "xfs_ag_resv.h"
  
@@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range(
  
         /* Start a rolling transaction to remove the mappings */
         error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       0, 0, XFS_TRANS_NOFS, &tp);
+                       0, 0, 0, &tp);
         if (error)
                 goto out;
  
@@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent(
  
         resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
-                       XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+                       XFS_TRANS_RESERVE, &tp);
         if (error)
                 return error;
  
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c

index 127dc9c32a54247be2b24ec56de1f2d91e82a6c4..77ed557b6127c6c23c00ea285ad9ce3fa0721dcd 100644 (file)
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -14,7 +14,6 @@
  #include "xfs_defer.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
  #include "xfs_rmap_item.h"
  #include "xfs_log.h"
  #include "xfs_rmap.h"
@@ -93,15 +92,6 @@ xfs_rui_item_format(
                         xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
  }
  
-/*
- * Pinning has no meaning for an rui item, so just return.
- */
-STATIC void
-xfs_rui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
  /*
   * The unpin operation is the last place an RUI is manipulated in the log. It is
   * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -120,72 +110,23 @@ xfs_rui_item_unpin(
         xfs_rui_release(ruip);
  }
  
-/*
- * RUI items have no locking or pushing.  However, since RUIs are pulled from
- * the AIL when their corresponding RUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the RUI out of
- * the AIL.
- */
-STATIC uint
-xfs_rui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
  /*
   * The RUI has been either committed or aborted if the transaction has been
   * cancelled. If the transaction was cancelled, an RUD isn't going to be
   * constructed and thus we free the RUI here directly.
   */
  STATIC void
-xfs_rui_item_unlock(
+xfs_rui_item_release(
         struct xfs_log_item     *lip)
  {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_rui_release(RUI_ITEM(lip));
+       xfs_rui_release(RUI_ITEM(lip));
  }
  
-/*
- * The RUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_rui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The RUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_rui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all rui log items.
- */
  static const struct xfs_item_ops xfs_rui_item_ops = {
         .iop_size       = xfs_rui_item_size,
         .iop_format     = xfs_rui_item_format,
-       .iop_pin        = xfs_rui_item_pin,
         .iop_unpin      = xfs_rui_item_unpin,
-       .iop_unlock     = xfs_rui_item_unlock,
-       .iop_committed  = xfs_rui_item_committed,
-       .iop_push       = xfs_rui_item_push,
-       .iop_committing = xfs_rui_item_committing,
+       .iop_release    = xfs_rui_item_release,
  };
  
  /*
@@ -275,126 +216,271 @@ xfs_rud_item_format(
  }
  
  /*
- * Pinning has no meaning for an rud item, so just return.
+ * The RUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the RUI and free the
+ * RUD.
   */
  STATIC void
-xfs_rud_item_pin(
+xfs_rud_item_release(
         struct xfs_log_item     *lip)
  {
+       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+       xfs_rui_release(rudp->rud_ruip);
+       kmem_zone_free(xfs_rud_zone, rudp);
  }
  
-/*
- * Since pinning has no meaning for an rud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_rud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_rud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_rud_item_size,
+       .iop_format     = xfs_rud_item_format,
+       .iop_release    = xfs_rud_item_release,
+};
+
+static struct xfs_rud_log_item *
+xfs_trans_get_rud(
+       struct xfs_trans                *tp,
+       struct xfs_rui_log_item         *ruip)
  {
+       struct xfs_rud_log_item         *rudp;
+
+       rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+       xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
+                         &xfs_rud_item_ops);
+       rudp->rud_ruip = ruip;
+       rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+
+       xfs_trans_add_item(tp, &rudp->rud_item);
+       return rudp;
  }
  
-/*
- * There isn't much you can do to push on an rud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_rud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+/* Set the map extent flags for this reverse mapping. */
+static void
+xfs_trans_set_rmap_flags(
+       struct xfs_map_extent           *rmap,
+       enum xfs_rmap_intent_type       type,
+       int                             whichfork,
+       xfs_exntst_t                    state)
  {
-       return XFS_ITEM_PINNED;
+       rmap->me_flags = 0;
+       if (state == XFS_EXT_UNWRITTEN)
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+       if (whichfork == XFS_ATTR_FORK)
+               rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+       switch (type) {
+       case XFS_RMAP_MAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+               break;
+       case XFS_RMAP_MAP_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
+               break;
+       case XFS_RMAP_UNMAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+               break;
+       case XFS_RMAP_UNMAP_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
+               break;
+       case XFS_RMAP_CONVERT:
+               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+               break;
+       case XFS_RMAP_CONVERT_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
+               break;
+       case XFS_RMAP_ALLOC:
+               rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+               break;
+       case XFS_RMAP_FREE:
+               rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+               break;
+       default:
+               ASSERT(0);
+       }
  }
  
  /*
- * The RUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the RUI and free the
- * RUD.
+ * Finish an rmap update and log it to the RUD. Note that the transaction is
+ * marked dirty regardless of whether the rmap update succeeds or fails to
+ * support the RUI/RUD lifecycle rules.
   */
-STATIC void
-xfs_rud_item_unlock(
-       struct xfs_log_item     *lip)
+static int
+xfs_trans_log_finish_rmap_update(
+       struct xfs_trans                *tp,
+       struct xfs_rud_log_item         *rudp,
+       enum xfs_rmap_intent_type       type,
+       uint64_t                        owner,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
  {
-       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+       int                             error;
  
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_rui_release(rudp->rud_ruip);
-               kmem_zone_free(xfs_rud_zone, rudp);
-       }
+       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+                       startblock, blockcount, state, pcur);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the RUI and frees the RUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+       return error;
  }
  
-/*
- * When the rud item is committed to disk, all we need to do is delete our
- * reference to our partner rui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_rud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Sort rmap intents by AG. */
+static int
+xfs_rmap_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
  {
-       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_rmap_intent          *ra;
+       struct xfs_rmap_intent          *rb;
+
+       ra = container_of(a, struct xfs_rmap_intent, ri_list);
+       rb = container_of(b, struct xfs_rmap_intent, ri_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/* Get an RUI. */
+STATIC void *
+xfs_rmap_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_rui_log_item         *ruip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       ruip = xfs_rui_init(tp->t_mountp, count);
+       ASSERT(ruip != NULL);
  
         /*
-        * Drop the RUI reference regardless of whether the RUD has been
-        * aborted. Once the RUD transaction is constructed, it is the sole
-        * responsibility of the RUD to release the RUI (even if the RUI is
-        * aborted due to log I/O error).
+        * Get a log_item_desc to point at the new item.
          */
-       xfs_rui_release(rudp->rud_ruip);
-       kmem_zone_free(xfs_rud_zone, rudp);
-
-       return (xfs_lsn_t)-1;
+       xfs_trans_add_item(tp, &ruip->rui_item);
+       return ruip;
  }
  
-/*
- * The RUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log rmap updates in the intent item. */
  STATIC void
-xfs_rud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_rmap_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
  {
+       struct xfs_rui_log_item         *ruip = intent;
+       struct xfs_rmap_intent          *rmap;
+       uint                            next_extent;
+       struct xfs_map_extent           *map;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+       ASSERT(next_extent < ruip->rui_format.rui_nextents);
+       map = &ruip->rui_format.rui_extents[next_extent];
+       map->me_owner = rmap->ri_owner;
+       map->me_startblock = rmap->ri_bmap.br_startblock;
+       map->me_startoff = rmap->ri_bmap.br_startoff;
+       map->me_len = rmap->ri_bmap.br_blockcount;
+       xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_state);
  }
  
-/*
- * This is the ops vector shared by all rud log items.
- */
-static const struct xfs_item_ops xfs_rud_item_ops = {
-       .iop_size       = xfs_rud_item_size,
-       .iop_format     = xfs_rud_item_format,
-       .iop_pin        = xfs_rud_item_pin,
-       .iop_unpin      = xfs_rud_item_unpin,
-       .iop_unlock     = xfs_rud_item_unlock,
-       .iop_committed  = xfs_rud_item_committed,
-       .iop_push       = xfs_rud_item_push,
-       .iop_committing = xfs_rud_item_committing,
-};
+/* Get an RUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_rmap_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_rud(tp, intent);
+}
  
-/*
- * Allocate and initialize an rud item with the given number of extents.
- */
-struct xfs_rud_log_item *
-xfs_rud_init(
-       struct xfs_mount                *mp,
-       struct xfs_rui_log_item         *ruip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_rmap_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_rmap_intent          *rmap;
+       int                             error;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       error = xfs_trans_log_finish_rmap_update(tp, done_item,
+                       rmap->ri_type,
+                       rmap->ri_owner, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_startoff,
+                       rmap->ri_bmap.br_startblock,
+                       rmap->ri_bmap.br_blockcount,
+                       rmap->ri_bmap.br_state,
+                       (struct xfs_btree_cur **)state);
+       kmem_free(rmap);
+       return error;
+}
+
+/* Clean up after processing deferred rmaps. */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+       struct xfs_trans        *tp,
+       void                    *state,
+       int                     error)
+{
+       struct xfs_btree_cur    *rcur = state;
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
  
+/* Abort all pending RUIs. */
+STATIC void
+xfs_rmap_update_abort_intent(
+       void                            *intent)
  {
-       struct xfs_rud_log_item *rudp;
+       xfs_rui_release(intent);
+}
  
-       rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
-       rudp->rud_ruip = ruip;
-       rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_rmap_intent          *rmap;
  
-       return rudp;
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       kmem_free(rmap);
  }
  
+const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+       .max_items      = XFS_RUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_rmap_update_diff_items,
+       .create_intent  = xfs_rmap_update_create_intent,
+       .abort_intent   = xfs_rmap_update_abort_intent,
+       .log_item       = xfs_rmap_update_log_item,
+       .create_done    = xfs_rmap_update_create_done,
+       .finish_item    = xfs_rmap_update_finish_item,
+       .finish_cleanup = xfs_rmap_update_finish_cleanup,
+       .cancel_item    = xfs_rmap_update_cancel_item,
+};
+
  /*
   * Process an rmap update intent item that was recovered from the log.
   * We need to update the rmapbt.
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h

index 7e482baa27f5b5e3a2bf5e89b80933f04dac3b74..8708e4a5aa5c37a29ea994d5220de49edf38c13e 100644 (file)
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone       *xfs_rui_zone;
  extern struct kmem_zone        *xfs_rud_zone;
  
  struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
-               struct xfs_rui_log_item *);
  int xfs_rui_copy_format(struct xfs_log_iovec *buf,
                 struct xfs_rui_log_format *dst_rui_fmt);
  void xfs_rui_item_free(struct xfs_rui_log_item *);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index ac0fcdad0c4edee1f3085aef91641f2f0a2030ca..5fa4db3c3e320b39277ad576946a1cc934b4ea6e 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -11,17 +11,11 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
  #include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
  #include "xfs_icache.h"
  #include "xfs_rtalloc.h"
  
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c

index cc509743facd8ddfedc6c8946446f515a79dfb69..113883c4f202e09e2e435f6883d7f6b484a05fe0 100644 (file)
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -4,7 +4,6 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
-#include <linux/proc_fs.h>
  
  struct xstats xfsstats;
  
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index a14d11d78bd80cfcd1b34beda1d63f931b336723..f9450235533cc4fcbeab7c3f5aba5f6a38cf1e99 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -11,18 +11,15 @@
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_bmap.h"
  #include "xfs_alloc.h"
-#include "xfs_error.h"
  #include "xfs_fsops.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_log.h"
  #include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
  #include "xfs_dir2.h"
  #include "xfs_extfree_item.h"
  #include "xfs_mru_cache.h"
@@ -38,18 +35,8 @@
  #include "xfs_refcount_item.h"
  #include "xfs_bmap_item.h"
  #include "xfs_reflink.h"
-#include "xfs_defer.h"
  
-#include <linux/namei.h>
-#include <linux/dax.h>
-#include <linux/init.h>
-#include <linux/slab.h>
  #include <linux/magic.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
  #include <linux/parser.h>
  
  static const struct super_operations xfs_super_operations;
@@ -582,7 +569,7 @@ xfs_set_inode_alloc(
          * Calculate how much should be reserved for inodes to meet
          * the max inode percentage.  Used only for inode32.
          */
-       if (mp->m_maxicount) {
+       if (M_IGEO(mp)->maxicount) {
                 uint64_t        icount;
  
                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -840,16 +827,10 @@ xfs_init_mount_workqueues(
         if (!mp->m_reclaim_workqueue)
                 goto out_destroy_cil;
  
-       mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-                       WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
-                       mp->m_fsname);
-       if (!mp->m_log_workqueue)
-               goto out_destroy_reclaim;
-
         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
         if (!mp->m_eofblocks_workqueue)
-               goto out_destroy_log;
+               goto out_destroy_reclaim;
  
         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
                                                mp->m_fsname);
@@ -860,8 +841,6 @@ xfs_init_mount_workqueues(
  
  out_destroy_eofb:
         destroy_workqueue(mp->m_eofblocks_workqueue);
-out_destroy_log:
-       destroy_workqueue(mp->m_log_workqueue);
  out_destroy_reclaim:
         destroy_workqueue(mp->m_reclaim_workqueue);
  out_destroy_cil:
@@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues(
  {
         destroy_workqueue(mp->m_sync_workqueue);
         destroy_workqueue(mp->m_eofblocks_workqueue);
-       destroy_workqueue(mp->m_log_workqueue);
         destroy_workqueue(mp->m_reclaim_workqueue);
         destroy_workqueue(mp->m_cil_workqueue);
         destroy_workqueue(mp->m_unwritten_workqueue);
@@ -1131,10 +1109,10 @@ xfs_fs_statfs(
  
         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
         statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
-       if (mp->m_maxicount)
+       if (M_IGEO(mp)->maxicount)
                 statp->f_files = min_t(typeof(statp->f_files),
                                         statp->f_files,
-                                       mp->m_maxicount);
+                                       M_IGEO(mp)->maxicount);
  
         /* If sb_icount overshot maxicount, report actual allocation */
         statp->f_files = max_t(typeof(statp->f_files),
@@ -1685,6 +1663,8 @@ xfs_fs_fill_super(
         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
         sb->s_max_links = XFS_MAXLINK;
         sb->s_time_gran = 1;
+       sb->s_iflags |= SB_I_CGROUPWB;
+
         set_posix_acl_flag(sb);
  
         /* version 5 superblocks support inode version counters. */
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h

index 21cb49a43d7cb0c8c9945fc35cd71dc0e3fceb6d..763e43d22deeffc20862742ccdb4c13cbdd0e796 100644 (file)
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -38,6 +38,18 @@ extern void xfs_qm_exit(void);
  # define XFS_SCRUB_STRING
  #endif
  
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+# define XFS_REPAIR_STRING     "repair, "
+#else
+# define XFS_REPAIR_STRING
+#endif
+
+#ifdef CONFIG_XFS_WARN
+# define XFS_WARN_STRING       "verbose warnings, "
+#else
+# define XFS_WARN_STRING
+#endif
+
  #ifdef DEBUG
  # define XFS_DBG_STRING                "debug"
  #else
@@ -49,6 +61,8 @@ extern void xfs_qm_exit(void);
                                 XFS_SECURITY_STRING \
                                 XFS_REALTIME_STRING \
                                 XFS_SCRUB_STRING \
+                               XFS_REPAIR_STRING \
+                               XFS_WARN_STRING \
                                 XFS_DBG_STRING /* DBG must be last */
  
  struct xfs_inode;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index b2c1177c717ff4f0083a3788f02c3e6d3c745374..ed66fd2de3273355fa5e74d6e953e72c0d827bdf 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -12,23 +12,14 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_defer.h"
  #include "xfs_dir2.h"
  #include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_btree.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
  #include "xfs_quota.h"
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
-#include "xfs_symlink.h"
  #include "xfs_trans.h"
-#include "xfs_log.h"
  
  /* ----- Kernel only functions below ----- */
  int
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c

index 0cc034dfb78608e23b5a6fea5f41b076c8743b12..31b3bdbd2ebad14ed90aae053dbc40589c0f69d6 100644 (file)
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -4,10 +4,7 @@
   * All Rights Reserved.
   */
  #include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
  #include "xfs_error.h"
-#include "xfs_stats.h"
  
  static struct ctl_table_header *xfs_table_header;
  
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h

index ad7f9be130872c9e0664780115ba496bfd49e885..8abf4640f1d552af4cecc866c761b597d08d4fb7 100644 (file)
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -82,6 +82,9 @@ enum {
  extern xfs_param_t     xfs_params;
  
  struct xfs_globals {
+#ifdef DEBUG
+       int     pwork_threads;          /* parallel workqueue threads */
+#endif
         int     log_recovery_delay;     /* log recovery delay (secs) */
         int     mount_delay;            /* mount setup delay (secs) */
         bool    bug_on_assert;          /* BUG() the kernel on assert failure */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index cabda13f3c64168a7a33d01e37bf895f9e4a07a4..ddd0bf7a474026059a62710b42b7cf90e5757d1e 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,9 +10,7 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_sysfs.h"
-#include "xfs_log.h"
  #include "xfs_log_priv.h"
-#include "xfs_stats.h"
  #include "xfs_mount.h"
  
  struct xfs_sysfs_attr {
@@ -206,11 +204,51 @@ always_cow_show(
  }
  XFS_SYSFS_ATTR_RW(always_cow);
  
+#ifdef DEBUG
+/*
+ * Override how many threads the parallel work queue is allowed to create.
+ * This has to be a debug-only global (instead of an errortag) because one of
+ * the main users of parallel workqueues is mount time quotacheck.
+ */
+STATIC ssize_t
+pwork_threads_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val < -1 || val > num_possible_cpus())
+               return -EINVAL;
+
+       xfs_globals.pwork_threads = val;
+
+       return count;
+}
+
+STATIC ssize_t
+pwork_threads_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
+}
+XFS_SYSFS_ATTR_RW(pwork_threads);
+#endif /* DEBUG */
+
  static struct attribute *xfs_dbg_attrs[] = {
         ATTR_LIST(bug_on_assert),
         ATTR_LIST(log_recovery_delay),
         ATTR_LIST(mount_delay),
         ATTR_LIST(always_cow),
+#ifdef DEBUG
+       ATTR_LIST(pwork_threads),
+#endif
         NULL,
  };
  
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c

index cb6489c22cad2015126b6e7b1977c1507b017ad5..bc85b89f88cae1cf58a4353cc050f179fa6a43f3 100644 (file)
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -15,24 +15,16 @@
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_da_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
  #include "xfs_alloc.h"
  #include "xfs_bmap.h"
  #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
  #include "xfs_trans.h"
-#include "xfs_log.h"
  #include "xfs_log_priv.h"
  #include "xfs_buf_item.h"
  #include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
  #include "xfs_dquot_item.h"
  #include "xfs_dquot.h"
  #include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
  #include "xfs_filestream.h"
  #include "xfs_fsmap.h"
  
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 2464ea351f837bd185da6ca7b2e146e30c9bae2d..8094b1920eeff6413648c2af6251c4de1dcb4a0b 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
  DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
@@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup);
  DEFINE_TRANS_EVENT(xfs_trans_free);
  DEFINE_TRANS_EVENT(xfs_trans_roll);
  DEFINE_TRANS_EVENT(xfs_trans_add_item);
+DEFINE_TRANS_EVENT(xfs_trans_commit_items);
  DEFINE_TRANS_EVENT(xfs_trans_free_items);
  
  TRACE_EVENT(xfs_iunlink_update_bucket,
@@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name,      \
  DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
  DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
  
+TRACE_EVENT(xfs_iwalk_ag,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agino_t startino),
+       TP_ARGS(mp, agno, startino),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, startino)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startino = startino;
+       ),
+       TP_printk("dev %d:%d agno %d startino %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+                 __entry->startino)
+)
+
+TRACE_EVENT(xfs_iwalk_ag_rec,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_inobt_rec_incore *irec),
+       TP_ARGS(mp, agno, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, startino)
+               __field(uint64_t, freemask)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startino = irec->ir_startino;
+               __entry->freemask = irec->ir_free;
+       ),
+       TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+                 __entry->startino, __entry->freemask)
+)
+
+TRACE_EVENT(xfs_pwork_init,
+       TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
+       TP_ARGS(mp, nr_threads, pid),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned int, nr_threads)
+               __field(pid_t, pid)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->nr_threads = nr_threads;
+               __entry->pid = pid;
+       ),
+       TP_printk("dev %d:%d nr_threads %u pid %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->nr_threads, __entry->pid)
+)
+
  #endif /* _TRACE_XFS_H */
  
  #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index 912b42f5fe4ac61ed79bbc729f5ccf094de66c32..d42a68d8313bdd5721f317a6c64858a67b13d6e6 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -11,7 +11,6 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_inode.h"
  #include "xfs_extent_busy.h"
  #include "xfs_quota.h"
  #include "xfs_trans.h"
@@ -264,9 +263,7 @@ xfs_trans_alloc(
          * GFP_NOFS allocation context so that we avoid lockdep false positives
          * by doing GFP_KERNEL allocations inside sb_start_intwrite().
          */
-       tp = kmem_zone_zalloc(xfs_trans_zone,
-               (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
-
+       tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
         if (!(flags & XFS_TRANS_NO_WRITECOUNT))
                 sb_start_intwrite(mp->m_super);
  
@@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas(
         xfs_buf_t       *bp;
         int             whole = 0;
  
-       bp = xfs_trans_getsb(tp, tp->t_mountp, 0);
+       bp = xfs_trans_getsb(tp, tp->t_mountp);
         sbp = XFS_BUF_TO_SBP(bp);
  
         /*
@@ -767,10 +764,9 @@ xfs_trans_del_item(
  }
  
  /* Detach and unlock all of the items in a transaction */
-void
+static void
  xfs_trans_free_items(
         struct xfs_trans        *tp,
-       xfs_lsn_t               commit_lsn,
         bool                    abort)
  {
         struct xfs_log_item     *lip, *next;
@@ -779,11 +775,10 @@ xfs_trans_free_items(
  
         list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
                 xfs_trans_del_item(lip);
-               if (commit_lsn != NULLCOMMITLSN)
-                       lip->li_ops->iop_committing(lip, commit_lsn);
                 if (abort)
                         set_bit(XFS_LI_ABORTED, &lip->li_flags);
-               lip->li_ops->iop_unlock(lip);
+               if (lip->li_ops->iop_release)
+                       lip->li_ops->iop_release(lip);
         }
  }
  
@@ -804,7 +799,8 @@ xfs_log_item_batch_insert(
         for (i = 0; i < nr_items; i++) {
                 struct xfs_log_item *lip = log_items[i];
  
-               lip->li_ops->iop_unpin(lip, 0);
+               if (lip->li_ops->iop_unpin)
+                       lip->li_ops->iop_unpin(lip, 0);
         }
  }
  
@@ -815,7 +811,7 @@ xfs_log_item_batch_insert(
   *
   * If we are called with the aborted flag set, it is because a log write during
   * a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through iop_commited and iop_unlock, which
+ * checkpoint have already gone through iop_committed and iop_committing, which
   * means that checkpoint commit abort handling is treated exactly the same
   * as an iclog write error even though we haven't started any IO yet. Hence in
   * this case all we need to do is iop_committed processing, followed by an
@@ -833,7 +829,7 @@ xfs_trans_committed_bulk(
         struct xfs_ail          *ailp,
         struct xfs_log_vec      *log_vector,
         xfs_lsn_t               commit_lsn,
-       int                     aborted)
+       bool                    aborted)
  {
  #define LOG_ITEM_BATCH_SIZE    32
         struct xfs_log_item     *log_items[LOG_ITEM_BATCH_SIZE];
@@ -852,7 +848,16 @@ xfs_trans_committed_bulk(
  
                 if (aborted)
                         set_bit(XFS_LI_ABORTED, &lip->li_flags);
-               item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+
+               if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
+                       lip->li_ops->iop_release(lip);
+                       continue;
+               }
+
+               if (lip->li_ops->iop_committed)
+                       item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+               else
+                       item_lsn = commit_lsn;
  
                 /* item_lsn of -1 means the item needs no further processing */
                 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -864,7 +869,8 @@ xfs_trans_committed_bulk(
                  */
                 if (aborted) {
                         ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
-                       lip->li_ops->iop_unpin(lip, 1);
+                       if (lip->li_ops->iop_unpin)
+                               lip->li_ops->iop_unpin(lip, 1);
                         continue;
                 }
  
@@ -882,7 +888,8 @@ xfs_trans_committed_bulk(
                                 xfs_trans_ail_update(ailp, lip, item_lsn);
                         else
                                 spin_unlock(&ailp->ail_lock);
-                       lip->li_ops->iop_unpin(lip, 0);
+                       if (lip->li_ops->iop_unpin)
+                               lip->li_ops->iop_unpin(lip, 0);
                         continue;
                 }
  
@@ -998,7 +1005,7 @@ out_unreserve:
                 tp->t_ticket = NULL;
         }
         current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-       xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
+       xfs_trans_free_items(tp, !!error);
         xfs_trans_free(tp);
  
         XFS_STATS_INC(mp, xs_trans_empty);
@@ -1060,7 +1067,7 @@ xfs_trans_cancel(
         /* mark this thread as no longer being in a transaction */
         current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
  
-       xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
+       xfs_trans_free_items(tp, dirty);
         xfs_trans_free(tp);
  }
  
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index c6e1c5704a8c2b0f94fcc56b7f4f8a34057bf752..64d7f171ebd32ae8ebe4992b64700d36e49f18f1 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -27,7 +27,7 @@ struct xfs_cud_log_item;
  struct xfs_bui_log_item;
  struct xfs_bud_log_item;
  
-typedef struct xfs_log_item {
+struct xfs_log_item {
         struct list_head                li_ail;         /* AIL pointers */
         struct list_head                li_trans;       /* transaction list */
         xfs_lsn_t                       li_lsn;         /* last on-disk lsn */
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
         struct xfs_log_vec              *li_lv;         /* active log vector */
         struct xfs_log_vec              *li_lv_shadow;  /* standby vector */
         xfs_lsn_t                       li_seq;         /* CIL commit seq */
-} xfs_log_item_t;
+};
  
  /*
   * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
@@ -67,17 +67,24 @@ typedef struct xfs_log_item {
         { (1 << XFS_LI_DIRTY),          "DIRTY" }
  
  struct xfs_item_ops {
-       void (*iop_size)(xfs_log_item_t *, int *, int *);
-       void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);
-       void (*iop_pin)(xfs_log_item_t *);
-       void (*iop_unpin)(xfs_log_item_t *, int remove);
+       unsigned flags;
+       void (*iop_size)(struct xfs_log_item *, int *, int *);
+       void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
+       void (*iop_pin)(struct xfs_log_item *);
+       void (*iop_unpin)(struct xfs_log_item *, int remove);
         uint (*iop_push)(struct xfs_log_item *, struct list_head *);
-       void (*iop_unlock)(xfs_log_item_t *);
-       xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
-       void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-       void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
+       void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
+       void (*iop_release)(struct xfs_log_item *);
+       xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+       void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
  };
  
+/*
+ * Release the log item as soon as committed.  This is for items just logging
+ * intents that never need to be written back in place.
+ */
+#define XFS_ITEM_RELEASE_WHEN_COMMITTED        (1 << 0)
+
  void   xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
                           int type, const struct xfs_item_ops *ops);
  
@@ -203,7 +210,7 @@ xfs_trans_read_buf(
                                       flags, bpp, ops);
  }
  
-struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
  
  void           xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
  void           xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
@@ -223,14 +230,6 @@ void               xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
  bool           xfs_trans_buf_is_dirty(struct xfs_buf *bp);
  void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
  
-struct xfs_efd_log_item        *xfs_trans_get_efd(struct xfs_trans *,
-                                 struct xfs_efi_log_item *,
-                                 uint);
-int            xfs_trans_free_extent(struct xfs_trans *,
-                                     struct xfs_efd_log_item *, xfs_fsblock_t,
-                                     xfs_extlen_t,
-                                     const struct xfs_owner_info *,
-                                     bool);
  int            xfs_trans_commit(struct xfs_trans *);
  int            xfs_trans_roll(struct xfs_trans **);
  int            xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -245,37 +244,4 @@ void               xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
  
  extern kmem_zone_t     *xfs_trans_zone;
  
-/* rmap updates */
-enum xfs_rmap_intent_type;
-
-struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
-               struct xfs_rui_log_item *ruip);
-int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
-               struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
-               uint64_t owner, int whichfork, xfs_fileoff_t startoff,
-               xfs_fsblock_t startblock, xfs_filblks_t blockcount,
-               xfs_exntst_t state, struct xfs_btree_cur **pcur);
-
-/* refcount updates */
-enum xfs_refcount_intent_type;
-
-struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
-               struct xfs_cui_log_item *cuip);
-int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
-               struct xfs_cud_log_item *cudp,
-               enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
-               xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
-               xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
-
-/* mapping updates */
-enum xfs_bmap_intent_type;
-
-struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
-               struct xfs_bui_log_item *buip);
-int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
-               struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
-               struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
-               xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
-               xfs_exntst_t state);
-
  #endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index d3a4e89bf4a0ddb916ed4f5d395285e2e2188869..6ccfd75d3c24ce7207e336b5c3b714c18bdcb69a 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -6,6 +6,7 @@
   */
  #include "xfs.h"
  #include "xfs_fs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
@@ -74,29 +75,29 @@ xfs_ail_check(
   * Return a pointer to the last item in the AIL.  If the AIL is empty, then
   * return NULL.
   */
-static xfs_log_item_t *
+static struct xfs_log_item *
  xfs_ail_max(
         struct xfs_ail  *ailp)
  {
         if (list_empty(&ailp->ail_head))
                 return NULL;
  
-       return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
+       return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
  }
  
  /*
   * Return a pointer to the item which follows the given item in the AIL.  If
   * the given item is the last item in the list, then return NULL.
   */
-static xfs_log_item_t *
+static struct xfs_log_item *
  xfs_ail_next(
-       struct xfs_ail  *ailp,
-       xfs_log_item_t  *lip)
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip)
  {
         if (lip->li_ail.next == &ailp->ail_head)
                 return NULL;
  
-       return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+       return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
  }
  
  /*
@@ -109,10 +110,10 @@ xfs_ail_next(
   */
  xfs_lsn_t
  xfs_ail_min_lsn(
-       struct xfs_ail  *ailp)
+       struct xfs_ail          *ailp)
  {
-       xfs_lsn_t       lsn = 0;
-       xfs_log_item_t  *lip;
+       xfs_lsn_t               lsn = 0;
+       struct xfs_log_item     *lip;
  
         spin_lock(&ailp->ail_lock);
         lip = xfs_ail_min(ailp);
@@ -128,10 +129,10 @@ xfs_ail_min_lsn(
   */
  static xfs_lsn_t
  xfs_ail_max_lsn(
-       struct xfs_ail  *ailp)
+       struct xfs_ail          *ailp)
  {
-       xfs_lsn_t       lsn = 0;
-       xfs_log_item_t  *lip;
+       xfs_lsn_t               lsn = 0;
+       struct xfs_log_item     *lip;
  
         spin_lock(&ailp->ail_lock);
         lip = xfs_ail_max(ailp);
@@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear(
   * ascending traversal.  Pass a @lsn of zero to initialise the cursor to the
   * first item in the AIL. Returns NULL if the list is empty.
   */
-xfs_log_item_t *
+struct xfs_log_item *
  xfs_trans_ail_cursor_first(
         struct xfs_ail          *ailp,
         struct xfs_ail_cursor   *cur,
         xfs_lsn_t               lsn)
  {
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
  
         xfs_trans_ail_cursor_init(ailp, cur);
  
@@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last(
         struct xfs_ail          *ailp,
         xfs_lsn_t               lsn)
  {
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
  
         list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
                 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,8 +328,8 @@ xfs_ail_splice(
   */
  static void
  xfs_ail_delete(
-       struct xfs_ail  *ailp,
-       xfs_log_item_t  *lip)
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip)
  {
         xfs_ail_check(ailp, lip);
         list_del(&lip->li_ail);
@@ -347,6 +348,14 @@ xfsaild_push_item(
         if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
                 return XFS_ITEM_PINNED;
  
+       /*
+        * Consider the item pinned if a push callback is not defined so the
+        * caller will force the log. This should only happen for intent items
+        * as they are unpinned once the associated done item is committed to
+        * the on-disk log.
+        */
+       if (!lip->li_ops->iop_push)
+               return XFS_ITEM_PINNED;
         return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
  }
  
@@ -356,7 +365,7 @@ xfsaild_push(
  {
         xfs_mount_t             *mp = ailp->ail_mount;
         struct xfs_ail_cursor   cur;
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
         xfs_lsn_t               lsn;
         xfs_lsn_t               target;
         long                    tout;
@@ -611,10 +620,10 @@ xfsaild(
   */
  void
  xfs_ail_push(
-       struct xfs_ail  *ailp,
-       xfs_lsn_t       threshold_lsn)
+       struct xfs_ail          *ailp,
+       xfs_lsn_t               threshold_lsn)
  {
-       xfs_log_item_t  *lip;
+       struct xfs_log_item     *lip;
  
         lip = xfs_ail_min(ailp);
         if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk(
         int                     nr_items,
         xfs_lsn_t               lsn) __releases(ailp->ail_lock)
  {
-       xfs_log_item_t          *mlip;
+       struct xfs_log_item     *mlip;
         int                     mlip_changed = 0;
         int                     i;
         LIST_HEAD(tmp);
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c

deleted file mode 100644 (file)

index e1c7d55..0000000
--- a/fs/xfs/xfs_trans_bmap.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_bmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_inode.h"
-
-/*
- * This routine is called to allocate a "bmap update done"
- * log item.
- */
-struct xfs_bud_log_item *
-xfs_trans_get_bud(
-       struct xfs_trans                *tp,
-       struct xfs_bui_log_item         *buip)
-{
-       struct xfs_bud_log_item         *budp;
-
-       budp = xfs_bud_init(tp->t_mountp, buip);
-       xfs_trans_add_item(tp, &budp->bud_item);
-       return budp;
-}
-
-/*
- * Finish an bmap update and log it to the BUD. Note that the
- * transaction is marked dirty regardless of whether the bmap update
- * succeeds or fails to support the BUI/BUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_bmap_update(
-       struct xfs_trans                *tp,
-       struct xfs_bud_log_item         *budp,
-       enum xfs_bmap_intent_type       type,
-       struct xfs_inode                *ip,
-       int                             whichfork,
-       xfs_fileoff_t                   startoff,
-       xfs_fsblock_t                   startblock,
-       xfs_filblks_t                   *blockcount,
-       xfs_exntst_t                    state)
-{
-       int                             error;
-
-       error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
-                       startblock, blockcount, state);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the BUI and frees the BUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
-
-       return error;
-}
-
-/* Sort bmap intents by inode. */
-static int
-xfs_bmap_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_bmap_intent          *ba;
-       struct xfs_bmap_intent          *bb;
-
-       ba = container_of(a, struct xfs_bmap_intent, bi_list);
-       bb = container_of(b, struct xfs_bmap_intent, bi_list);
-       return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
-}
-
-/* Get an BUI. */
-STATIC void *
-xfs_bmap_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_bui_log_item         *buip;
-
-       ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
-       ASSERT(tp != NULL);
-
-       buip = xfs_bui_init(tp->t_mountp);
-       ASSERT(buip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &buip->bui_item);
-       return buip;
-}
-
-/* Set the map extent flags for this mapping. */
-static void
-xfs_trans_set_bmap_flags(
-       struct xfs_map_extent           *bmap,
-       enum xfs_bmap_intent_type       type,
-       int                             whichfork,
-       xfs_exntst_t                    state)
-{
-       bmap->me_flags = 0;
-       switch (type) {
-       case XFS_BMAP_MAP:
-       case XFS_BMAP_UNMAP:
-               bmap->me_flags = type;
-               break;
-       default:
-               ASSERT(0);
-       }
-       if (state == XFS_EXT_UNWRITTEN)
-               bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
-       if (whichfork == XFS_ATTR_FORK)
-               bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
-}
-
-/* Log bmap updates in the intent item. */
-STATIC void
-xfs_bmap_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_bui_log_item         *buip = intent;
-       struct xfs_bmap_intent          *bmap;
-       uint                            next_extent;
-       struct xfs_map_extent           *map;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
-       ASSERT(next_extent < buip->bui_format.bui_nextents);
-       map = &buip->bui_format.bui_extents[next_extent];
-       map->me_owner = bmap->bi_owner->i_ino;
-       map->me_startblock = bmap->bi_bmap.br_startblock;
-       map->me_startoff = bmap->bi_bmap.br_startoff;
-       map->me_len = bmap->bi_bmap.br_blockcount;
-       xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
-                       bmap->bi_bmap.br_state);
-}
-
-/* Get an BUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_bmap_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_bud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_bmap_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_bmap_intent          *bmap;
-       xfs_filblks_t                   count;
-       int                             error;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-       count = bmap->bi_bmap.br_blockcount;
-       error = xfs_trans_log_finish_bmap_update(tp, done_item,
-                       bmap->bi_type,
-                       bmap->bi_owner, bmap->bi_whichfork,
-                       bmap->bi_bmap.br_startoff,
-                       bmap->bi_bmap.br_startblock,
-                       &count,
-                       bmap->bi_bmap.br_state);
-       if (!error && count > 0) {
-               ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
-               bmap->bi_bmap.br_blockcount = count;
-               return -EAGAIN;
-       }
-       kmem_free(bmap);
-       return error;
-}
-
-/* Abort all pending BUIs. */
-STATIC void
-xfs_bmap_update_abort_intent(
-       void                            *intent)
-{
-       xfs_bui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_bmap_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_bmap_intent          *bmap;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-       kmem_free(bmap);
-}
-
-const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
-       .max_items      = XFS_BUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_bmap_update_diff_items,
-       .create_intent  = xfs_bmap_update_create_intent,
-       .abort_intent   = xfs_bmap_update_abort_intent,
-       .log_item       = xfs_bmap_update_log_item,
-       .create_done    = xfs_bmap_update_create_done,
-       .finish_item    = xfs_bmap_update_finish_item,
-       .cancel_item    = xfs_bmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 7d65ebf1e847a9c07c0fbb8178b26892ad3390e2..b5b3a78ef31c41e0f057d8f8ab752a21ec8e910e 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -10,11 +10,9 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
-#include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_trans_priv.h"
-#include "xfs_error.h"
  #include "xfs_trace.h"
  
  /*
@@ -174,8 +172,7 @@ xfs_trans_get_buf_map(
  xfs_buf_t *
  xfs_trans_getsb(
         xfs_trans_t             *tp,
-       struct xfs_mount        *mp,
-       int                     flags)
+       struct xfs_mount        *mp)
  {
         xfs_buf_t               *bp;
         struct xfs_buf_log_item *bip;
@@ -185,7 +182,7 @@ xfs_trans_getsb(
          * if tp is NULL.
          */
         if (tp == NULL)
-               return xfs_getsb(mp, flags);
+               return xfs_getsb(mp);
  
         /*
          * If the superblock buffer already has this transaction
@@ -203,7 +200,7 @@ xfs_trans_getsb(
                 return bp;
         }
  
-       bp = xfs_getsb(mp, flags);
+       bp = xfs_getsb(mp);
         if (bp == NULL)
                 return NULL;
  
@@ -428,7 +425,7 @@ xfs_trans_brelse(
  
  /*
   * Mark the buffer as not needing to be unlocked when the buf item's
- * iop_unlock() routine is called.  The buffer must already be locked
+ * iop_committing() routine is called.  The buffer must already be locked
   * and associated with the given transaction.
   */
  /* ARGSUSED */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c

index cd664a03613fed2bb504675e009574a9b07ad5a9..1027c9ca6eb8a0e0adb8bb1ed558e5eab6115e08 100644 (file)
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -11,7 +11,6 @@
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
-#include "xfs_error.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_quota.h"
@@ -29,7 +28,6 @@ xfs_trans_dqjoin(
         xfs_trans_t     *tp,
         xfs_dquot_t     *dqp)
  {
-       ASSERT(dqp->q_transp != tp);
         ASSERT(XFS_DQ_IS_LOCKED(dqp));
         ASSERT(dqp->q_logitem.qli_dquot == dqp);
  
@@ -37,15 +35,8 @@ xfs_trans_dqjoin(
          * Get a log_item_desc to point at the new item.
          */
         xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-       /*
-        * Initialize d_transp so we can later determine if this dquot is
-        * associated with this transaction.
-        */
-       dqp->q_transp = tp;
  }
  
-
  /*
   * This is called to mark the dquot as needing
   * to be logged when the transaction is committed.  The dquot must
@@ -61,7 +52,6 @@ xfs_trans_log_dquot(
         xfs_trans_t     *tp,
         xfs_dquot_t     *dqp)
  {
-       ASSERT(dqp->q_transp == tp);
         ASSERT(XFS_DQ_IS_LOCKED(dqp));
  
         tp->t_flags |= XFS_TRANS_DIRTY;
@@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas(
                                 break;
  
                         ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(dqp->q_transp == tp);
  
                         /*
                          * adjust the actual number of blocks used
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c

deleted file mode 100644 (file)

index 8ee7a3f..0000000
--- a/fs/xfs/xfs_trans_extfree.c
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_extfree_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_trace.h"
-
-/*
- * This routine is called to allocate an "extent free done"
- * log item that will hold nextents worth of extents.  The
- * caller must use all nextents extents, because we are not
- * flexible about this at all.
- */
-struct xfs_efd_log_item *
-xfs_trans_get_efd(struct xfs_trans             *tp,
-                 struct xfs_efi_log_item       *efip,
-                 uint                          nextents)
-{
-       struct xfs_efd_log_item                 *efdp;
-
-       ASSERT(tp != NULL);
-       ASSERT(nextents > 0);
-
-       efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
-       ASSERT(efdp != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &efdp->efd_item);
-       return efdp;
-}
-
-/*
- * Free an extent and log it to the EFD. Note that the transaction is marked
- * dirty regardless of whether the extent free succeeds or fails to support the
- * EFI/EFD lifecycle rules.
- */
-int
-xfs_trans_free_extent(
-       struct xfs_trans                *tp,
-       struct xfs_efd_log_item         *efdp,
-       xfs_fsblock_t                   start_block,
-       xfs_extlen_t                    ext_len,
-       const struct xfs_owner_info     *oinfo,
-       bool                            skip_discard)
-{
-       struct xfs_mount                *mp = tp->t_mountp;
-       struct xfs_extent               *extp;
-       uint                            next_extent;
-       xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, start_block);
-       xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp,
-                                                               start_block);
-       int                             error;
-
-       trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
-
-       error = __xfs_free_extent(tp, start_block, ext_len,
-                                 oinfo, XFS_AG_RESV_NONE, skip_discard);
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the EFI and frees the EFD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
-       next_extent = efdp->efd_next_extent;
-       ASSERT(next_extent < efdp->efd_format.efd_nextents);
-       extp = &(efdp->efd_format.efd_extents[next_extent]);
-       extp->ext_start = start_block;
-       extp->ext_len = ext_len;
-       efdp->efd_next_extent++;
-
-       return error;
-}
-
-/* Sort bmap items by AG. */
-static int
-xfs_extent_free_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_extent_free_item     *ra;
-       struct xfs_extent_free_item     *rb;
-
-       ra = container_of(a, struct xfs_extent_free_item, xefi_list);
-       rb = container_of(b, struct xfs_extent_free_item, xefi_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
-}
-
-/* Get an EFI. */
-STATIC void *
-xfs_extent_free_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_efi_log_item         *efip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       efip = xfs_efi_init(tp->t_mountp, count);
-       ASSERT(efip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &efip->efi_item);
-       return efip;
-}
-
-/* Log a free extent to the intent item. */
-STATIC void
-xfs_extent_free_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_efi_log_item         *efip = intent;
-       struct xfs_extent_free_item     *free;
-       uint                            next_extent;
-       struct xfs_extent               *extp;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
-       ASSERT(next_extent < efip->efi_format.efi_nextents);
-       extp = &efip->efi_format.efi_extents[next_extent];
-       extp->ext_start = free->xefi_startblock;
-       extp->ext_len = free->xefi_blockcount;
-}
-
-/* Get an EFD so we can process all the free extents. */
-STATIC void *
-xfs_extent_free_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_efd(tp, intent, count);
-}
-
-/* Process a free extent. */
-STATIC int
-xfs_extent_free_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_extent_free_item     *free;
-       int                             error;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       error = xfs_trans_free_extent(tp, done_item,
-                       free->xefi_startblock,
-                       free->xefi_blockcount,
-                       &free->xefi_oinfo, free->xefi_skip_discard);
-       kmem_free(free);
-       return error;
-}
-
-/* Abort all pending EFIs. */
-STATIC void
-xfs_extent_free_abort_intent(
-       void                            *intent)
-{
-       xfs_efi_release(intent);
-}
-
-/* Cancel a free extent. */
-STATIC void
-xfs_extent_free_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_extent_free_item     *free;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       kmem_free(free);
-}
-
-const struct xfs_defer_op_type xfs_extent_free_defer_type = {
-       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_extent_free_diff_items,
-       .create_intent  = xfs_extent_free_create_intent,
-       .abort_intent   = xfs_extent_free_abort_intent,
-       .log_item       = xfs_extent_free_log_item,
-       .create_done    = xfs_extent_free_create_done,
-       .finish_item    = xfs_extent_free_finish_item,
-       .cancel_item    = xfs_extent_free_cancel_item,
-};
-
-/*
- * AGFL blocks are accounted differently in the reserve pools and are not
- * inserted into the busy extent list.
- */
-STATIC int
-xfs_agfl_free_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_mount                *mp = tp->t_mountp;
-       struct xfs_efd_log_item         *efdp = done_item;
-       struct xfs_extent_free_item     *free;
-       struct xfs_extent               *extp;
-       struct xfs_buf                  *agbp;
-       int                             error;
-       xfs_agnumber_t                  agno;
-       xfs_agblock_t                   agbno;
-       uint                            next_extent;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       ASSERT(free->xefi_blockcount == 1);
-       agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
-       agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
-
-       trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
-
-       error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
-       if (!error)
-               error = xfs_free_agfl_block(tp, agno, agbno, agbp,
-                                           &free->xefi_oinfo);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the EFI and frees the EFD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
-       next_extent = efdp->efd_next_extent;
-       ASSERT(next_extent < efdp->efd_format.efd_nextents);
-       extp = &(efdp->efd_format.efd_extents[next_extent]);
-       extp->ext_start = free->xefi_startblock;
-       extp->ext_len = free->xefi_blockcount;
-       efdp->efd_next_extent++;
-
-       kmem_free(free);
-       return error;
-}
-
-
-/* sub-type with special handling for AGFL deferred frees */
-const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
-       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_extent_free_diff_items,
-       .create_intent  = xfs_extent_free_create_intent,
-       .abort_intent   = xfs_extent_free_abort_intent,
-       .log_item       = xfs_extent_free_log_item,
-       .create_done    = xfs_extent_free_create_done,
-       .finish_item    = xfs_agfl_free_finish_item,
-       .cancel_item    = xfs_extent_free_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c

index 542927321a61b5e2ff52b30faca74dac268c1722..93d14e47269d1f1cbc18906de9edbb6ab18dadd4 100644 (file)
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -8,13 +8,10 @@
  #include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_inode_item.h"
-#include "xfs_trace.h"
  
  #include <linux/iversion.h>
  
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h

index 091eae9f4e7434e7d40364567dc98e84aa177060..2e073c1c4614f2a79cc9452854da1cead65fb06c 100644 (file)
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -16,12 +16,10 @@ struct xfs_log_vec;
  void   xfs_trans_init(struct xfs_mount *);
  void   xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
  void   xfs_trans_del_item(struct xfs_log_item *);
-void   xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
-                               bool abort);
  void   xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
  
  void   xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
-                               xfs_lsn_t commit_lsn, int aborted);
+                               xfs_lsn_t commit_lsn, bool aborted);
  /*
   * AIL traversal cursor.
   *
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c

deleted file mode 100644 (file)

index 8d73472..0000000
--- a/fs/xfs/xfs_trans_refcount.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_refcount_item.h"
-#include "xfs_alloc.h"
-#include "xfs_refcount.h"
-
-/*
- * This routine is called to allocate a "refcount update done"
- * log item.
- */
-struct xfs_cud_log_item *
-xfs_trans_get_cud(
-       struct xfs_trans                *tp,
-       struct xfs_cui_log_item         *cuip)
-{
-       struct xfs_cud_log_item         *cudp;
-
-       cudp = xfs_cud_init(tp->t_mountp, cuip);
-       xfs_trans_add_item(tp, &cudp->cud_item);
-       return cudp;
-}
-
-/*
- * Finish an refcount update and log it to the CUD. Note that the
- * transaction is marked dirty regardless of whether the refcount
- * update succeeds or fails to support the CUI/CUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_refcount_update(
-       struct xfs_trans                *tp,
-       struct xfs_cud_log_item         *cudp,
-       enum xfs_refcount_intent_type   type,
-       xfs_fsblock_t                   startblock,
-       xfs_extlen_t                    blockcount,
-       xfs_fsblock_t                   *new_fsb,
-       xfs_extlen_t                    *new_len,
-       struct xfs_btree_cur            **pcur)
-{
-       int                             error;
-
-       error = xfs_refcount_finish_one(tp, type, startblock,
-                       blockcount, new_fsb, new_len, pcur);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the CUI and frees the CUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
-
-       return error;
-}
-
-/* Sort refcount intents by AG. */
-static int
-xfs_refcount_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_refcount_intent      *ra;
-       struct xfs_refcount_intent      *rb;
-
-       ra = container_of(a, struct xfs_refcount_intent, ri_list);
-       rb = container_of(b, struct xfs_refcount_intent, ri_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
-}
-
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_cui_log_item         *cuip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       cuip = xfs_cui_init(tp->t_mountp, count);
-       ASSERT(cuip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &cuip->cui_item);
-       return cuip;
-}
-
-/* Set the phys extent flags for this reverse mapping. */
-static void
-xfs_trans_set_refcount_flags(
-       struct xfs_phys_extent          *refc,
-       enum xfs_refcount_intent_type   type)
-{
-       refc->pe_flags = 0;
-       switch (type) {
-       case XFS_REFCOUNT_INCREASE:
-       case XFS_REFCOUNT_DECREASE:
-       case XFS_REFCOUNT_ALLOC_COW:
-       case XFS_REFCOUNT_FREE_COW:
-               refc->pe_flags |= type;
-               break;
-       default:
-               ASSERT(0);
-       }
-}
-
-/* Log refcount updates in the intent item. */
-STATIC void
-xfs_refcount_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_cui_log_item         *cuip = intent;
-       struct xfs_refcount_intent      *refc;
-       uint                            next_extent;
-       struct xfs_phys_extent          *ext;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
-       ASSERT(next_extent < cuip->cui_format.cui_nextents);
-       ext = &cuip->cui_format.cui_extents[next_extent];
-       ext->pe_startblock = refc->ri_startblock;
-       ext->pe_len = refc->ri_blockcount;
-       xfs_trans_set_refcount_flags(ext, refc->ri_type);
-}
-
-/* Get an CUD so we can process all the deferred refcount updates. */
-STATIC void *
-xfs_refcount_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_cud(tp, intent);
-}
-
-/* Process a deferred refcount update. */
-STATIC int
-xfs_refcount_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_refcount_intent      *refc;
-       xfs_fsblock_t                   new_fsb;
-       xfs_extlen_t                    new_aglen;
-       int                             error;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-       error = xfs_trans_log_finish_refcount_update(tp, done_item,
-                       refc->ri_type,
-                       refc->ri_startblock,
-                       refc->ri_blockcount,
-                       &new_fsb, &new_aglen,
-                       (struct xfs_btree_cur **)state);
-       /* Did we run out of reservation?  Requeue what we didn't finish. */
-       if (!error && new_aglen > 0) {
-               ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
-                      refc->ri_type == XFS_REFCOUNT_DECREASE);
-               refc->ri_startblock = new_fsb;
-               refc->ri_blockcount = new_aglen;
-               return -EAGAIN;
-       }
-       kmem_free(refc);
-       return error;
-}
-
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
-       struct xfs_trans        *tp,
-       void                    *state,
-       int                     error)
-{
-       struct xfs_btree_cur    *rcur = state;
-
-       xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending CUIs. */
-STATIC void
-xfs_refcount_update_abort_intent(
-       void                            *intent)
-{
-       xfs_cui_release(intent);
-}
-
-/* Cancel a deferred refcount update. */
-STATIC void
-xfs_refcount_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_refcount_intent      *refc;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-       kmem_free(refc);
-}
-
-const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
-       .max_items      = XFS_CUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_refcount_update_diff_items,
-       .create_intent  = xfs_refcount_update_create_intent,
-       .abort_intent   = xfs_refcount_update_abort_intent,
-       .log_item       = xfs_refcount_update_log_item,
-       .create_done    = xfs_refcount_update_create_done,
-       .finish_item    = xfs_refcount_update_finish_item,
-       .finish_cleanup = xfs_refcount_update_finish_cleanup,
-       .cancel_item    = xfs_refcount_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c

deleted file mode 100644 (file)

index 5c7936b..0000000
--- a/fs/xfs/xfs_trans_rmap.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_rmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_rmap.h"
-
-/* Set the map extent flags for this reverse mapping. */
-static void
-xfs_trans_set_rmap_flags(
-       struct xfs_map_extent           *rmap,
-       enum xfs_rmap_intent_type       type,
-       int                             whichfork,
-       xfs_exntst_t                    state)
-{
-       rmap->me_flags = 0;
-       if (state == XFS_EXT_UNWRITTEN)
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
-       if (whichfork == XFS_ATTR_FORK)
-               rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
-       switch (type) {
-       case XFS_RMAP_MAP:
-               rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
-               break;
-       case XFS_RMAP_MAP_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
-               break;
-       case XFS_RMAP_UNMAP:
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
-               break;
-       case XFS_RMAP_UNMAP_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
-               break;
-       case XFS_RMAP_CONVERT:
-               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
-               break;
-       case XFS_RMAP_CONVERT_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
-               break;
-       case XFS_RMAP_ALLOC:
-               rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
-               break;
-       case XFS_RMAP_FREE:
-               rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
-               break;
-       default:
-               ASSERT(0);
-       }
-}
-
-struct xfs_rud_log_item *
-xfs_trans_get_rud(
-       struct xfs_trans                *tp,
-       struct xfs_rui_log_item         *ruip)
-{
-       struct xfs_rud_log_item         *rudp;
-
-       rudp = xfs_rud_init(tp->t_mountp, ruip);
-       xfs_trans_add_item(tp, &rudp->rud_item);
-       return rudp;
-}
-
-/*
- * Finish an rmap update and log it to the RUD. Note that the transaction is
- * marked dirty regardless of whether the rmap update succeeds or fails to
- * support the RUI/RUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_rmap_update(
-       struct xfs_trans                *tp,
-       struct xfs_rud_log_item         *rudp,
-       enum xfs_rmap_intent_type       type,
-       uint64_t                        owner,
-       int                             whichfork,
-       xfs_fileoff_t                   startoff,
-       xfs_fsblock_t                   startblock,
-       xfs_filblks_t                   blockcount,
-       xfs_exntst_t                    state,
-       struct xfs_btree_cur            **pcur)
-{
-       int                             error;
-
-       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
-                       startblock, blockcount, state, pcur);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the RUI and frees the RUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
-
-       return error;
-}
-
-/* Sort rmap intents by AG. */
-static int
-xfs_rmap_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_rmap_intent          *ra;
-       struct xfs_rmap_intent          *rb;
-
-       ra = container_of(a, struct xfs_rmap_intent, ri_list);
-       rb = container_of(b, struct xfs_rmap_intent, ri_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
-}
-
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_rui_log_item         *ruip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       ruip = xfs_rui_init(tp->t_mountp, count);
-       ASSERT(ruip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &ruip->rui_item);
-       return ruip;
-}
-
-/* Log rmap updates in the intent item. */
-STATIC void
-xfs_rmap_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_rui_log_item         *ruip = intent;
-       struct xfs_rmap_intent          *rmap;
-       uint                            next_extent;
-       struct xfs_map_extent           *map;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
-       ASSERT(next_extent < ruip->rui_format.rui_nextents);
-       map = &ruip->rui_format.rui_extents[next_extent];
-       map->me_owner = rmap->ri_owner;
-       map->me_startblock = rmap->ri_bmap.br_startblock;
-       map->me_startoff = rmap->ri_bmap.br_startoff;
-       map->me_len = rmap->ri_bmap.br_blockcount;
-       xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
-                       rmap->ri_bmap.br_state);
-}
-
-/* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_rmap_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_rud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_rmap_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_rmap_intent          *rmap;
-       int                             error;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-       error = xfs_trans_log_finish_rmap_update(tp, done_item,
-                       rmap->ri_type,
-                       rmap->ri_owner, rmap->ri_whichfork,
-                       rmap->ri_bmap.br_startoff,
-                       rmap->ri_bmap.br_startblock,
-                       rmap->ri_bmap.br_blockcount,
-                       rmap->ri_bmap.br_state,
-                       (struct xfs_btree_cur **)state);
-       kmem_free(rmap);
-       return error;
-}
-
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
-       struct xfs_trans        *tp,
-       void                    *state,
-       int                     error)
-{
-       struct xfs_btree_cur    *rcur = state;
-
-       xfs_rmap_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending RUIs. */
-STATIC void
-xfs_rmap_update_abort_intent(
-       void                            *intent)
-{
-       xfs_rui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_rmap_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_rmap_intent          *rmap;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-       kmem_free(rmap);
-}
-
-const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
-       .max_items      = XFS_RUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_rmap_update_diff_items,
-       .create_intent  = xfs_rmap_update_create_intent,
-       .abort_intent   = xfs_rmap_update_abort_intent,
-       .log_item       = xfs_rmap_update_log_item,
-       .create_done    = xfs_rmap_update_create_done,
-       .finish_item    = xfs_rmap_update_finish_item,
-       .finish_cleanup = xfs_rmap_update_finish_cleanup,
-       .cancel_item    = xfs_rmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c

index 9a63016009a1394f41beaff8323a5568b6ceab22..3123b5aaad2a15ef3652892372c7808ebc091dd5 100644 (file)
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -5,15 +5,12 @@
   */
  
  #include "xfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
  #include "xfs_da_format.h"
  #include "xfs_inode.h"
  #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
  
  #include <linux/posix_acl_xattr.h>
  #include <linux/xattr.h>
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
Documentation/filesystems/xfs-self-describing-metadata.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
fs/xfs/Makefile		patch \| blob \| history
fs/xfs/kmem.c		patch \| blob \| history
fs/xfs/kmem.h		patch \| blob \| history
fs/xfs/libxfs/xfs_ag.c		patch \| blob \| history
fs/xfs/libxfs/xfs_ag_resv.c		patch \| blob \| history
fs/xfs/libxfs/xfs_alloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_alloc_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_attr.c		patch \| blob \| history
fs/xfs/libxfs/xfs_attr.h		patch \| blob \| history
fs/xfs/libxfs/xfs_attr_leaf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_attr_remote.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bit.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_btree.h		patch \| blob \| history
fs/xfs/libxfs/xfs_da_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_da_format.c		patch \| blob \| history
fs/xfs/libxfs/xfs_defer.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2_block.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2_data.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2_leaf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2_node.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dir2_sf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dquot_buf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_format.h		patch \| blob \| history
fs/xfs/libxfs/xfs_fs.h		patch \| blob \| history
fs/xfs/libxfs/xfs_health.h		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc.h		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc_btree.h		patch \| blob \| history
fs/xfs/libxfs/xfs_iext_tree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_inode_buf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_inode_fork.c		patch \| blob \| history
fs/xfs/libxfs/xfs_log_rlimit.c		patch \| blob \| history
fs/xfs/libxfs/xfs_refcount.c		patch \| blob \| history
fs/xfs/libxfs/xfs_refcount_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rmap_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rtbitmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_sb.c		patch \| blob \| history
fs/xfs/libxfs/xfs_shared.h		patch \| blob \| history
fs/xfs/libxfs/xfs_symlink_remote.c		patch \| blob \| history
fs/xfs/libxfs/xfs_trans_resv.c		patch \| blob \| history
fs/xfs/libxfs/xfs_trans_space.h		patch \| blob \| history
fs/xfs/libxfs/xfs_types.c		patch \| blob \| history
fs/xfs/scrub/agheader.c		patch \| blob \| history
fs/xfs/scrub/agheader_repair.c		patch \| blob \| history
fs/xfs/scrub/alloc.c		patch \| blob \| history
fs/xfs/scrub/attr.c		patch \| blob \| history
fs/xfs/scrub/attr.h	[new file with mode: 0644]	patch \| blob
fs/xfs/scrub/bitmap.c		patch \| blob \| history
fs/xfs/scrub/bmap.c		patch \| blob \| history
fs/xfs/scrub/btree.c		patch \| blob \| history
fs/xfs/scrub/common.c		patch \| blob \| history
fs/xfs/scrub/dabtree.c		patch \| blob \| history
fs/xfs/scrub/dir.c		patch \| blob \| history
fs/xfs/scrub/fscounters.c		patch \| blob \| history
fs/xfs/scrub/health.c		patch \| blob \| history
fs/xfs/scrub/ialloc.c		patch \| blob \| history
fs/xfs/scrub/inode.c		patch \| blob \| history
fs/xfs/scrub/parent.c		patch \| blob \| history
fs/xfs/scrub/quota.c		patch \| blob \| history
fs/xfs/scrub/refcount.c		patch \| blob \| history
fs/xfs/scrub/repair.c		patch \| blob \| history
fs/xfs/scrub/rmap.c		patch \| blob \| history
fs/xfs/scrub/rtbitmap.c		patch \| blob \| history
fs/xfs/scrub/scrub.c		patch \| blob \| history
fs/xfs/scrub/symlink.c		patch \| blob \| history
fs/xfs/scrub/trace.c		patch \| blob \| history
fs/xfs/xfs_acl.c		patch \| blob \| history
fs/xfs/xfs_aops.c		patch \| blob \| history
fs/xfs/xfs_aops.h		patch \| blob \| history
fs/xfs/xfs_attr_inactive.c		patch \| blob \| history
fs/xfs/xfs_attr_list.c		patch \| blob \| history
fs/xfs/xfs_bio_io.c	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_bmap_item.c		patch \| blob \| history
fs/xfs/xfs_bmap_item.h		patch \| blob \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| history
fs/xfs/xfs_buf.c		patch \| blob \| history
fs/xfs/xfs_buf.h		patch \| blob \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_buf_item.h		patch \| blob \| history
fs/xfs/xfs_dir2_readdir.c		patch \| blob \| history
fs/xfs/xfs_discard.c		patch \| blob \| history
fs/xfs/xfs_dquot.c		patch \| blob \| history
fs/xfs/xfs_dquot.h		patch \| blob \| history
fs/xfs/xfs_dquot_item.c		patch \| blob \| history
fs/xfs/xfs_dquot_item.h		patch \| blob \| history
fs/xfs/xfs_error.c		patch \| blob \| history
fs/xfs/xfs_export.c		patch \| blob \| history
fs/xfs/xfs_extfree_item.c		patch \| blob \| history
fs/xfs/xfs_extfree_item.h		patch \| blob \| history
fs/xfs/xfs_file.c		patch \| blob \| history
fs/xfs/xfs_filestream.c		patch \| blob \| history
fs/xfs/xfs_fsmap.c		patch \| blob \| history
fs/xfs/xfs_fsops.c		patch \| blob \| history
fs/xfs/xfs_globals.c		patch \| blob \| history
fs/xfs/xfs_health.c		patch \| blob \| history
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_icreate_item.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| history
fs/xfs/xfs_inode_item.h		patch \| blob \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| history
fs/xfs/xfs_ioctl.h		patch \| blob \| history
fs/xfs/xfs_ioctl32.c		patch \| blob \| history
fs/xfs/xfs_ioctl32.h		patch \| blob \| history
fs/xfs/xfs_iomap.c		patch \| blob \| history
fs/xfs/xfs_iops.c		patch \| blob \| history
fs/xfs/xfs_itable.c		patch \| blob \| history
fs/xfs/xfs_itable.h		patch \| blob \| history
fs/xfs/xfs_iwalk.c	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_iwalk.h	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_linux.h		patch \| blob \| history
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_log.h		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_log_priv.h		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_message.c		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_mount.h		patch \| blob \| history
fs/xfs/xfs_ondisk.h		patch \| blob \| history
fs/xfs/xfs_pnfs.c		patch \| blob \| history
fs/xfs/xfs_pwork.c	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_pwork.h	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_qm.c		patch \| blob \| history
fs/xfs/xfs_qm_bhv.c		patch \| blob \| history
fs/xfs/xfs_qm_syscalls.c		patch \| blob \| history
fs/xfs/xfs_quotaops.c		patch \| blob \| history
fs/xfs/xfs_refcount_item.c		patch \| blob \| history
fs/xfs/xfs_refcount_item.h		patch \| blob \| history
fs/xfs/xfs_reflink.c		patch \| blob \| history
fs/xfs/xfs_rmap_item.c		patch \| blob \| history
fs/xfs/xfs_rmap_item.h		patch \| blob \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| history
fs/xfs/xfs_stats.c		patch \| blob \| history
fs/xfs/xfs_super.c		patch \| blob \| history
fs/xfs/xfs_super.h		patch \| blob \| history
fs/xfs/xfs_symlink.c		patch \| blob \| history
fs/xfs/xfs_sysctl.c		patch \| blob \| history
fs/xfs/xfs_sysctl.h		patch \| blob \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| history
fs/xfs/xfs_trace.c		patch \| blob \| history
fs/xfs/xfs_trace.h		patch \| blob \| history
fs/xfs/xfs_trans.c		patch \| blob \| history
fs/xfs/xfs_trans.h		patch \| blob \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| history
fs/xfs/xfs_trans_bmap.c	[deleted file]	patch \| blob \| history
fs/xfs/xfs_trans_buf.c		patch \| blob \| history
fs/xfs/xfs_trans_dquot.c		patch \| blob \| history
fs/xfs/xfs_trans_extfree.c	[deleted file]	patch \| blob \| history
fs/xfs/xfs_trans_inode.c		patch \| blob \| history
fs/xfs/xfs_trans_priv.h		patch \| blob \| history
fs/xfs/xfs_trans_refcount.c	[deleted file]	patch \| blob \| history
fs/xfs/xfs_trans_rmap.c	[deleted file]	patch \| blob \| history
fs/xfs/xfs_xattr.c		patch \| blob \| history